From 9ff730511fdd6a6eaa7f8155bd4201d77a3c2ee5 Mon Sep 17 00:00:00 2001 From: rnou Date: Thu, 13 Apr 2023 12:10:50 +0200 Subject: [PATCH 01/17] Changelog change and branch WIP - REPL Added NUM_REPL env variable. (0 no replicas) NUM_REPL num replicas (Replicas < servers) Remove and truncate Metadata replication - WIP Metadata replication - Reattempt on stat minimal compilation issues (c++20), srand for repl Bitset Bit set proposal (WIP) Read - Write with bitset (<1024 chunks) Changed bitset to vector Added get_fs_config reattempt Some more resilience on create Added Replica_Check on write (disabled) Added helper vector-bitset functions --- CHANGELOG.md | 4 + README.md | 6 + include/client/env.hpp | 2 +- include/client/preload_context.hpp | 7 + include/client/rpc/forward_data.hpp | 11 +- include/client/rpc/forward_metadata.hpp | 20 +- include/client/rpc/rpc_types.hpp | 50 +++-- include/common/rpc/distributor.hpp | 60 ++++-- include/common/rpc/rpc_types.hpp | 12 +- include/common/rpc/rpc_util.hpp | 15 ++ src/client/gkfs_functions.cpp | 94 +++++++-- src/client/preload.cpp | 4 + src/client/preload_context.cpp | 12 ++ src/client/preload_util.cpp | 18 +- src/client/rpc/forward_data.cpp | 217 ++++++++++++++------ src/client/rpc/forward_management.cpp | 33 ++- src/client/rpc/forward_metadata.cpp | 259 +++++++++++++++--------- src/common/rpc/distributor.cpp | 70 +++++-- src/common/rpc/rpc_util.cpp | 108 ++++++++++ src/daemon/handler/srv_data.cpp | 28 ++- tests/unit/test_guided_distributor.cpp | 28 +-- 21 files changed, 761 insertions(+), 297 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f360b69b9..f60ddb994 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Client-driven replication that does not involve extra server logic. The NUM_REPL (0 < NUM_REPL < num_servers) env variable defines the number of +replicas ([!166](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/141)). +- Modified writes and reads to use a bitset instead of recomputing the per-chunk hash on the server. +- Added reattempt support to get_fs_config so that other servers are tried when the initial server fails. ### New diff --git a/README.md b/README.md index 6f571eff6..fff5deead 100644 --- a/README.md +++ b/README.md @@ -319,6 +319,12 @@ Support for fstat in renamed files is included. This is disabled by default. +### Replication +The user can enable the data replication feature by setting the replication environment variable: +`LIBGKFS_NUM_REPL=<num_replicas>`. +The number of replicas can range from 0 to the number of servers minus 1. +The replication environment variable can be set independently for each client. + ## Acknowledgment This software was partially supported by the EC H2020 funded NEXTGenIO project (Project ID: 671951, www.nextgenio.eu). 
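The distributor changes further down in this patch reduce replica placement to a hash offset: replica n of a chunk lands on (hash(path + chunk_id) + n) % num_servers, which is why NUM_REPL must stay below the server count (a wrapping copy would otherwise land on a host that already holds the data). A minimal self-contained sketch of that rule (hypothetical helper, not part of the patch):

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Mirrors the SimpleHashDistributor::locate_data change below: copy 0 is the
// original placement, copy n is shifted n hosts further, modulo host count.
static uint64_t locate(const std::string& path, uint64_t chunk_id,
                       unsigned int num_hosts, int num_copy) {
    std::hash<std::string> str_hash;
    return (str_hash(path + std::to_string(chunk_id)) + num_copy) % num_hosts;
}

int main() {
    // With 4 servers and one replica, each chunk's copy is stored on the
    // host following the original one (wrapping around).
    for(uint64_t chunk = 0; chunk < 3; chunk++) {
        std::cout << "chunk " << chunk << ": original on host "
                  << locate("/file", chunk, 4, 0) << ", replica on host "
                  << locate("/file", chunk, 4, 1) << '\n';
    }
    return 0;
}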
diff --git a/include/client/env.hpp b/include/client/env.hpp index c35ae1ad3..6cd928226 100644 --- a/include/client/env.hpp +++ b/include/client/env.hpp @@ -51,7 +51,7 @@ static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); #ifdef GKFS_ENABLE_FORWARDING static constexpr auto FORWARDING_MAP_FILE = ADD_PREFIX("FORWARDING_MAP_FILE"); #endif - +static constexpr auto NUM_REPL = ADD_PREFIX("NUM_REPL"); } // namespace gkfs::env #undef ADD_PREFIX diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 26875435b..c3f463dbb 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -104,6 +104,7 @@ private: bool internal_fds_must_relocate_; std::bitset protected_fds_; std::string hostname; + int replicas_; public: static PreloadContext* @@ -215,6 +216,12 @@ public: std::string get_hostname(); + + void + set_replicas(const int repl); + + int + get_replicas(); }; } // namespace preload diff --git a/include/client/rpc/forward_data.hpp b/include/client/rpc/forward_data.hpp index 8518e997f..45384a157 100644 --- a/include/client/rpc/forward_data.hpp +++ b/include/client/rpc/forward_data.hpp @@ -30,6 +30,9 @@ #ifndef GEKKOFS_CLIENT_FORWARD_DATA_HPP #define GEKKOFS_CLIENT_FORWARD_DATA_HPP +#include +#include +#include namespace gkfs::rpc { struct ChunkStat { @@ -43,14 +46,16 @@ struct ChunkStat { std::pair forward_write(const std::string& path, const void* buf, off64_t offset, - size_t write_size); + size_t write_size, const int8_t num_copy = 0); std::pair forward_read(const std::string& path, void* buf, off64_t offset, - size_t read_size); + size_t read_size, const int8_t num_copies, + std::set& failed); int -forward_truncate(const std::string& path, size_t current_size, size_t new_size); +forward_truncate(const std::string& path, size_t current_size, size_t new_size, + const int8_t num_copies); std::pair forward_get_chunk_stat(); diff --git a/include/client/rpc/forward_metadata.hpp b/include/client/rpc/forward_metadata.hpp index 564dc903b..97849a575 100644 --- a/include/client/rpc/forward_metadata.hpp +++ b/include/client/rpc/forward_metadata.hpp @@ -50,10 +50,10 @@ class Metadata; namespace rpc { int -forward_create(const std::string& path, mode_t mode); +forward_create(const std::string& path, mode_t mode, const int copy); int -forward_stat(const std::string& path, std::string& attr); +forward_stat(const std::string& path, std::string& attr, const int copy); #ifdef HAS_RENAME int @@ -62,22 +62,24 @@ forward_rename(const std::string& oldpath, const std::string& newpath, #endif // HAS_RENAME int -forward_remove(const std::string& path); +forward_remove(const std::string& path, const int8_t num_copies); int -forward_decr_size(const std::string& path, size_t length); +forward_decr_size(const std::string& path, size_t length, const int copy); int -forward_update_metadentry( - const std::string& path, const gkfs::metadata::Metadata& md, - const gkfs::metadata::MetadentryUpdateFlags& md_flags); +forward_update_metadentry(const std::string& path, + const gkfs::metadata::Metadata& md, + const gkfs::metadata::MetadentryUpdateFlags& md_flags, + const int copy); std::pair forward_update_metadentry_size(const std::string& path, size_t size, - off64_t offset, bool append_flag); + off64_t offset, bool append_flag, + const int num_copies); std::pair -forward_get_metadentry_size(const std::string& path); +forward_get_metadentry_size(const std::string& path, const int copy); std::pair> forward_get_dirents(const std::string& path); diff --git 
a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index 5993a31c3..bc8afd676 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -1469,11 +1469,11 @@ struct write_data { public: input(const std::string& path, int64_t offset, uint64_t host_id, - uint64_t host_size, uint64_t chunk_n, uint64_t chunk_start, - uint64_t chunk_end, uint64_t total_chunk_size, - const hermes::exposed_memory& buffers) + uint64_t host_size, const std::string& wbitset, uint64_t chunk_n, + uint64_t chunk_start, uint64_t chunk_end, + uint64_t total_chunk_size, const hermes::exposed_memory& buffers) : m_path(path), m_offset(offset), m_host_id(host_id), - m_host_size(host_size), m_chunk_n(chunk_n), + m_host_size(host_size), m_wbitset(wbitset), m_chunk_n(chunk_n), m_chunk_start(chunk_start), m_chunk_end(chunk_end), m_total_chunk_size(total_chunk_size), m_buffers(buffers) {} @@ -1512,6 +1512,11 @@ struct write_data { return m_chunk_n; } + std::string + wbitset() const { + return m_wbitset; + } + uint64_t chunk_start() const { return m_chunk_start; @@ -1535,15 +1540,16 @@ struct write_data { explicit input(const rpc_write_data_in_t& other) : m_path(other.path), m_offset(other.offset), m_host_id(other.host_id), m_host_size(other.host_size), - m_chunk_n(other.chunk_n), m_chunk_start(other.chunk_start), - m_chunk_end(other.chunk_end), + m_wbitset(other.wbitset), m_chunk_n(other.chunk_n), + m_chunk_start(other.chunk_start), m_chunk_end(other.chunk_end), m_total_chunk_size(other.total_chunk_size), m_buffers(other.bulk_handle) {} explicit operator rpc_write_data_in_t() { - return {m_path.c_str(), m_offset, m_host_id, - m_host_size, m_chunk_n, m_chunk_start, - m_chunk_end, m_total_chunk_size, hg_bulk_t(m_buffers)}; + return {m_path.c_str(), m_offset, m_host_id, + m_host_size, m_wbitset.c_str(), m_chunk_n, + m_chunk_start, m_chunk_end, m_total_chunk_size, + hg_bulk_t(m_buffers)}; } private: @@ -1551,6 +1557,7 @@ struct write_data { int64_t m_offset; uint64_t m_host_id; uint64_t m_host_size; + std::string m_wbitset; uint64_t m_chunk_n; uint64_t m_chunk_start; uint64_t m_chunk_end; @@ -1647,11 +1654,11 @@ struct read_data { public: input(const std::string& path, int64_t offset, uint64_t host_id, - uint64_t host_size, uint64_t chunk_n, uint64_t chunk_start, - uint64_t chunk_end, uint64_t total_chunk_size, - const hermes::exposed_memory& buffers) + uint64_t host_size, const std::string& wbitset, uint64_t chunk_n, + uint64_t chunk_start, uint64_t chunk_end, + uint64_t total_chunk_size, const hermes::exposed_memory& buffers) : m_path(path), m_offset(offset), m_host_id(host_id), - m_host_size(host_size), m_chunk_n(chunk_n), + m_host_size(host_size), m_wbitset(wbitset), m_chunk_n(chunk_n), m_chunk_start(chunk_start), m_chunk_end(chunk_end), m_total_chunk_size(total_chunk_size), m_buffers(buffers) {} @@ -1685,6 +1692,11 @@ struct read_data { return m_host_size; } + std::string + wbitset() const { + return m_wbitset; + } + uint64_t chunk_n() const { return m_chunk_n; @@ -1713,15 +1725,16 @@ struct read_data { explicit input(const rpc_read_data_in_t& other) : m_path(other.path), m_offset(other.offset), m_host_id(other.host_id), m_host_size(other.host_size), - m_chunk_n(other.chunk_n), m_chunk_start(other.chunk_start), - m_chunk_end(other.chunk_end), + m_wbitset(other.wbitset), m_chunk_n(other.chunk_n), + m_chunk_start(other.chunk_start), m_chunk_end(other.chunk_end), m_total_chunk_size(other.total_chunk_size), m_buffers(other.bulk_handle) {} explicit operator rpc_read_data_in_t() { - 
return {m_path.c_str(), m_offset, m_host_id, - m_host_size, m_chunk_n, m_chunk_start, - m_chunk_end, m_total_chunk_size, hg_bulk_t(m_buffers)}; + return {m_path.c_str(), m_offset, m_host_id, + m_host_size, m_wbitset.c_str(), m_chunk_n, + m_chunk_start, m_chunk_end, m_total_chunk_size, + hg_bulk_t(m_buffers)}; } private: std::string m_path; int64_t m_offset; uint64_t m_host_id; uint64_t m_host_size; + std::string m_wbitset; uint64_t m_chunk_n; uint64_t m_chunk_start; uint64_t m_chunk_end; diff --git a/include/common/rpc/distributor.hpp b/include/common/rpc/distributor.hpp index b42d1ae30..b6c09a0e0 100644 --- a/include/common/rpc/distributor.hpp +++ b/include/common/rpc/distributor.hpp @@ -48,15 +48,20 @@ public: localhost() const = 0; virtual host_t - locate_data(const std::string& path, const chunkid_t& chnk_id) const = 0; + locate_data(const std::string& path, const chunkid_t& chnk_id, + const int num_copy) const = 0; // TODO: We need to pass hosts_size on the server side, because the number // of servers is not known (at startup) + + virtual unsigned int + hosts_size() const = 0; + virtual host_t locate_data(const std::string& path, const chunkid_t& chnk_id, - unsigned int hosts_size) = 0; + unsigned int hosts_size, const int num_copy) = 0; virtual host_t - locate_file_metadata(const std::string& path) const = 0; + locate_file_metadata(const std::string& path, const int num_copy) const = 0; virtual std::vector locate_directory_metadata(const std::string& path) const = 0; @@ -75,19 +80,23 @@ public: SimpleHashDistributor(host_t localhost, unsigned int hosts_size); + unsigned int + hosts_size() const override; + host_t localhost() const override; host_t - locate_data(const std::string& path, - const chunkid_t& chnk_id) const override; + locate_data(const std::string& path, const chunkid_t& chnk_id, + const int num_copy) const override; host_t locate_data(const std::string& path, const chunkid_t& chnk_id, - unsigned int host_size); + unsigned int host_size, const int num_copy); host_t - locate_file_metadata(const std::string& path) const override; + locate_file_metadata(const std::string& path, + const int num_copy) const override; std::vector locate_directory_metadata(const std::string& path) const override; @@ -96,6 +105,7 @@ public: class LocalOnlyDistributor : public Distributor { private: host_t localhost_; + unsigned int hosts_size_{0}; public: explicit LocalOnlyDistributor(host_t localhost); @@ -103,12 +113,16 @@ public: host_t localhost() const override; + unsigned int + hosts_size() const override; + host_t - locate_data(const std::string& path, - const chunkid_t& chnk_id) const override; + locate_data(const std::string& path, const chunkid_t& chnk_id, + const int num_copy) const override; host_t - locate_file_metadata(const std::string& path) const override; + locate_file_metadata(const std::string& path, + const int num_copy) const override; std::vector locate_directory_metadata(const std::string& path) const override; @@ -117,7 +131,7 @@ public: class ForwarderDistributor : public Distributor { private: host_t fwd_host_; - unsigned int hosts_size_; + unsigned int hosts_size_{0}; std::vector all_hosts_; std::hash str_hash; @@ -127,16 +141,20 @@ public: host_t localhost() const override final; + unsigned int + hosts_size() const override; + host_t - locate_data(const std::string& path, - const chunkid_t& chnk_id) const override final; + locate_data(const std::string& path, const chunkid_t& chnk_id, + const int num_copy) const override final; host_t 
locate_data(const std::string& path, const chunkid_t& chnk_id, - unsigned int host_size) override final; + unsigned int host_size, const int num_copy) override final; host_t - locate_file_metadata(const std::string& path) const override; + locate_file_metadata(const std::string& path, + const int num_copy) const override; std::vector locate_directory_metadata(const std::string& path) const override; @@ -176,16 +194,20 @@ public: host_t localhost() const override; + unsigned int + hosts_size() const override; + host_t - locate_data(const std::string& path, - const chunkid_t& chnk_id) const override; + locate_data(const std::string& path, const chunkid_t& chnk_id, + const int num_copy) const override; host_t locate_data(const std::string& path, const chunkid_t& chnk_id, - unsigned int host_size); + unsigned int host_size, const int num_copy); host_t - locate_file_metadata(const std::string& path) const override; + locate_file_metadata(const std::string& path, + const int num_copy) const override; std::vector locate_directory_metadata(const std::string& path) const override; diff --git a/include/common/rpc/rpc_types.hpp b/include/common/rpc/rpc_types.hpp index 19612c45c..d28798d52 100644 --- a/include/common/rpc/rpc_types.hpp +++ b/include/common/rpc/rpc_types.hpp @@ -89,9 +89,9 @@ MERCURY_GEN_PROC( rpc_read_data_in_t, ((hg_const_string_t) (path))((int64_t) (offset))( (hg_uint64_t) (host_id))((hg_uint64_t) (host_size))( - (hg_uint64_t) (chunk_n))((hg_uint64_t) (chunk_start))( - (hg_uint64_t) (chunk_end))((hg_uint64_t) (total_chunk_size))( - (hg_bulk_t) (bulk_handle))) + (hg_const_string_t) (wbitset))((hg_uint64_t) (chunk_n))( + (hg_uint64_t) (chunk_start))((hg_uint64_t) (chunk_end))( + (hg_uint64_t) (total_chunk_size))((hg_bulk_t) (bulk_handle))) MERCURY_GEN_PROC(rpc_data_out_t, ((int32_t) (err))((hg_size_t) (io_size))) @@ -99,9 +99,9 @@ MERCURY_GEN_PROC( rpc_write_data_in_t, ((hg_const_string_t) (path))((int64_t) (offset))( (hg_uint64_t) (host_id))((hg_uint64_t) (host_size))( - (hg_uint64_t) (chunk_n))((hg_uint64_t) (chunk_start))( - (hg_uint64_t) (chunk_end))((hg_uint64_t) (total_chunk_size))( - (hg_bulk_t) (bulk_handle))) + (hg_const_string_t) (wbitset))((hg_uint64_t) (chunk_n))( + (hg_uint64_t) (chunk_start))((hg_uint64_t) (chunk_end))( + (hg_uint64_t) (total_chunk_size))((hg_bulk_t) (bulk_handle))) MERCURY_GEN_PROC(rpc_get_dirents_in_t, ((hg_const_string_t) (path))((hg_bulk_t) (bulk_handle))) diff --git a/include/common/rpc/rpc_util.hpp b/include/common/rpc/rpc_util.hpp index eb595596c..53e9aafb3 100644 --- a/include/common/rpc/rpc_util.hpp +++ b/include/common/rpc/rpc_util.hpp @@ -35,6 +35,9 @@ extern "C" { } #include +#include +#include +#include namespace gkfs::rpc { @@ -49,6 +52,18 @@ std::string get_host_by_name(const std::string& hostname); #endif +bool +get_bitset(const std::vector& data, const uint16_t position); + +void +set_bitset(std::vector& data, const uint16_t position); + +std::string +compressBitset(const std::vector& bytes); + +std::vector +decompressBitset(const std::string& compressedString); + } // namespace gkfs::rpc #endif // GEKKOFS_COMMON_RPC_UTILS_HPP diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 282a3eae9..8acb587e4 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -298,11 +298,21 @@ gkfs_create(const std::string& path, mode_t mode) { if(check_parent_dir(path)) { return -1; } - auto err = gkfs::rpc::forward_create(path, mode); - if(err) { - errno = err; + // Write to all replicas, at least one 
needs to succeed bool success = false; + for(auto copy = 0; copy < CTX->get_replicas() + 1; copy++) { + auto err = gkfs::rpc::forward_create(path, mode, copy); + if(err) { + errno = err; + } else { + success = true; + errno = 0; + } + } + if(!success) { return -1; } + return 0; } @@ -340,7 +350,7 @@ gkfs_remove(const std::string& path) { return -1; } } - auto err = gkfs::rpc::forward_remove(new_path); + auto err = gkfs::rpc::forward_remove(new_path, CTX->get_replicas()); if(err) { errno = err; return -1; @@ -350,7 +360,7 @@ #endif // HAS_RENAME #endif // HAS_SYMLINKS - auto err = gkfs::rpc::forward_remove(path); + auto err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); if(err) { errno = err; return -1; @@ -406,6 +416,7 @@ gkfs_access(const std::string& path, const int mask, bool follow_links) { * We use blocks to determine if the file is a renamed file. * If the file is re-renamed (a->b->a) a recovers the block of b * and we delete b. + * There is no replication support in rename * @param old_path * @param new_path * @return 0 on success, -1 on failure @@ -441,14 +452,14 @@ gkfs_rename(const string& old_path, const string& new_path) { md_old.value().target_path(""); auto err = gkfs::rpc::forward_update_metadentry( - new_path, md_old.value(), flags); + new_path, md_old.value(), flags, 0); if(err) { errno = err; return -1; } // Delete old file - err = gkfs::rpc::forward_remove(old_path); + err = gkfs::rpc::forward_remove(old_path, CTX->get_replicas()); if(err) { errno = err; return -1; @@ -674,7 +685,9 @@ gkfs_lseek(shared_ptr gkfs_fd, off_t offset, gkfs_fd->pos(gkfs_fd->pos() + offset); break; case SEEK_END: { - auto ret = gkfs::rpc::forward_get_metadentry_size(gkfs_fd->path()); + // TODO: handle replicas + auto ret = + gkfs::rpc::forward_get_metadentry_size(gkfs_fd->path(), 0); auto err = ret.first; if(err) { errno = err; @@ -723,14 +736,17 @@ gkfs_truncate(const std::string& path, off_t old_size, off_t new_size) { if(new_size == old_size) { return 0; } - auto err = gkfs::rpc::forward_decr_size(path, new_size); - if(err) { LOG(DEBUG, "Failed to decrease size"); errno = err; return -1; + for(auto copy = 0; copy < (CTX->get_replicas() + 1); copy++) { + auto err = gkfs::rpc::forward_decr_size(path, new_size, copy); + if(err) { + LOG(DEBUG, "Failed to decrease size"); + errno = err; + return -1; + } } - err = gkfs::rpc::forward_truncate(path, old_size, new_size); + auto err = gkfs::rpc::forward_truncate(path, old_size, new_size, + CTX->get_replicas()); if(err) { LOG(DEBUG, "Failed to truncate data"); errno = err; @@ -864,9 +880,11 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, } auto path = make_unique(file->path()); auto is_append = file->get_flag(gkfs::filemap::OpenFile_flags::append); + auto write_size = 0; + auto num_replicas = CTX->get_replicas(); auto ret_offset = gkfs::rpc::forward_update_metadentry_size( - *path, count, offset, is_append); + *path, count, offset, is_append, num_replicas); auto err = ret_offset.first; if(err) { LOG(ERROR, "update_metadentry_size() failed with err '{}'", err); @@ -888,8 +906,23 @@ offset = ret_offset.second; } - auto ret_write = gkfs::rpc::forward_write(*path, buf, offset, count); + auto ret_write = gkfs::rpc::forward_write(*path, buf, offset, count, 0); err = ret_write.first; + write_size = ret_write.second; + + if(num_replicas > 0) { + auto ret_write_repl = + gkfs::rpc::forward_write(*path, buf, offset, count,
num_replicas); + + if(err and ret_write_repl.first == 0) { + // We successfully wrote the data to some replica + err = ret_write_repl.first; + // The reported write size may be inaccurate in this case + write_size = ret_write_repl.second; + } + } + if(err) { LOG(WARNING, "gkfs::rpc::forward_write() failed with err '{}'", err); errno = err; @@ -897,14 +930,14 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, } if(update_pos) { // Update offset in file descriptor in the file map - file->pos(offset + ret_write.second); + file->pos(offset + write_size); } - if(static_cast(ret_write.second) != count) { + if(static_cast(write_size) != count) { LOG(WARNING, "gkfs::rpc::forward_write() wrote '{}' bytes instead of '{}'", - ret_write.second, count); + write_size, count); } - return ret_write.second; // return written size + return write_size; // return written size } /** * @@ -1024,7 +1057,24 @@ gkfs_pread(std::shared_ptr file, char* buf, if constexpr(gkfs::config::io::zero_buffer_before_read) { memset(buf, 0, sizeof(char) * count); } - auto ret = gkfs::rpc::forward_read(file->path(), buf, offset, count); + std::pair<int, ssize_t> ret; + std::set<int8_t> failed; // set of failed targets. + if(CTX->get_replicas() != 0) { + + ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, + CTX->get_replicas(), failed); + while(ret.first == EIO) { + ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, + CTX->get_replicas(), failed); + LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'", + ret.first); + } + + } else { + ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, 0, + failed); + } + auto err = ret.first; if(err) { LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'", err); @@ -1192,7 +1242,7 @@ gkfs_rmdir(const std::string& path) { errno = ENOTEMPTY; return -1; } - err = gkfs::rpc::forward_remove(path); + err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); if(err) { errno = err; return -1; diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 7b05ca9a3..d9b242636 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -238,6 +238,9 @@ init_environment() { EXIT_FAILURE, "Unable to fetch file system configurations from daemon process through RPC."); } + if(CTX->get_replicas() > 0) { + srand(time(NULL)); + } LOG(INFO, "Environment initialization successful."); } @@ -275,6 +278,7 @@ init_preload() { gkfs::path::init_cwd(); LOG(DEBUG, "Current working directory: '{}'", CTX->cwd()); + LOG(DEBUG, "Number of replicas: '{}'", CTX->get_replicas()); gkfs::preload::init_environment(); CTX->enable_interception(); diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 236d2583f..da534e6d0 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -65,6 +65,8 @@ PreloadContext::PreloadContext() char host[255]; gethostname(host, 255); hostname = host; + PreloadContext::set_replicas( + std::stoi(gkfs::env::get_var(gkfs::env::NUM_REPL, "0"))); } void @@ -447,5 +449,15 @@ PreloadContext::get_hostname() { return hostname; } +void +PreloadContext::set_replicas(const int repl) { + replicas_ = repl; +} + +int +PreloadContext::get_replicas() { + return replicas_; +} + } // namespace preload } // namespace gkfs
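The PreloadContext change above reads the replica count once at client startup through the project's gkfs::env::get_var helper. A minimal sketch of the same lookup using only the standard library (hypothetical helper, not part of the patch; assumes LIBGKFS_NUM_REPL carries a plain integer, as documented in the README):

#include <cstdlib>
#include <string>

// Returns the configured number of replicas; 0 (no replication) when the
// variable is unset. std::stoi throws on non-numeric input, which mirrors
// the failure behaviour of the stoi call in PreloadContext above.
static int read_num_replicas() {
    const char* v = std::getenv("LIBGKFS_NUM_REPL"); // gkfs::env::NUM_REPL
    return v == nullptr ? 0 : std::stoi(std::string(v));
}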
diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index 2a77c37a3..4a9c2b1c4 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -200,16 +200,26 @@ namespace gkfs::utils { optional get_metadata(const string& path, bool follow_links) { std::string attr; - auto err = gkfs::rpc::forward_stat(path, attr); + auto err = gkfs::rpc::forward_stat(path, attr, 0); + // On failure, retry on the metadata replicas + if(err) { - errno = err; - return {}; + auto copy = 1; + while(copy < CTX->get_replicas() + 1 && err) { + LOG(ERROR, "Retrying stat on replica {} (follow_links: {})", copy, + follow_links); + err = gkfs::rpc::forward_stat(path, attr, copy); + copy++; + } + if(err) { + errno = err; + return {}; + } } #ifdef HAS_SYMLINKS if(follow_links) { gkfs::metadata::Metadata md{attr}; while(md.is_link()) { - err = gkfs::rpc::forward_stat(md.target_path(), attr); + err = gkfs::rpc::forward_stat(md.target_path(), attr, 0); if(err) { errno = err; return {}; diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 93d72efcd..f01e149e7 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -34,6 +34,7 @@ #include #include +#include #include @@ -48,15 +49,20 @@ namespace gkfs::rpc { /** * Send an RPC request to write from a buffer. + * A bitset with capacity for 1024 chunks tells the server + * which chunks to process. Writes that exceed this capacity still work, but + * without replication. Another way would be to leverage mercury segments. + * TODO: Decide how to manage a write to a replica that doesn't exist * @param path * @param buf * @param offset * @param write_size + * @param num_copies number of replicas * @return pair */ pair forward_write(const string& path, const void* buf, const off64_t offset, - const size_t write_size) { + const size_t write_size, const int8_t num_copies) { // import pow2-optimized arithmetic functions using namespace gkfs::utils::arithmetic; auto chnk_start = block_index(offset, gkfs::config::rpc::chunksize); auto chnk_end = block_index((offset + write_size) - 1, gkfs::config::rpc::chunksize); + auto chnk_total = (chnk_end - chnk_start) + 1; + // Collect all chunk ids within count that have the same destination so // that those are send in one rpc bulk transfer std::map> target_chnks{}; + // contains the target ids, used to access the target_chnks map. // First idx is chunk with potential offset std::vector targets{}; // targets for the first and last chunk as they need special treatment - uint64_t chnk_start_target = 0; - uint64_t chnk_end_target = 0; + // We need a set to manage replicas. + std::set<uint64_t> chnk_start_target{}; + std::set<uint64_t> chnk_end_target{}; + std::unordered_map<uint64_t, std::vector<uint8_t>> write_ops_vect; - for(uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { - auto target = CTX->distributor()->locate_data(path, chnk_id); - if(target_chnks.count(target) == 0) { - target_chnks.insert( - std::make_pair(target, std::vector{chnk_id})); - targets.push_back(target); - } else { - target_chnks[target].push_back(chnk_id); - } + // If num_copies is 0, we do the normal write operation. Otherwise + // we process all the replicas. + for(uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { + for(auto copy = num_copies ? 
1 : 0; copy < num_copies + 1; copy++) { + auto target = CTX->distributor()->locate_data(path, chnk_id, copy); + + if(write_ops_vect.find(target) == write_ops_vect.end()) + write_ops_vect[target] = + std::vector(((chnk_total + 7) / 8)); + gkfs::rpc::set_bitset(write_ops_vect[target], chnk_id - chnk_start); + + if(target_chnks.count(target) == 0) { + target_chnks.insert( + std::make_pair(target, std::vector{chnk_id})); + targets.push_back(target); + } else { + target_chnks[target].push_back(chnk_id); + } - // set first and last chnk targets - if(chnk_id == chnk_start) { - chnk_start_target = target; - } + // set first and last chnk targets + if(chnk_id == chnk_start) { + chnk_start_target.insert(target); + } - if(chnk_id == chnk_end) { - chnk_end_target = target; + if(chnk_id == chnk_end) { + chnk_end_target.insert(target); + } } } @@ -133,13 +154,13 @@ forward_write(const string& path, const void* buf, const off64_t offset, target_chnks[target].size() * gkfs::config::rpc::chunksize; // receiver of first chunk must subtract the offset from first chunk - if(target == chnk_start_target) { + if(chnk_start_target.end() != chnk_start_target.find(target)) { total_chunk_size -= block_overrun(offset, gkfs::config::rpc::chunksize); } // receiver of last chunk must subtract - if(target == chnk_end_target && + if(chnk_end_target.end() != chnk_end_target.find(target) && !is_aligned(offset + write_size, gkfs::config::rpc::chunksize)) { total_chunk_size -= block_underrun(offset + write_size, gkfs::config::rpc::chunksize); @@ -148,7 +169,6 @@ forward_write(const string& path, const void* buf, const off64_t offset, auto endp = CTX->hosts().at(target); try { - LOG(DEBUG, "Sending RPC ..."); gkfs::rpc::write_data::input in( @@ -158,6 +178,7 @@ forward_write(const string& path, const void* buf, const off64_t offset, block_overrun(offset, gkfs::config::rpc::chunksize), target, CTX->hosts().size(), // number of chunks handled by that destination + gkfs::rpc::compressBitset(write_ops_vect[target]), target_chnks[target].size(), // chunk start id of this write chnk_start, @@ -175,25 +196,26 @@ forward_write(const string& path, const void* buf, const off64_t offset, ld_network_service->post(endp, in)); LOG(DEBUG, - "host: {}, path: \"{}\", chunks: {}, size: {}, offset: {}", - target, path, in.chunk_n(), total_chunk_size, in.offset()); - + "host: {}, path: \"{}\", chunk_start: {}, chunk_end: {}, chunks: {}, size: {}, offset: {}", + target, path, chnk_start, chnk_end, in.chunk_n(), + total_chunk_size, in.offset()); } catch(const std::exception& ex) { LOG(ERROR, "Unable to send non-blocking rpc for " "path \"{}\" [peer: {}]", path, target); - return make_pair(EBUSY, 0); + if(num_copies == 0) + return make_pair(EBUSY, 0); } } - // Wait for RPC responses and then get response and add it to out_size - // which is the written size All potential outputs are served to free - // resources regardless of errors, although an errorcode is set. 
auto err = 0; ssize_t out_size = 0; std::size_t idx = 0; - +#ifdef REPLICA_CHECK + std::vector<uint8_t> fill((chnk_total + 7) / 8); + auto write_ops = write_ops_vect.begin(); +#endif for(const auto& h : handles) { try { // XXX We might need a timeout here to not wait forever for an // output that never comes? auto out = h.get().at(0); if(out.err() != 0) { LOG(ERROR, "Daemon reported error: {}", out.err()); err = out.err(); + } else { + out_size += static_cast<ssize_t>(out.io_size()); +#ifdef REPLICA_CHECK + if(num_copies) { + if(fill.size() == 0) { + fill = write_ops->second; + } else { + for(size_t i = 0; i < fill.size(); i++) { + fill[i] |= write_ops->second[i]; + } + } + } + write_ops++; +#endif } - - out_size += static_cast(out.io_size()); - } catch(const std::exception& ex) { LOG(ERROR, "Failed to get rpc output for path \"{}\" [peer: {}]", path, targets[idx]); err = EIO; } - idx++; } + // As servers can fail (and we cannot know if the total data is written), we + // send the updated size but check that at least one copy of every chunk was + // processed. + if(num_copies) { + // A bitwise OR of the per-target bitsets should show that all chunks + // were written (every full byte equals 255) + out_size = write_size; +#ifdef REPLICA_CHECK + for(size_t i = 0; i < fill.size() - 1; i++) { + if(fill[i] != 255) { + err = EIO; + break; + } + } + // Check the leftover bits in the last byte + for(uint64_t chnk_id = (chnk_start + (fill.size() - 1) * 8); + chnk_id <= chnk_end; chnk_id++) { + if(!(fill[(chnk_id - chnk_start) / 8] & + (1 << ((chnk_id - chnk_start) % 8)))) { + err = EIO; + break; + } + } +#endif + } /* * Typically file systems return the size even if only a part of it was * written. In our case, we do not keep track which daemon fully wrote its @@ -232,11 +288,14 @@ * @param buf * @param offset * @param read_size + * @param num_copies number of copies available (0 is no replication) + * @param failed failed nodes that should not be used * @return pair */ pair forward_read(const string& path, void* buf, const off64_t offset, - const size_t read_size) { + const size_t read_size, const int8_t num_copies, + std::set<int8_t>& failed) { // import pow2-optimized arithmetic functions using namespace gkfs::utils::arithmetic; auto chnk_start = block_index(offset, gkfs::config::rpc::chunksize); auto chnk_end = block_index((offset + read_size - 1), gkfs::config::rpc::chunksize); - + auto chnk_total = (chnk_end - chnk_start) + 1; // Collect all chunk ids within count that have the same destination so // that those are send in one rpc bulk transfer std::map> target_chnks{}; + // contains the recipient ids, used to access the target_chnks map. // First idx is chunk with potential offset std::vector targets{}; // targets for the first and last chunk as they need special treatment uint64_t chnk_start_target = 0; uint64_t chnk_end_target = 0; + std::unordered_map<uint64_t, std::vector<uint8_t>> read_bitset_vect; for(uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { - auto target = CTX->distributor()->locate_data(path, chnk_id); + auto target = CTX->distributor()->locate_data(path, chnk_id, 0); + if(num_copies > 0) { + // If we have some failures we select another copy (randomly). 
+ while(failed.find(target) != failed.end()) { + LOG(DEBUG, "Selecting another node, target: {} down", target); + target = CTX->distributor()->locate_data(path, chnk_id, + rand() % (num_copies + 1)); + } + } + + if(read_bitset_vect.find(target) == read_bitset_vect.end()) + read_bitset_vect[target] = + std::vector<uint8_t>(((chnk_total + 7) / 8)); + read_bitset_vect[target][(chnk_id - chnk_start) / 8] |= + 1 << ((chnk_id - chnk_start) % 8); // set if(target_chnks.count(target) == 0) { target_chnks.insert( std::make_pair(target, std::vector{chnk_id})); targets.push_back(target); } else { target_chnks[target].push_back(chnk_id); } @@ -303,6 +378,7 @@ // TODO(amiranda): This could be simplified by adding a vector of inputs // to async_engine::broadcast(). This would allow us to avoid manually // looping over handles as we do below + for(const auto& target : targets) { // total chunk_size for target @@ -334,6 +410,7 @@ // a potential offset block_overrun(offset, gkfs::config::rpc::chunksize), target, CTX->hosts().size(), + gkfs::rpc::compressBitset(read_bitset_vect[target]), // number of chunks handled by that destination target_chnks[target].size(), // chunk start id of this write chnk_start, // chunk end id of this write chnk_end, // total size to write total_chunk_size, local_buffers); - // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that - // we can retry for RPC_TRIES (see old commits with margo) - // TODO(amiranda): hermes will eventually provide a post(endpoint) - // returning one result and a broadcast(endpoint_set) returning a - // result_set. When that happens we can remove the .at(0) :/ + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so + // that we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a + // post(endpoint) returning one result and a + // broadcast(endpoint_set) returning a result_set. When that + // happens we can remove the .at(0) :/ handles.emplace_back( ld_network_service->post(endp, in)); @@ -394,9 +472,15 @@ LOG(ERROR, "Failed to get rpc output for path \"{}\" [peer: {}]", path, targets[idx]); err = EIO; + // Record targets[idx] as failed so it is not selected again + failed.insert(targets[idx]); + // Then repeat the read with another peer (we repeat the full + // read; this could be optimized, but it is a corner case) } idx++; } + + /* * Typically file systems return the size even if only a part of it was * read. 
In our case, we do not keep track which daemon fully read its @@ -413,11 +497,12 @@ forward_read(const string& path, void* buf, const off64_t offset, * @param path * @param current_size * @param new_size + * @param num_copies Number of replicas * @return error code */ int -forward_truncate(const std::string& path, size_t current_size, - size_t new_size) { +forward_truncate(const std::string& path, size_t current_size, size_t new_size, + const int8_t num_copies) { // import pow2-optimized arithmetic functions using namespace gkfs::utils::arithmetic; @@ -434,7 +519,9 @@ forward_truncate(const std::string& path, size_t current_size, std::unordered_set hosts; for(unsigned int chunk_id = chunk_start; chunk_id <= chunk_end; ++chunk_id) { - hosts.insert(CTX->distributor()->locate_data(path, chunk_id)); + for(auto copy = 0; copy < (num_copies + 1); ++copy) { + hosts.insert(CTX->distributor()->locate_data(path, chunk_id, copy)); + } } std::vector> handles; @@ -450,20 +537,23 @@ forward_truncate(const std::string& path, size_t current_size, gkfs::rpc::trunc_data::input in(path, new_size); - // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that - // we can retry for RPC_TRIES (see old commits with margo) - // TODO(amiranda): hermes will eventually provide a post(endpoint) - // returning one result and a broadcast(endpoint_set) returning a - // result_set. When that happens we can remove the .at(0) :/ + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so + // that we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a + // post(endpoint) returning one result and a + // broadcast(endpoint_set) returning a result_set. When that + // happens we can remove the .at(0) :/ handles.emplace_back( ld_network_service->post(endp, in)); } catch(const std::exception& ex) { - // TODO(amiranda): we should cancel all previously posted requests - // here, unfortunately, Hermes does not support it yet :/ + // TODO(amiranda): we should cancel all previously posted + // requests here, unfortunately, Hermes does not support it yet + // :/ LOG(ERROR, "Failed to send request to host: {}", host); err = EIO; - break; // We need to gather all responses so we can't return here + break; // We need to gather all responses so we can't return + // here } } @@ -503,20 +593,23 @@ forward_get_chunk_stat() { gkfs::rpc::chunk_stat::input in(0); - // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that - // we can retry for RPC_TRIES (see old commits with margo) - // TODO(amiranda): hermes will eventually provide a post(endpoint) - // returning one result and a broadcast(endpoint_set) returning a - // result_set. When that happens we can remove the .at(0) :/ + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so + // that we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a + // post(endpoint) returning one result and a + // broadcast(endpoint_set) returning a result_set. 
When that + // happens we can remove the .at(0) :/ handles.emplace_back( ld_network_service->post(endp, in)); } catch(const std::exception& ex) { - // TODO(amiranda): we should cancel all previously posted requests - // here, unfortunately, Hermes does not support it yet :/ + // TODO(amiranda): we should cancel all previously posted + // requests here, unfortunately, Hermes does not support it yet + // :/ LOG(ERROR, "Failed to send request to host: {}", endp.to_string()); err = EBUSY; - break; // We need to gather all responses so we can't return here + break; // We need to gather all responses so we can't return + // here } } @@ -547,9 +640,11 @@ forward_get_chunk_stat() { chunk_free += out.chunk_free(); } catch(const std::exception& ex) { LOG(ERROR, "Failed to get RPC output from host: {}", i); - err = EBUSY; + // Avoid setting err if a server fails. + // err = EBUSY; } } + if(err) return make_pair(err, ChunkStat{}); else diff --git a/src/client/rpc/forward_management.cpp b/src/client/rpc/forward_management.cpp index 4ebbef9b3..a44b37a1e 100644 --- a/src/client/rpc/forward_management.cpp +++ b/src/client/rpc/forward_management.cpp @@ -45,19 +45,30 @@ forward_get_fs_config() { auto endp = CTX->hosts().at(CTX->local_host_id()); gkfs::rpc::fs_config::output out; - try { - LOG(DEBUG, "Retrieving file system configurations from daemon"); - // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we - // can retry for RPC_TRIES (see old commits with margo) - // TODO(amiranda): hermes will eventually provide a post(endpoint) - // returning one result and a broadcast(endpoint_set) returning a - // result_set. When that happens we can remove the .at(0) :/ - out = ld_network_service->post(endp).get().at(0); - } catch(const std::exception& ex) { - LOG(ERROR, "Retrieving fs configurations from daemon"); - return false; + bool found = false; + size_t idx = 0; + while(!found && idx <= CTX->hosts().size()) { + try { + LOG(DEBUG, "Retrieving file system configurations from daemon"); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + out = ld_network_service->post<gkfs::rpc::fs_config>(endp).get().at( + 0); + found = true; + } catch(const std::exception& ex) { + LOG(ERROR, + "Failed to retrieve fs configurations from daemon, reattempting at peer: {}", + idx); + endp = CTX->hosts().at(idx++); + } } + if(!found) + return false; + CTX->mountdir(out.mountdir()); LOG(INFO, "Mountdir: '{}'", CTX->mountdir()); diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index a087590c8..407bc484f 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -51,12 +51,14 @@ namespace gkfs::rpc { * Send an RPC for a create request * @param path * @param mode + * @param copy number of the replica to create (0 is the original) * @return error code */ int -forward_create(const std::string& path, const mode_t mode) { +forward_create(const std::string& path, const mode_t mode, const int copy) { - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path, copy)); try { LOG(DEBUG, "Sending RPC ..."); @@ -81,12 +83,14 @@ * Send an RPC for a stat request * @param path * @param attr + * @param copy metadata replica to read from * @return error code */ int -forward_stat(const std::string& path, string& attr) { +forward_stat(const std::string& path, string& attr, const int copy) { - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path, copy)); try { LOG(DEBUG, "Sending RPC ..."); @@ -121,40 +125,44 @@ * This function only attempts data removal if data exists (determined when * metadata is removed) * @param path + * @param num_copies number of replicas (for replication scenarios) * @return error code */ int -forward_remove(const std::string& path) { - - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); +forward_remove(const std::string& path, const int8_t num_copies) { int64_t size = 0; uint32_t mode = 0; - /* - * Send one RPC to metadata destination and remove metadata while retrieving - * size and mode to determine if data needs to removed too - */ - try { - LOG(DEBUG, "Sending RPC ..."); - // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we - // can retry for RPC_TRIES (see old commits with margo) - // TODO(amiranda): hermes will eventually provide a post(endpoint) - // returning one result and a broadcast(endpoint_set) returning a - // result_set. 
When that happens we can remove the .at(0) :/ - auto out = - ld_network_service->post(endp, path) - .get() - .at(0); - - LOG(DEBUG, "Got response success: {}", out.err()); + for(auto copy = 0; copy < (num_copies + 1); copy++) { + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path, copy)); - if(out.err()) - return out.err(); - size = out.size(); - mode = out.mode(); - } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); - return EBUSY; + /* + * Send one RPC to metadata destination and remove metadata while + * retrieving size and mode to determine if data needs to be removed too + */ + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = ld_network_service + ->post(endp, path) + .get() + .at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err()) + return out.err(); + size = out.size(); + mode = out.mode(); + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return EBUSY; + } } // if file is not a regular file and its size is 0, data does not need to // be removed, thus, we exit @@ -167,44 +175,54 @@ // Small files if(static_cast(size / gkfs::config::rpc::chunksize) < CTX->hosts().size()) { - const auto metadata_host_id = - CTX->distributor()->locate_file_metadata(path); - const auto endp_metadata = CTX->hosts().at(metadata_host_id); - - try { - LOG(DEBUG, "Sending RPC to host: {}", endp_metadata.to_string()); - gkfs::rpc::remove_data::input in(path); - handles.emplace_back( - ld_network_service->post( - endp_metadata, in)); - - uint64_t chnk_start = 0; - uint64_t chnk_end = size / gkfs::config::rpc::chunksize; - - for(uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { - const auto chnk_host_id = - CTX->distributor()->locate_data(path, chnk_id); - if constexpr(gkfs::config::metadata::implicit_data_removal) { - /* - * If the chnk host matches the metadata host the remove - * request as already been sent as part of the metadata - * remove request. - */ - if(chnk_host_id == metadata_host_id) - continue; - } - const auto endp_chnk = CTX->hosts().at(chnk_host_id); - - LOG(DEBUG, "Sending RPC to host: {}", endp_chnk.to_string()); + for(auto copymd = 0; copymd < (num_copies + 1); copymd++) { + const auto metadata_host_id = + CTX->distributor()->locate_file_metadata(path, copymd); + const auto endp_metadata = CTX->hosts().at(metadata_host_id); + try { + LOG(DEBUG, "Sending RPC to host: {}", + endp_metadata.to_string()); + gkfs::rpc::remove_data::input in(path); handles.emplace_back( ld_network_service->post( - endp_chnk, in)); + endp_metadata, in)); + + uint64_t chnk_start = 0; + uint64_t chnk_end = size / gkfs::config::rpc::chunksize; + + for(uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; + chnk_id++) { + for(auto copy = 0; copy < (num_copies + 1); copy++) { + const auto chnk_host_id = + CTX->distributor()->locate_data(path, chnk_id, + copy); + if constexpr(gkfs::config::metadata:: + implicit_data_removal) { + /* + * If the chnk host matches the metadata host the + * remove request has already been sent as part of + * the metadata remove request. 
+ */ + if(chnk_host_id == metadata_host_id) + continue; + } + const auto endp_chnk = CTX->hosts().at(chnk_host_id); + + LOG(DEBUG, "Sending RPC to host: {}", + endp_chnk.to_string()); + + handles.emplace_back( + ld_network_service + ->post( + endp_chnk, in)); + } + } + } catch(const std::exception& ex) { + LOG(ERROR, + "Failed to forward non-blocking rpc request reduced remove requests"); + return EBUSY; } - } catch(const std::exception& ex) { - LOG(ERROR, - "Failed to forward non-blocking rpc request reduced remove requests"); - return EBUSY; } } else { // "Big" files for(const auto& endp : CTX->hosts()) { @@ -260,12 +278,14 @@ * during a truncate() call. * @param path * @param length + * @param copy Target replica (0 original) * @return error code */ int -forward_decr_size(const std::string& path, size_t length) { +forward_decr_size(const std::string& path, size_t length, const int copy) { - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path, copy)); try { LOG(DEBUG, "Sending RPC ..."); @@ -295,14 +315,17 @@ * @param path * @param md * @param md_flags + * @param copy Target replica (0 original) * @return error code */ int -forward_update_metadentry( - const string& path, const gkfs::metadata::Metadata& md, - const gkfs::metadata::MetadentryUpdateFlags& md_flags) { +forward_update_metadentry(const string& path, + const gkfs::metadata::Metadata& md, + const gkfs::metadata::MetadentryUpdateFlags& md_flags, + const int copy) { - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path, copy)); try { LOG(DEBUG, "Sending RPC ..."); @@ -348,6 +371,7 @@ * This marks that this file doesn't have to be accessed directly * Create a new md with the new name, which should have as value the old name * All operations should check blockcnt and extract a NOTEXISTS + * This operation does not support replication * @param oldpath * @param newpath * @param md @@ -358,8 +382,8 @@ int forward_rename(const string& oldpath, const string& newpath, const gkfs::metadata::Metadata& md) { - auto endp = - CTX->hosts().at(CTX->distributor()->locate_file_metadata(oldpath)); + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(oldpath, 0)); try { LOG(DEBUG, "Sending RPC ..."); @@ -405,8 +429,8 @@ // TODO(amiranda): hermes will eventually provide a post(endpoint) // returning one result and a broadcast(endpoint_set) returning a // result_set. When that happens we can remove the .at(0) :/ - auto endp2 = - CTX->hosts().at(CTX->distributor()->locate_file_metadata(newpath)); + auto endp2 = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(newpath, 0)); try { LOG(DEBUG, "Sending RPC ..."); @@ -479,53 +503,85 @@ /** * Send an RPC request for an update to the file size. * This is called during a write() call or similar + * Only a single successful call is needed to make progress. 
* @param path * @param size * @param offset * @param append_flag + * @param num_copies number of replicas * @return pair */ pair forward_update_metadentry_size(const string& path, const size_t size, - const off64_t offset, const bool append_flag) { + const off64_t offset, const bool append_flag, + const int num_copies) { - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); - try { - LOG(DEBUG, "Sending RPC ..."); - // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we - // can retry for RPC_TRIES (see old commits with margo) - // TODO(amiranda): hermes will eventually provide a post(endpoint) - // returning one result and a broadcast(endpoint_set) returning a - // result_set. When that happens we can remove the .at(0) :/ - auto out = ld_network_service - ->post( - endp, path, size, offset, - bool_to_merc_bool(append_flag)) - .get() - .at(0); + std::vector> handles; - LOG(DEBUG, "Got response success: {}", out.err()); + for(auto copy = 0; copy < num_copies + 1; copy++) { + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path, copy)); + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post( + endp, path, size, offset, + bool_to_merc_bool(append_flag))); + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return make_pair(EBUSY, 0); + } + } + auto err = 0; + ssize_t out_size = 0; + auto idx = 0; + bool valid = false; + for(const auto& h : handles) { + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); - if(out.err()) - return make_pair(out.err(), 0); - else - return make_pair(0, out.ret_size()); - } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); - return make_pair(EBUSY, 0); + if(out.err() != 0) { + LOG(ERROR, "Daemon {} reported error: {}", idx, out.err()); + } else { + valid = true; + out_size = out.ret_size(); + } + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to get rpc output"); + if(!valid) { + err = EIO; + } + } + idx++; } + + if(!valid) + return make_pair(err, 0); + else + return make_pair(0, out_size); } + /** * Send an RPC request to get the current file size. 
* This is called during a lseek() call * @param path + * @param copy Target replica (0 original) * @return pair */ pair -forward_get_metadentry_size(const std::string& path) { +forward_get_metadentry_size(const std::string& path, const int copy) { - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path, copy)); try { LOG(DEBUG, "Sending RPC ..."); @@ -831,7 +887,8 @@ forward_get_dirents_single(const string& path, int server) { int forward_mk_symlink(const std::string& path, const std::string& target_path) { - auto endp = CTX->hosts().at(CTX->distributor()->locate_file_metadata(path)); + auto endp = + CTX->hosts().at(CTX->distributor()->locate_file_metadata(path, 0)); try { LOG(DEBUG, "Sending RPC ..."); diff --git a/src/common/rpc/distributor.cpp b/src/common/rpc/distributor.cpp index e222a4aa3..5a7621e61 100644 --- a/src/common/rpc/distributor.cpp +++ b/src/common/rpc/distributor.cpp @@ -47,27 +47,34 @@ SimpleHashDistributor::localhost() const { return localhost_; } +unsigned int +SimpleHashDistributor::hosts_size() const { + return hosts_size_; +} + host_t -SimpleHashDistributor::locate_data(const string& path, - const chunkid_t& chnk_id) const { - return str_hash(path + ::to_string(chnk_id)) % hosts_size_; +SimpleHashDistributor::locate_data(const string& path, const chunkid_t& chnk_id, + const int num_copy) const { + return (str_hash(path + ::to_string(chnk_id)) + num_copy) % hosts_size_; } host_t SimpleHashDistributor::locate_data(const string& path, const chunkid_t& chnk_id, - unsigned int hosts_size) { + unsigned int hosts_size, + const int num_copy) { if(hosts_size_ != hosts_size) { hosts_size_ = hosts_size; all_hosts_ = std::vector(hosts_size); ::iota(all_hosts_.begin(), all_hosts_.end(), 0); } - return str_hash(path + ::to_string(chnk_id)) % hosts_size_; + return (str_hash(path + ::to_string(chnk_id)) + num_copy) % hosts_size_; } host_t -SimpleHashDistributor::locate_file_metadata(const string& path) const { - return str_hash(path) % hosts_size_; +SimpleHashDistributor::locate_file_metadata(const string& path, + const int num_copy) const { + return (str_hash(path) + num_copy) % hosts_size_; } ::vector @@ -83,14 +90,20 @@ LocalOnlyDistributor::localhost() const { return localhost_; } +unsigned int +LocalOnlyDistributor::hosts_size() const { + return hosts_size_; +} + host_t -LocalOnlyDistributor::locate_data(const string& path, - const chunkid_t& chnk_id) const { +LocalOnlyDistributor::locate_data(const string& path, const chunkid_t& chnk_id, + const int num_copy) const { return localhost_; } host_t -LocalOnlyDistributor::locate_file_metadata(const string& path) const { +LocalOnlyDistributor::locate_file_metadata(const string& path, + const int num_copy) const { return localhost_; } @@ -110,24 +123,32 @@ ForwarderDistributor::localhost() const { return fwd_host_; } +unsigned int +ForwarderDistributor::hosts_size() const { + return hosts_size_; +} + host_t ForwarderDistributor::locate_data(const std::string& path, - const chunkid_t& chnk_id) const { + const chunkid_t& chnk_id, + const int num_copy) const { return fwd_host_; } host_t ForwarderDistributor::locate_data(const std::string& path, const chunkid_t& chnk_id, - unsigned int host_size) { + unsigned int host_size, const int num_copy) { return fwd_host_; } host_t -ForwarderDistributor::locate_file_metadata(const std::string& path) const { - return str_hash(path) % hosts_size_; +ForwarderDistributor::locate_file_metadata(const 
std::string& path, + const int num_copy) const { + return (str_hash(path) + num_copy) % hosts_size_; } + std::vector ForwarderDistributor::locate_directory_metadata(const std::string& path) const { return all_hosts_; } @@ -213,21 +234,26 @@ GuidedDistributor::localhost() const { return localhost_; } +unsigned int +GuidedDistributor::hosts_size() const { + return hosts_size_; +} + host_t GuidedDistributor::locate_data(const string& path, const chunkid_t& chnk_id, - unsigned int hosts_size) { + unsigned int hosts_size, const int num_copy) { if(hosts_size_ != hosts_size) { hosts_size_ = hosts_size; all_hosts_ = std::vector(hosts_size); ::iota(all_hosts_.begin(), all_hosts_.end(), 0); } - return (locate_data(path, chnk_id)); + return (locate_data(path, chnk_id, num_copy)); } host_t -GuidedDistributor::locate_data(const string& path, - const chunkid_t& chnk_id) const { +GuidedDistributor::locate_data(const string& path, const chunkid_t& chnk_id, + const int num_copy) const { auto it = map_interval.find(path); if(it != map_interval.end()) { auto it_f = it->second.first.IsInsideInterval(chnk_id); if ... } auto locate = path + ::to_string(chnk_id); - return str_hash(locate) % hosts_size_; + return (str_hash(locate) + num_copy) % hosts_size_; } host_t -GuidedDistributor::locate_file_metadata(const string& path) const { - return str_hash(path) % hosts_size_; +GuidedDistributor::locate_file_metadata(const string& path, + const int num_copy) const { + return (str_hash(path) + num_copy) % hosts_size_; } + ::vector GuidedDistributor::locate_directory_metadata(const string& path) const { return all_hosts_; diff --git a/src/common/rpc/rpc_util.cpp b/src/common/rpc/rpc_util.cpp index 46970b82f..1016ea9ff 100644 --- a/src/common/rpc/rpc_util.cpp +++ b/src/common/rpc/rpc_util.cpp @@ -36,6 +36,7 @@ extern "C" { #include + using namespace std; namespace gkfs::rpc { @@ -104,4 +105,111 @@ get_host_by_name(const string& hostname) { } #endif +/** + * @brief Get the bit from a bit vector + * + * @param data + * @param position + * @return the bit + */ +bool +get_bitset(const std::vector<uint8_t>& data, const uint16_t position) { + return (data[(position) / 8] & 1 << ((position) % 8)); +} + +/** + * @brief Set a bit in a bit vector + * + * @param data + * @param position + */ +void +set_bitset(std::vector<uint8_t>& data, const uint16_t position) { + data[(position) / 8] |= 1 << ((position) % 8); // set +} + +std::string +base64_encode(const std::vector<uint8_t>& data) { + static const std::string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + std::ostringstream encoded; + uint32_t buffer = 0; + int padding = 0; + + for(uint8_t byte : data) { + buffer = (buffer << 8) | byte; + padding += 8; + while(padding >= 6) { + padding -= 6; + encoded << base64_chars[(buffer >> padding) & 0x3F]; + } + } + + if(padding > 0) { + buffer <<= 6 - padding; + encoded << base64_chars[buffer & 0x3F]; + } + + while(encoded.str().length() % 4 != 0) { + encoded << '='; + } + + return encoded.str(); +} + +std::vector<uint8_t> +base64_decode(const std::string& encoded) { + static const std::string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + std::vector<uint8_t> data; + uint32_t buffer = 0; + int padding = 0; + size_t count = 0; + + for(char c : encoded) { + if(c == '=') + break; + + std::size_t value = base64_chars.find(c); + if(value == std::string::npos) + continue; + + buffer = (buffer << 6) | value; + padding += 6; + + if(padding >= 8) { + padding -= 8; + data.push_back(static_cast<uint8_t>((buffer >> padding) & 0xFF)); + count++; + } + } + + // Handle padding characters + if(padding > 0 && padding < 6 && (buffer & ((1 << padding) - 1)) == 0) { + // Remove the padding bits + buffer >>= padding; + padding = 0; + data.push_back(static_cast<uint8_t>((buffer >> 8) & 0xFF)); + count++; + } + + if(count == 0 || padding % 8 != 0) + return {}; + + return data; +} + +std::string +compressBitset(const std::vector<uint8_t>& bytes) { + return base64_encode(bytes); +} + +std::vector<uint8_t> +decompressBitset(const std::string& compressedString) { + return base64_decode(compressedString); +} + + } // namespace gkfs::rpc \ No newline at end of file 
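A usage sketch for the helpers above (hypothetical chunk numbers, not part of the patch): the client allocates one byte per 8 chunks, marks the chunks a target is responsible for, ships the bitset as a base64 string in the wbitset RPC field, and the daemon decodes it to decide which chunks to process.

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>
#include <common/rpc/rpc_util.hpp> // assumed include path for the helpers

void bitset_roundtrip_example() {
    const uint64_t chnk_start = 10, chnk_end = 25;
    const uint64_t chnk_total = chnk_end - chnk_start + 1;
    std::vector<uint8_t> bits((chnk_total + 7) / 8); // 2 bytes for 16 chunks

    // Mark chunks 10 and 25; positions are relative to chnk_start.
    gkfs::rpc::set_bitset(bits, 10 - chnk_start);
    gkfs::rpc::set_bitset(bits, 25 - chnk_start);

    // This string is what travels inside rpc_write_data_in_t::wbitset.
    std::string wire = gkfs::rpc::compressBitset(bits);

    // Daemon side: decode and test membership per chunk id.
    std::vector<uint8_t> decoded = gkfs::rpc::decompressBitset(wire);
    assert(gkfs::rpc::get_bitset(decoded, 10 - chnk_start));
    assert(!gkfs::rpc::get_bitset(decoded, 11 - chnk_start));
}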
>= 8) { + padding -= 8; + data.push_back(static_cast<uint8_t>((buffer >> padding) & 0xFF)); + count++; + } + } + + // Leftover bits (fewer than 8) are encoder fill; when they are zero, + // discard them rather than emitting a spurious extra byte + if(padding > 0 && padding < 6 && (buffer & ((1 << padding) - 1)) == 0) { + padding = 0; + } + + if(count == 0 || padding % 8 != 0) + return {}; + + return data; +} + +std::string +compressBitset(const std::vector<uint8_t>& bytes) { + return base64_encode(bytes); +} + +std::vector<uint8_t> +decompressBitset(const std::string& compressedString) { + return base64_decode(compressedString); +} + + } // namespace gkfs::rpc \ No newline at end of file diff --git a/src/daemon/handler/srv_data.cpp b/src/daemon/handler/srv_data.cpp index 35f636a53..46e87a200 100644 --- a/src/daemon/handler/srv_data.cpp +++ b/src/daemon/handler/srv_data.cpp @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -51,9 +52,9 @@ #define AGIOS_WRITE 1 #define AGIOS_SERVER_ID_IGNORE 0 #endif - using namespace std; + namespace { /** @@ -115,6 +116,8 @@ rpc_srv_write(hg_handle_t handle) { __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n, in.total_chunk_size, bulk_size, in.offset); + std::vector<uint8_t> write_ops_vect = + gkfs::rpc::decompressBitset(in.wbitset); #ifdef GKFS_ENABLE_AGIOS int* data; @@ -229,8 +232,9 @@ rpc_srv_write(hg_handle_t handle) { chnk_id_file++) { // Continue if chunk does not hash to this host #ifndef GKFS_ENABLE_FORWARDING - if(RPC_DATA->distributor()->locate_data(in.path, chnk_id_file, - host_size) != host_id) { + + if(!(gkfs::rpc::get_bitset(write_ops_vect, + chnk_id_file - in.chunk_start))) { GKFS_DATA->spdlogger()->trace( "{}() chunkid '{}' ignored as it does not match to this host with id '{}'. chnk_id_curr '{}'", __func__, chnk_id_file, host_id, chnk_id_curr); @@ -240,8 +244,10 @@ rpc_srv_write(hg_handle_t handle) { if(GKFS_DATA->enable_chunkstats()) { GKFS_DATA->stats()->add_write(in.path, chnk_id_file); } -#endif +#endif + GKFS_DATA->spdlogger()->trace("{}() Processing at host {} -> {}", + __func__, host_id, chnk_id_file); chnk_ids_host[chnk_id_curr] = chnk_id_file; // save this id to host chunk list // offset case. Only relevant in the first iteration of the loop and if @@ -417,7 +423,8 @@ rpc_srv_read(hg_handle_t handle) { "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'", __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n, in.total_chunk_size, bulk_size, in.offset); - + std::vector<uint8_t> read_bitset_vect = + gkfs::rpc::decompressBitset(in.wbitset); #ifdef GKFS_ENABLE_AGIOS int* data; ABT_eventual eventual = ABT_EVENTUAL_NULL; @@ -520,8 +527,10 @@ rpc_srv_read(hg_handle_t handle) { chnk_id_file++) { // Continue if chunk does not hash to this host #ifndef GKFS_ENABLE_FORWARDING - if(RPC_DATA->distributor()->locate_data(in.path, chnk_id_file, - host_size) != host_id) { + // Serve only the chunks flagged for this host in the request's bitset + + if(!(gkfs::rpc::get_bitset(read_bitset_vect, + chnk_id_file - in.chunk_start))) {
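+ // (editor's note) bit positions are relative to in.chunk_start, so a + // request covering chunks [chunk_start, chunk_end] tests bits 0..n-1. GKFS_DATA->spdlogger()->trace( "{}() chunkid '{}' ignored as it does not match to this host with id '{}'. 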
chnk_id_curr '{}'", __func__, chnk_id_file, host_id, chnk_id_curr); @@ -530,6 +539,7 @@ rpc_srv_read(hg_handle_t handle) { if(GKFS_DATA->enable_chunkstats()) { GKFS_DATA->stats()->add_read(in.path, chnk_id_file); } + #endif chnk_ids_host[chnk_id_curr] = @@ -597,6 +607,10 @@ rpc_srv_read(hg_handle_t handle) { GKFS_DATA->spdlogger()->warn( "{}() Not all chunks were detected!!! Size left {}", __func__, chnk_size_left_host); + + if(chnk_size_left_host == in.total_chunk_size) + return HG_CANCELED; + /* * 4. Read task results and accumulate in out.io_size */ diff --git a/tests/unit/test_guided_distributor.cpp b/tests/unit/test_guided_distributor.cpp index eb36647af..36b6f5910 100644 --- a/tests/unit/test_guided_distributor.cpp +++ b/tests/unit/test_guided_distributor.cpp @@ -39,23 +39,23 @@ TEST_CASE( "Guided distributor Testing", "[Distributor]" ) { // The distributor should return 3 for all the tested files auto d = gkfs::rpc::GuidedDistributor(); - REQUIRE( d.locate_data("/t.c01",1,10) == 3 ); - REQUIRE( d.locate_data("/t.c02",1,10) == 3 ); - REQUIRE( d.locate_data("/t.c03",1,10) == 3 ); - REQUIRE( d.locate_data("/t.c04",1,10) == 3 ); - REQUIRE( d.locate_data("/t.c05",1,10) == 3 ); - REQUIRE( d.locate_data("/t.c06",1,10) == 3 ); - REQUIRE( d.locate_data("/t.c07",1,10) == 3 ); + REQUIRE( d.locate_data("/t.c01",1,10,0) == 3 ); + REQUIRE( d.locate_data("/t.c02",1,10,0) == 3 ); + REQUIRE( d.locate_data("/t.c03",1,10,0) == 3 ); + REQUIRE( d.locate_data("/t.c04",1,10,0) == 3 ); + REQUIRE( d.locate_data("/t.c05",1,10,0) == 3 ); + REQUIRE( d.locate_data("/t.c06",1,10,0) == 3 ); + REQUIRE( d.locate_data("/t.c07",1,10,0) == 3 ); // Next result is random, but with the same seed is consistent // We ask for chunk 5 that is distributed randomly between the // 10 servers. 
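// (editor's note) The trailing 0 in the updated calls below is the new // num_copy argument; placement becomes (hash + num_copy) % hosts_size, so // copy 0 reproduces the original distribution and the expected sums hold. 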
- REQUIRE ( (d.locate_data("/t.c01",5,10) + - d.locate_data("/t.c02",5,10) + - d.locate_data("/t.c03",5,10) + - d.locate_data("/t.c04",5,10) + - d.locate_data("/t.c05",5,10) + - d.locate_data("/t.c06",5,10) + - d.locate_data("/t.c07",5,10) ) == 42); + REQUIRE ( (d.locate_data("/t.c01",5,10,0) + + d.locate_data("/t.c02",5,10,0) + + d.locate_data("/t.c03",5,10,0) + + d.locate_data("/t.c04",5,10,0) + + d.locate_data("/t.c05",5,10,0) + + d.locate_data("/t.c06",5,10,0) + + d.locate_data("/t.c07",5,10,0) ) == 42); } } -- GitLab From efcd0215c1819c09c61800f8769348dbfac83dcb Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Mon, 26 Jun 2023 12:10:35 +0200 Subject: [PATCH 02/17] Write parameters change --- include/common/metadata.hpp | 1 - src/client/gkfs_functions.cpp | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/common/metadata.hpp b/include/common/metadata.hpp index d64f94921..25ed22f71 100644 --- a/include/common/metadata.hpp +++ b/include/common/metadata.hpp @@ -37,7 +37,6 @@ #include #include - namespace gkfs::metadata { constexpr mode_t LINK_MODE = ((S_IRWXU | S_IRWXG | S_IRWXO) | S_IFLNK); diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 8acb587e4..36edd25b1 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -911,9 +911,8 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, write_size = ret_write.second; if(num_replicas > 0) { - auto ret_write_repl = - gkfs::rpc::forward_write(*path, buf, append_flag, offset, count, - updated_size, num_replicas); + auto ret_write_repl = gkfs::rpc::forward_write(*path, buf, offset, + count, num_replicas); if(err and ret_write_repl.first == 0) { // We succesfully write the data to some replica -- GitLab From cb2cebe17e825b943ecd94e42e8be1efdb483a88 Mon Sep 17 00:00:00 2001 From: rnou Date: Thu, 13 Apr 2023 13:01:53 +0200 Subject: [PATCH 03/17] WIP - REPL Added NUM_REPL env variable. 
(0 no replicas) NUM_REPL num replicas (Replicas < servers) Remove and truncate Metadata replication - WIP Metadata replication - Reattempt on stat minimal compilation issues (c++20), srand for repl Bitset Bit set proposal (WIP) Read - Write with bitset (<1024 chunks) Changed bitset to vector Added get_fs_config reattempt Some more resilience on create Added Replica_Check on write (disabled) Added helper vector-bitset functions --- src/client/gkfs_functions.cpp | 6 +----- src/client/rpc/forward_data.cpp | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 36edd25b1..e96fbe096 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -911,13 +911,9 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, write_size = ret_write.second; if(num_replicas > 0) { + auto ret_write_repl = gkfs::rpc::forward_write(*path, buf, offset, count, num_replicas); - - if(err and ret_write_repl.first == 0) { - // We succesfully write the data to some replica - err = ret_write_repl.first; - // Write size will be wrong write_size = ret_write_repl.second; } } diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index f01e149e7..e59ecbca6 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -57,6 +57,7 @@ namespace gkfs::rpc { * @param buf * @param append_flag * @param write_size + * @param updated_metadentry_size * @param num_copies number of replicas * @return pair */ -- GitLab From 7cd14934b1f957acb3f341f3a19cd4a364146671 Mon Sep 17 00:00:00 2001 From: rnou Date: Tue, 23 May 2023 13:06:58 +0200 Subject: [PATCH 04/17] Jerasure added --- CMake/FindJerasure.cmake | 57 +++++++++++++++++++++++++++++++++++++++ src/client/CMakeLists.txt | 1 + 2 files changed, 58 insertions(+) create mode 100644 CMake/FindJerasure.cmake diff --git a/CMake/FindJerasure.cmake b/CMake/FindJerasure.cmake new file mode 100644 index 000000000..b9c2637e1 --- /dev/null +++ b/CMake/FindJerasure.cmake @@ -0,0 +1,57 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# +# - Try to find Jerasure library +# This will define +# Jerasure_FOUND +# Jerasure_INCLUDE_DIR +# Jerasure_LIBRARIES +# + +find_path(Jerasure_INCLUDE_DIR + NAMES jerasure.h + ) + +find_library(Jerasure_LIBRARY + NAMES Jerasure + ) + +set(Jerasure_LIBRARIES ${Jerasure_LIBRARY}) +set(Jerasure_INCLUDE_DIRS ${Jerasure_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(Jerasure + DEFAULT_MSG Jerasure_LIBRARY Jerasure_INCLUDE_DIR + ) + +mark_as_advanced( + Jerasure_LIBRARY + Jerasure_INCLUDE_DIR +) \ No newline at end of file diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 536d91165..414e54012 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -62,6 +62,7 @@ target_link_libraries( fmt::fmt Threads::Threads Date::TZ + Jerasure ) install( -- GitLab From 38c7bad03290cfbf792c22d398d994c8c540406d Mon Sep 17 00:00:00 2001 From: rnou Date: Tue, 23 May 2023 13:59:57 +0200 Subject: [PATCH 05/17] New ECCDistributor --- include/common/rpc/distributor.hpp | 36 +++++++++++++++++++ src/common/rpc/distributor.cpp | 55 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/include/common/rpc/distributor.hpp b/include/common/rpc/distributor.hpp index b6c09a0e0..a0b227fd1 100644 --- a/include/common/rpc/distributor.hpp +++ b/include/common/rpc/distributor.hpp @@ -102,6 +102,42 @@ public: locate_directory_metadata(const std::string& path) const override; }; +class ECCDistributor : public Distributor { +private: + host_t localhost_; + unsigned int hosts_size_{0}; + unsigned int ecc_size_{0}; + std::vector<host_t> all_hosts_; + std::hash<std::string> str_hash; + +public: + ECCDistributor(); + + ECCDistributor(host_t localhost, unsigned int hosts_size, + unsigned int ecc_size); + + unsigned int + hosts_size() const override; + + host_t + localhost() const override; + + host_t + locate_data(const std::string& path, const chunkid_t& chnk_id, + const int num_copy) const override; + + host_t + locate_data(const std::string& path, const chunkid_t& chnk_id, + unsigned int host_size, const int num_copy); + + host_t + locate_file_metadata(const std::string& path, + const int num_copy) const override; + + std::vector<host_t> + locate_directory_metadata(const std::string& path) const override; +}; + class LocalOnlyDistributor : public Distributor { private: host_t localhost_; diff --git a/src/common/rpc/distributor.cpp b/src/common/rpc/distributor.cpp index 5a7621e61..e0f39a9bc 100644 --- a/src/common/rpc/distributor.cpp +++ b/src/common/rpc/distributor.cpp @@ -82,6 +82,61 @@ SimpleHashDistributor::locate_directory_metadata(const string& path) const { return all_hosts_; } + +ECCDistributor::ECCDistributor(host_t localhost, unsigned int hosts_size, + unsigned int ecc_size) + : localhost_(localhost), hosts_size_(hosts_size), ecc_size_(ecc_size), + all_hosts_(hosts_size) { + ::iota(all_hosts_.begin(), all_hosts_.end(), 0); +} + +ECCDistributor::ECCDistributor() {} + +host_t +ECCDistributor::localhost() const { + return localhost_; +} + +unsigned int +ECCDistributor::hosts_size() const { + return hosts_size_; +} + +host_t +ECCDistributor::locate_data(const string& path, const chunkid_t& chnk_id, + const int num_copy) const { + if(num_copy == 0) { + return (str_hash(path + ::to_string(chnk_id))) % + (hosts_size_ - ecc_size_); + } else { + return (num_copy + (hosts_size_ - ecc_size_)); + } +} + +
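+// (editor's note) Placement sketch with hypothetical numbers: with 8 hosts +// and ecc_size = 2, data chunks (num_copy == 0) hash over hosts 0..5, while +// non-zero num_copy values address the hosts reserved at the tail of the +// cluster for the erasure-coded blocks. +host_t 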
+ECCDistributor::locate_data(const string& path, const chunkid_t& chnk_id, + unsigned int hosts_size, const int num_copy) { + if(hosts_size_ != hosts_size) { + hosts_size_ = hosts_size; + all_hosts_ = std::vector(hosts_size); + ::iota(all_hosts_.begin(), all_hosts_.end(), 0); + } + + return (locate_data(path, chnk_id, num_copy)); +} + +host_t +ECCDistributor::locate_file_metadata(const string& path, + const int num_copy) const { + return (str_hash(path) + num_copy) % hosts_size_; +} + +::vector +ECCDistributor::locate_directory_metadata(const string& path) const { + return all_hosts_; +} + + LocalOnlyDistributor::LocalOnlyDistributor(host_t localhost) : localhost_(localhost) {} -- GitLab From f6bab8259981407835535cecf17e1db69591f3d8 Mon Sep 17 00:00:00 2001 From: rnou Date: Tue, 23 May 2023 14:09:30 +0200 Subject: [PATCH 06/17] Added ECC Distribution in client Parity calculation (WIP) Reed Solomon coding (WIP) --- include/client/rpc/forward_data.hpp | 6 +++ include/common/rpc/distributor.hpp | 2 +- src/client/gkfs_functions.cpp | 83 +++++++++++++++++++++++++++-- src/client/preload.cpp | 3 ++ src/client/rpc/forward_data.cpp | 30 +++++++++++ 5 files changed, 120 insertions(+), 4 deletions(-) diff --git a/include/client/rpc/forward_data.hpp b/include/client/rpc/forward_data.hpp index 45384a157..072d4bc9f 100644 --- a/include/client/rpc/forward_data.hpp +++ b/include/client/rpc/forward_data.hpp @@ -59,6 +59,12 @@ forward_truncate(const std::string& path, size_t current_size, size_t new_size, std::pair forward_get_chunk_stat(); +#define GKFS_USE_ECC_DISTRIBUTION 1 +#ifdef GKFS_USE_ECC_DISTRIBUTION +std::pair calc_op_chunks(const std::string& path, const bool append_flag, + const off64_t in_offset, const size_t write_size, + const int64_t updated_metadentry_size); +#endif } // namespace gkfs::rpc diff --git a/include/common/rpc/distributor.hpp b/include/common/rpc/distributor.hpp index a0b227fd1..bbec815ce 100644 --- a/include/common/rpc/distributor.hpp +++ b/include/common/rpc/distributor.hpp @@ -36,7 +36,7 @@ #include #include #include - +#define GKFS_USE_ECC_DISTRIBUTION 1 namespace gkfs::rpc { using chunkid_t = unsigned int; diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index e96fbe096..79570ad9e 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -45,6 +45,8 @@ extern "C" { #include } +#include +#include using namespace std; /* @@ -909,7 +911,82 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, auto ret_write = gkfs::rpc::forward_write(*path, buf, offset, count, 0); err = ret_write.first; write_size = ret_write.second; +#define GKFS_USE_ECC_DISTRIBUTION 1 +#ifdef GKFS_USE_ECC_DISTRIBUTION + // Process ECC calculation + + // 0 - Involved chunks: + + auto chunks = gkfs::rpc::calc_op_chunks(*path, append_flag, offset, count, + updated_size); + + std::set chunk_set; + + // For each chunk we will have a set of chunks involved on that calculation + // [0] [1] [2] [3] [4] [n-p] [p1] [p2] + // [n-p+1] .... + // i.e. : [0] -> 1,2,3,4,n-p + // i.e : [4] -> 0,1,2,3,n-p + // i.e : [n-p+1] -> + // 3 data serv + // (chunk / data_servers)*data_servers --> Initial row chunk + // Involved : From initial to ... 
initial + data_servers + if((uint64_t) updated_size >= + (uint64_t) CTX->hosts().size() * gkfs::config::rpc::chunksize) { + auto data_servers = CTX->hosts().size() - CTX->get_replicas(); + for(auto i = chunks.first; i <= chunks.second; ++i) { + auto initial_row_chunk = (i / data_servers) * data_servers; + chunk_set.insert(initial_row_chunk); + } + // Parity Stored in : parity1 .. parity2, as name = + // [PARITY][Path][Initial row chunk] + + // 1 - Read data from the other chunks + // Allocate one chunk-sized buffer per server; a fill-constructed + // vector would alias a single malloc() across all entries + std::vector<char*> buffers(CTX->hosts().size()); + for(auto& b : buffers) + b = (char*) malloc(gkfs::config::rpc::chunksize); + std::cout << "OPERATION " + << " --- Size : " << updated_size + << " Chunks Range:" << chunks.first << " -- " << chunks.second + << " Data + Replicas " << data_servers << " -- " + << CTX->get_replicas() << std::endl; + // TODO : This could be optimised, with a single read loop + for(auto i : chunk_set) { + std::cout << i << " --- Size : " << updated_size << std::endl; + for(auto j = i; j < i + data_servers; ++j) { + std::set<int8_t> failed; + std::cout << " Reading chunk " + << " [" << i << "] --> " << j << std::endl; + auto out = gkfs::rpc::forward_read( + *path, buffers[j - i], j * gkfs::config::rpc::chunksize, + gkfs::config::rpc::chunksize, 0, failed); + std::cout << " Read Success " << out.first << " -- " + << out.second << std::endl; + } + + // We now have all the data needed to compute the EC + + std::vector<char*> coding(CTX->get_replicas()); + for(auto& c : coding) + c = (char*) malloc(gkfs::config::rpc::chunksize); + auto matrix = reed_sol_vandermonde_coding_matrix( + data_servers, CTX->get_replicas(), 8); + jerasure_matrix_encode(data_servers, CTX->get_replicas(), 8, matrix, + buffers.data(), coding.data(), + gkfs::config::rpc::chunksize); + + std::cout << " Parity computation done " << std::endl; + } + } else { + std::cout << "No EC in small files" << std::endl; + } + // 2 - Calc Erasure codes + + // 3 - Write erasure codes + + +#else if(num_replicas > 0) { auto ret_write_repl = gkfs::rpc::forward_write(*path, buf, offset, @@ -917,7 +994,7 @@ gkfs_pwrite(std::shared_ptr<gkfs::filemap::OpenFile> file, const char* buf, write_size = ret_write_repl.second; } } - +#endif if(err) { LOG(WARNING, "gkfs::rpc::forward_write() failed with err '{}'", err); errno = err; @@ -1056,8 +1133,8 @@ gkfs_pread(std::shared_ptr<gkfs::filemap::OpenFile> file, char* buf, std::set<int8_t> failed; // set with failed targets. if(CTX->get_replicas() != 0) { - ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, - CTX->get_replicas(), failed); + ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, 0, failed); while(ret.first == EIO) { ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, CTX->get_replicas(), failed); diff --git a/src/client/preload.cpp b/src/client/preload.cpp index d9b242636..c4d1a2490 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -223,6 +223,9 @@ init_environment() { #ifdef GKFS_USE_GUIDED_DISTRIBUTION auto distributor = std::make_shared<gkfs::rpc::GuidedDistributor>( CTX->local_host_id(), CTX->hosts().size()); +#elif GKFS_USE_ECC_DISTRIBUTION + auto distributor = std::make_shared<gkfs::rpc::ECCDistributor>( CTX->local_host_id(), CTX->hosts().size(), CTX->get_replicas()); #else auto distributor = std::make_shared<gkfs::rpc::SimpleHashDistributor>( CTX->local_host_id(), CTX->hosts().size()); diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index e59ecbca6..26de8270f 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -47,6 +47,36 @@ namespace gkfs::rpc { * NOTE: No errno is defined here! 
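* * Worked example of the chunk arithmetic used by calc_op_chunks() below * (editor's sketch, hypothetical numbers): with the default 512 KiB chunk * size (524288 bytes), a 100000-byte write at offset 600000 covers bytes * [600000, 700000), so block_index() gives chnk_start = 1 and chnk_end = 1 * and the function returns {1, 1}. 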
*/ +#ifdef GKFS_USE_ECC_DISTRIBUTION +/** + * @brief Calculate the chunk start and end that will be affected by the operation. + * + * @param path + * @param append_flag + * @param in_offset + * @param write_size + * @param updated_metadentry_size + * @return pair of first and last chunk id touched by the operation + */ +std::pair<uint64_t, uint64_t> +calc_op_chunks(const std::string& path, const bool append_flag, + const off64_t in_offset, const size_t write_size, + const int64_t updated_metadentry_size) { + using namespace gkfs::utils::arithmetic; + // On append the write lands at the previous end of file, i.e. the + // updated size minus what was just written; otherwise the caller's + // offset is used directly. + off64_t offset = + append_flag ? (updated_metadentry_size - write_size) : in_offset; + + auto chnk_start = block_index(offset, gkfs::config::rpc::chunksize); + auto chnk_end = block_index((offset + write_size) - 1, + gkfs::config::rpc::chunksize); + + + return make_pair(chnk_start, chnk_end); +} + +#endif + /** * Send an RPC request to write from a buffer. * There is a bitset of 1024 chunks to tell the server -- GitLab From f01c3f8eaf89cf50b87f4d795b038e8eb0c12e6f Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Fri, 26 May 2023 13:43:37 +0200 Subject: [PATCH 07/17] Jerasure and GF-complete deps --- CMake/FindGF_complete.cmake | 68 ++++++++++++++ CMake/FindJerasure.cmake | 30 +++++-- CMakeLists.txt | 2 + scripts/profiles/0.9.3-exp/agios.specs | 81 +++++++++++++++++ scripts/profiles/0.9.3-exp/all.specs | 89 +++++++++++++++++++ scripts/profiles/0.9.3-exp/ci.specs | 81 +++++++++++++++++ scripts/profiles/0.9.3-exp/default.specs | 80 +++++++++++++++++ scripts/profiles/0.9.3-exp/direct.specs | 76 ++++++++++++++++ .../profiles/0.9.3-exp/infiniband_verbs.specs | 82 +++++++++++++++++ .../0.9.3-exp/install/Jerasure.install | 58 ++++++++++++ .../profiles/0.9.3-exp/install/agios.install | 57 ++++++++++++ .../0.9.3-exp/install/argobots.install | 60 +++++++++++++ .../0.9.3-exp/install/capstone.install | 57 ++++++++++++ .../profiles/0.9.3-exp/install/curl.install | 58 ++++++++++++ .../profiles/0.9.3-exp/install/date.install | 61 +++++++++++++ .../0.9.3-exp/install/gf-complete.install | 58 ++++++++++++ .../profiles/0.9.3-exp/install/json-c.install | 60 +++++++++++++ .../0.9.3-exp/install/libfabric%verbs.install | 67 ++++++++++++++ .../0.9.3-exp/install/libfabric.install | 67 ++++++++++++++ .../profiles/0.9.3-exp/install/lz4.install | 61 +++++++++++++ .../profiles/0.9.3-exp/install/margo.install | 59 ++++++++++++ .../0.9.3-exp/install/mercury.install | 82 +++++++++++++++++ .../0.9.3-exp/install/parallax.install | 64 +++++++++++++ .../0.9.3-exp/install/prometheus-cpp.install | 61 +++++++++++++ .../0.9.3-exp/install/rocksdb.install | 79 ++++++++++++++++ .../install/syscall_intercept.install | 61 +++++++++++++ scripts/profiles/0.9.3-exp/marenostrum4.specs | 83 +++++++++++++++++ scripts/profiles/0.9.3-exp/mogon2.specs | 83 +++++++++++++++++ scripts/profiles/0.9.3-exp/ngio.specs | 83 +++++++++++++++++ .../profiles/0.9.3-exp/omnipath_psm2.specs | 82 +++++++++++++++++ scripts/profiles/0.9.3-exp/p9.specs | 82 +++++++++++++++++ scripts/profiles/sources.list | 2 + src/client/CMakeLists.txt | 3 +- src/client/gkfs_functions.cpp | 2 +- 34 files changed, 2070 insertions(+), 9 deletions(-) create mode 100644 CMake/FindGF_complete.cmake create mode 100644 scripts/profiles/0.9.3-exp/agios.specs create mode 100644 scripts/profiles/0.9.3-exp/all.specs create mode 100644 scripts/profiles/0.9.3-exp/ci.specs create mode 100644 scripts/profiles/0.9.3-exp/default.specs create mode 100644 scripts/profiles/0.9.3-exp/direct.specs create mode 100644 scripts/profiles/0.9.3-exp/infiniband_verbs.specs create mode 100644 
scripts/profiles/0.9.3-exp/install/Jerasure.install create mode 100644 scripts/profiles/0.9.3-exp/install/agios.install create mode 100644 scripts/profiles/0.9.3-exp/install/argobots.install create mode 100644 scripts/profiles/0.9.3-exp/install/capstone.install create mode 100644 scripts/profiles/0.9.3-exp/install/curl.install create mode 100644 scripts/profiles/0.9.3-exp/install/date.install create mode 100644 scripts/profiles/0.9.3-exp/install/gf-complete.install create mode 100644 scripts/profiles/0.9.3-exp/install/json-c.install create mode 100644 scripts/profiles/0.9.3-exp/install/libfabric%verbs.install create mode 100644 scripts/profiles/0.9.3-exp/install/libfabric.install create mode 100644 scripts/profiles/0.9.3-exp/install/lz4.install create mode 100644 scripts/profiles/0.9.3-exp/install/margo.install create mode 100644 scripts/profiles/0.9.3-exp/install/mercury.install create mode 100644 scripts/profiles/0.9.3-exp/install/parallax.install create mode 100644 scripts/profiles/0.9.3-exp/install/prometheus-cpp.install create mode 100644 scripts/profiles/0.9.3-exp/install/rocksdb.install create mode 100644 scripts/profiles/0.9.3-exp/install/syscall_intercept.install create mode 100644 scripts/profiles/0.9.3-exp/marenostrum4.specs create mode 100644 scripts/profiles/0.9.3-exp/mogon2.specs create mode 100644 scripts/profiles/0.9.3-exp/ngio.specs create mode 100644 scripts/profiles/0.9.3-exp/omnipath_psm2.specs create mode 100644 scripts/profiles/0.9.3-exp/p9.specs diff --git a/CMake/FindGF_complete.cmake b/CMake/FindGF_complete.cmake new file mode 100644 index 000000000..4af6878ea --- /dev/null +++ b/CMake/FindGF_complete.cmake @@ -0,0 +1,68 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# +# - Try to find GF_complete library +# This will define +# GF_complete_FOUND +# GF_complete_INCLUDE_DIR +# GF_complete_LIBRARIES +# + +find_path(GF_complete_INCLUDE_DIR + NAMES gf_complete.h + ) + +find_library(GF_complete_LIBRARY + NAMES gf_complete + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( GF_complete + DEFAULT_MSG + GF_complete_INCLUDE_DIR + GF_complete_LIBRARY +) + +if(GF_complete_FOUND) + set(GF_complete_INCLUDE_DIRS ${GF_complete_INCLUDE_DIR}) + set(GF_complete_LIBRARIES ${GF_complete_LIBRARY}) + + if(NOT TARGET GF_complete::GF_complete) + add_library(GF_complete::GF_complete UNKNOWN IMPORTED) + set_target_properties(GF_complete::GF_complete PROPERTIES + IMPORTED_LOCATION "${GF_complete_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${GF_complete_INCLUDE_DIR}" + ) + endif() +endif() + +mark_as_advanced( + GF_complete_INCLUDE_DIR + GF_complete_LIBRARY +) diff --git a/CMake/FindJerasure.cmake b/CMake/FindJerasure.cmake index b9c2637e1..ffd0ece1e 100644 --- a/CMake/FindJerasure.cmake +++ b/CMake/FindJerasure.cmake @@ -34,24 +34,40 @@ # Jerasure_LIBRARIES # +# - Try to find galois as Jerasure.h is installed in the root include find_path(Jerasure_INCLUDE_DIR NAMES jerasure.h ) +find_path(Jerasure2_INCLUDE_DIR + NAMES galois.h + ) + find_library(Jerasure_LIBRARY NAMES Jerasure ) -set(Jerasure_LIBRARIES ${Jerasure_LIBRARY}) -set(Jerasure_INCLUDE_DIRS ${Jerasure_INCLUDE_DIR}) - include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( Jerasure + DEFAULT_MSG + Jerasure_INCLUDE_DIR + Jerasure_LIBRARY +) -find_package_handle_standard_args(Jerasure - DEFAULT_MSG Jerasure_LIBRARY Jerasure_INCLUDE_DIR - ) +if(Jerasure_FOUND) + set(Jerasure_INCLUDE_DIRS ${Jerasure_INCLUDE_DIR}) + set(Jerasure_LIBRARIES ${Jerasure_LIBRARY}) + + if(NOT TARGET Jerasure::Jerasure) + add_library(Jerasure::Jerasure UNKNOWN IMPORTED) + set_target_properties(Jerasure::Jerasure PROPERTIES + IMPORTED_LOCATION "${Jerasure_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${Jerasure_INCLUDE_DIR}" + ) + endif() +endif() mark_as_advanced( - Jerasure_LIBRARY Jerasure_INCLUDE_DIR + Jerasure_LIBRARY ) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 71e7c842f..1aedca403 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,6 +203,8 @@ if(GKFS_ENABLE_PROMETHEUS) find_package(prometheus-cpp REQUIRED) # >= 1.0.0 endif() +find_package(GF_complete) +find_package(Jerasure) ### Other stuff that can be found out using find_package: # determine the thread library of the system diff --git a/scripts/profiles/0.9.3-exp/agios.specs b/scripts/profiles/0.9.3-exp/agios.specs new file mode 100644 index 000000000..e967029cd --- /dev/null +++ b/scripts/profiles/0.9.3-exp/agios.specs @@ -0,0 +1,81 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. 
# +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="All dependencies (except transport-specific and experimental)" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" + ["agios"]="c26a6544200f823ebb8f890dd94e653d148bf226@development" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" "agios" +) + +# Extra arguments passed to the installation script. As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( +) diff --git a/scripts/profiles/0.9.3-exp/all.specs b/scripts/profiles/0.9.3-exp/all.specs new file mode 100644 index 000000000..e6ad862db --- /dev/null +++ b/scripts/profiles/0.9.3-exp/all.specs @@ -0,0 +1,89 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. 
# +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies for Mogon 2 supercomputer" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["psm2"]="11.2.185" + ["json-c"]="0.15-20200726" + ["curl"]="7.82.0" + ["prometheus-cpp"]="v1.0.0" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" + ["agios"]="c26a6544200f823ebb8f890dd94e653d148bf226@development" + ["parallax"]="ffdea6e820f5c4c2d33e60d9a4b15ef9e6bbcfdd" + ["gf-complete"]="a6862d10c9db467148f20eef2c6445ac9afd94d8" + ["Jerasure"]="414c96ef2b9934953b6facb31d803d79b1dd1405" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "psm2" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" + "agios" "curl" "prometheus-cpp" "parallax" "gf-complete" "Jerasure" +) + +# Extra arguments passed to the installation script. As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( + ["libfabric"]="--enable-psm2=yes --with-psm2-src=${SOURCE_DIR}/psm2" +) diff --git a/scripts/profiles/0.9.3-exp/ci.specs b/scripts/profiles/0.9.3-exp/ci.specs new file mode 100644 index 000000000..aec646112 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/ci.specs @@ -0,0 +1,81 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. 
# +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies required by the CI" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["prometheus-cpp"]="v1.0.0" + ) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" + ["agios"]="c26a6544200f823ebb8f890dd94e653d148bf226@development" + ["parallax"]="ffdea6e820f5c4c2d33e60d9a4b15ef9e6bbcfdd" + ) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" + "date" "agios" "parallax" "prometheus-cpp" +) + +# Extra arguments passed to the installation script. As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( +) diff --git a/scripts/profiles/0.9.3-exp/default.specs b/scripts/profiles/0.9.3-exp/default.specs new file mode 100644 index 000000000..afc567273 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/default.specs @@ -0,0 +1,80 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="All dependencies" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch syscall_intercept_clone3.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" +) + +# Extra arguments passed to the installation script. As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( +) diff --git a/scripts/profiles/0.9.3-exp/direct.specs b/scripts/profiles/0.9.3-exp/direct.specs new file mode 100644 index 000000000..6f2a48123 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/direct.specs @@ -0,0 +1,76 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Direct dependencies" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" +) + +# Extra arguments passed to the installation script. As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( +) diff --git a/scripts/profiles/0.9.3-exp/infiniband_verbs.specs b/scripts/profiles/0.9.3-exp/infiniband_verbs.specs new file mode 100644 index 000000000..c34c4ff6d --- /dev/null +++ b/scripts/profiles/0.9.3-exp/infiniband_verbs.specs @@ -0,0 +1,82 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies for Infiniband supercomputer" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric%verbs"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading/installing +order=( + "lz4" "capstone" "json-c" "libfabric%verbs" "mercury" "argobots" "margo" "rocksdb" + "syscall_intercept" "date" +) + +# Extra arguments passed to the installation script. As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( + ["libfabric%verbs"]="--enable-verbs=yes" +) diff --git a/scripts/profiles/0.9.3-exp/install/Jerasure.install b/scripts/profiles/0.9.3-exp/install/Jerasure.install new file mode 100644 index 000000000..4b6f160bf --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/Jerasure.install @@ -0,0 +1,58 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="Jerasure" + CURR="${SOURCE_DIR}/${ID}" + cd "${CURR}" + autoreconf -fi + ./configure --prefix="${INSTALL_DIR}" + make -j"${CORES}" + make install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/agios.install b/scripts/profiles/0.9.3-exp/install/agios.install new file mode 100644 index 000000000..3d0392ff5 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/agios.install @@ -0,0 +1,57 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. 
## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="agios" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}/build" + ${CMAKE} -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" .. + make install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/argobots.install b/scripts/profiles/0.9.3-exp/install/argobots.install new file mode 100644 index 000000000..96ccea920 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/argobots.install @@ -0,0 +1,60 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 
9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="argobots" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}" + ./autogen.sh + cd "${CURR}/build" + ../configure --prefix="${INSTALL_DIR}" --enable-perf-opt --disable-checks + make -j"${CORES}" + make install +} + +pkg_check() { + make check +} diff --git a/scripts/profiles/0.9.3-exp/install/capstone.install b/scripts/profiles/0.9.3-exp/install/capstone.install new file mode 100644 index 000000000..ff299d82b --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/capstone.install @@ -0,0 +1,57 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="capstone" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}/build" + ${CMAKE} -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" -DCMAKE_BUILD_TYPE:STRING=Release .. 
+ make -j"${CORES}" install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/curl.install b/scripts/profiles/0.9.3-exp/install/curl.install new file mode 100644 index 000000000..d873819c4 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/curl.install @@ -0,0 +1,58 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="curl" + CURR="${SOURCE_DIR}/${ID}" + cd "${CURR}" + autoreconf -fi + ./configure --prefix="${INSTALL_DIR}" --without-ssl + make -j"${CORES}" + make install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/date.install b/scripts/profiles/0.9.3-exp/install/date.install new file mode 100644 index 000000000..c4f12524c --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/date.install @@ -0,0 +1,61 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. 
# +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="date" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}/build" + ${CMAKE} -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DCMAKE_CXX_STANDARD:STRING=14 \ + -DUSE_SYSTEM_TZ_DB:BOOL=ON \ + -DBUILD_SHARED_LIBS:BOOL=ON .. + make -j"${CORES}" install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/gf-complete.install b/scripts/profiles/0.9.3-exp/install/gf-complete.install new file mode 100644 index 000000000..70d675a2c --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/gf-complete.install @@ -0,0 +1,58 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. 
# +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="gf-complete" + CURR="${SOURCE_DIR}/${ID}" + cd "${CURR}" + autoreconf -fi + ./configure --prefix="${INSTALL_DIR}" + make -j"${CORES}" + make install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/json-c.install b/scripts/profiles/0.9.3-exp/install/json-c.install new file mode 100644 index 000000000..7280cda81 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/json-c.install @@ -0,0 +1,60 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. 
## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + CURR="${SOURCE_DIR}/json-c" + prepare_build_dir "${CURR}" + cd "${CURR}/build" + cmake -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" -DCMAKE_BUILD_TYPE:STRING=Release .. + make -j"${CORES}" install + # Margo doesn't search in both directories, so we make it available in both lib and lib64 + if [[ -f "${INSTALL_DIR}/lib64/pkgconfig/json-c.pc" ]]; then + cp ${INSTALL_DIR}/lib64/pkgconfig/json-c.pc ${INSTALL_DIR}/lib/pkgconfig/ + fi +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/libfabric%verbs.install b/scripts/profiles/0.9.3-exp/install/libfabric%verbs.install new file mode 100644 index 000000000..15257eb81 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/libfabric%verbs.install @@ -0,0 +1,67 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 
9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="libfabric%verbs" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}" + ./autogen.sh + cd "${CURR}/build" + OFI_CONFIG="../configure --prefix=${INSTALL_DIR} --enable-tcp=yes" + + EXTRA_INSTALL_ARGS="${PROFILE_EXTRA_INSTALL_ARGS[${ID}]}" + + if [[ -n "${EXTRA_INSTALL_ARGS}" ]]; then + OFI_CONFIG="${OFI_CONFIG} ${EXTRA_INSTALL_ARGS}" + fi + + ${OFI_CONFIG} + make -j"${CORES}" install +} + +pkg_check() { + make check +} diff --git a/scripts/profiles/0.9.3-exp/install/libfabric.install b/scripts/profiles/0.9.3-exp/install/libfabric.install new file mode 100644 index 000000000..7db89438e --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/libfabric.install @@ -0,0 +1,67 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 
9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID=libfabric + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}" + ./autogen.sh + cd "${CURR}/build" + OFI_CONFIG="../configure --prefix=${INSTALL_DIR} --enable-tcp=yes" + + EXTRA_INSTALL_ARGS="${PROFILE_EXTRA_INSTALL_ARGS[${ID}]}" + + if [[ -n "${EXTRA_INSTALL_ARGS}" ]]; then + OFI_CONFIG="${OFI_CONFIG} ${EXTRA_INSTALL_ARGS}" + fi + + ${OFI_CONFIG} + make -j"${CORES}" install +} + +pkg_check() { + make check +} diff --git a/scripts/profiles/0.9.3-exp/install/lz4.install b/scripts/profiles/0.9.3-exp/install/lz4.install new file mode 100644 index 000000000..13e4e53b3 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/lz4.install @@ -0,0 +1,61 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="lz4" + CURR="${SOURCE_DIR}/${ID}" + cd "${CURR}" + # try to remove binaries first in case they already exist. Otherwise install fails. 
+ LZ4_BINS=("${INSTALL_DIR}"/bin/lz4c "${INSTALL_DIR}"/bin/lz4cat "${INSTALL_DIR}"/bin/unlz4 "${INSTALL_DIR}"/bin/lz4) + for LZ4_BIN in "${LZ4_BINS[@]}"; do + [ -e "$LZ4_BIN" ] && rm "$LZ4_BIN" + done + make -j"${CORES}" + make DESTDIR="${INSTALL_DIR}" PREFIX="" install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/margo.install b/scripts/profiles/0.9.3-exp/install/margo.install new file mode 100644 index 000000000..d2c6e4b5f --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/margo.install @@ -0,0 +1,59 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 
9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="margo" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}" + ./prepare.sh + cd "${CURR}/build" + ../configure --prefix="${INSTALL_DIR}" PKG_CONFIG_PATH="${INSTALL_DIR}/lib/pkgconfig" CFLAGS="${CFLAGS} -Wall -O3" + make -j"${CORES}" install +} + +pkg_check() { + make check +} diff --git a/scripts/profiles/0.9.3-exp/install/mercury.install b/scripts/profiles/0.9.3-exp/install/mercury.install new file mode 100644 index 000000000..601305b83 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/mercury.install @@ -0,0 +1,82 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 
9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + + # if the profile compiles bmi, enable it + if [[ -n "${PROFILE_DEP_NAMES['bmi']}" ]]; then + USE_BMI="-DNA_USE_BMI:BOOL=ON" + else + USE_BMI="-DNA_USE_BMI:BOOL=OFF" + fi + + # if the profile provides any flavour of libfabric, enable it + if profile_has_dependency "^libfabric.*$"; then + USE_OFI="-DNA_USE_OFI:BOOL=ON" + else + USE_OFI="-DNA_USE_OFI:BOOL=OFF" + fi + + ID="mercury" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}/build" + PKG_CONFIG_PATH="${INSTALL_DIR}/lib/pkgconfig" $CMAKE \ + -DCMAKE_PREFIX_PATH=${INSTALL_DIR} \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DBUILD_TESTING:BOOL=ON \ + -DMERCURY_USE_CHECKSUMS:BOOL=OFF \ + -DMERCURY_USE_BOOST_PP:BOOL=ON \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ + ${USE_BMI} ${USE_OFI} \ + .. + make -j"${CORES}" + make install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/parallax.install b/scripts/profiles/0.9.3-exp/install/parallax.install new file mode 100644 index 000000000..38bd9c689 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/parallax.install @@ -0,0 +1,64 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. 
## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + + CURR="${SOURCE_DIR}/parallax" + # sed -i -e 's/KEY_SIZE (256)/KEY_SIZE (4096)/g' ${CURR}/lib/btree/conf.h + prepare_build_dir "${CURR}" + cd "${CURR}/build" + PKG_CONFIG_PATH="${INSTALL_DIR}/lib/pkgconfig" $CMAKE \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ + -DCMAKE_BUILD_TYPE="Release" \ + -DCMAKE_CXX_FLAGS_RELEASE="-Wno-error=unused-result" \ + -DDISABLE_LOGGING:BOOL=ON \ + .. + make -j"${CORES}" + make install + # We need to copy this file as it is not installed + cp ${CURR}/lib/include/parallax/structures.h ${INSTALL_DIR}/include/ +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/prometheus-cpp.install b/scripts/profiles/0.9.3-exp/install/prometheus-cpp.install new file mode 100644 index 000000000..62e420217 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/prometheus-cpp.install @@ -0,0 +1,61 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 
9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + ID="prometheus-cpp" + CURR="${SOURCE_DIR}/${ID}" + prepare_build_dir "${CURR}" + cd "${CURR}/build" + ${CMAKE} \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + .. + make -j"${CORES}" install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/install/rocksdb.install b/scripts/profiles/0.9.3-exp/install/rocksdb.install new file mode 100644 index 000000000..6ece050bd --- /dev/null +++ b/scripts/profiles/0.9.3-exp/install/rocksdb.install @@ -0,0 +1,79 @@ +################################################################################ +# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ +# vi: ft=bash + +################################################################################ +## The installation script must define both a pkg_install function and +## pkg_check function that, as their name implies, must specify how +## a dependency package should be installed and tested. ## ## The following +## variables can be used in the installation script: +## - CMAKE: a variable that expands to the cmake binary +## - SOURCE_DIR: the directory where the sources for the package were +## downloaded +## - INSTALL_DIR: the directory where the package should be installed +## - CORES: the number of cores to use when building +## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.) +## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0) +## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9) +## - PERFORM_TEST: whether tests for the package should be executed +################################################################################ + +pkg_install() { + CXXFLAGS='' + # gcc 9 and clang 8 need -Wno-error=deprecated-copy -Wno-error=pessimizing-move + if [[ ("${COMPILER_NAME}" == "g++" && "${COMPILER_MAJOR_VERSION}" -ge 9) || + ("${COMPILER_NAME}" == "clang" && "${COMPILER_MAJOR_VERSION}" -ge 8) ]]; then + CXXFLAGS='-Wno-error=deprecated-copy -Wno-error=pessimizing-move -Wno-error=maybe-uninitialized' + fi + + # TODO use SSE? 
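+    # Note (editorial assumption, not verified against this RocksDB version):
+    # PORTABLE=1 below keeps the binary free of host-specific instructions
+    # such as -march=native. Answering the TODO above would mean opting into
+    # SSE4.2-accelerated CRC32c, which upstream RocksDB's CMake exposes as a
+    # flag, e.g. -DFORCE_SSE42=ON added to the invocation below.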
+    CURR="${SOURCE_DIR}/rocksdb"
+    prepare_build_dir "${CURR}"
+    cd "${CURR}/build"
+    PKG_CONFIG_PATH="${INSTALL_DIR}/lib/pkgconfig" $CMAKE \
+        -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_PREFIX_PATH="${INSTALL_DIR}" \
+        -DCMAKE_INSTALL_LIBDIR="${INSTALL_DIR}/lib" \
+        -DCMAKE_INSTALL_INCLUDEDIR="${INSTALL_DIR}/include" \
+        -DROCKSDB_BUILD_SHARED=OFF \
+        -DWITH_LZ4=ON \
+        -DWITH_GFLAGS=OFF \
+        -DUSE_RTTI=1 \
+        -DPORTABLE=1 \
+        -DWITH_ALL_TESTS=OFF \
+        -DWITH_BENCHMARK_TOOLS=OFF \
+        -DWITH_TOOLS=OFF ..
+    make -j"${CORES}" install
+
+
+}
+
+pkg_check() {
+    :
+}
diff --git a/scripts/profiles/0.9.3-exp/install/syscall_intercept.install b/scripts/profiles/0.9.3-exp/install/syscall_intercept.install
new file mode 100644
index 000000000..9dd287271
--- /dev/null
+++ b/scripts/profiles/0.9.3-exp/install/syscall_intercept.install
@@ -0,0 +1,61 @@
+################################################################################
+# Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain            #
+# Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany          #
+#                                                                              #
+# This software was partially supported by the                                 #
+# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).    #
+#                                                                              #
+# This software was partially supported by the                                 #
+# ADA-FS project under the SPPEXA project funded by the DFG.                   #
+#                                                                              #
+# This file is part of GekkoFS.                                                #
+#                                                                              #
+# GekkoFS is free software: you can redistribute it and/or modify              #
+# it under the terms of the GNU General Public License as published by         #
+# the Free Software Foundation, either version 3 of the License, or            #
+# (at your option) any later version.                                          #
+#                                                                              #
+# GekkoFS is distributed in the hope that it will be useful,                   #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of               #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                 #
+# GNU General Public License for more details.                                 #
+#                                                                              #
+# You should have received a copy of the GNU General Public License            #
+# along with GekkoFS. If not, see <https://www.gnu.org/licenses/>.             #
+#                                                                              #
+# SPDX-License-Identifier: GPL-3.0-or-later                                    #
+################################################################################
+# vi: ft=bash
+
+################################################################################
+## The installation script must define both a pkg_install function and
+## pkg_check function that, as their name implies, must specify how
+## a dependency package should be installed and tested.
+##
+## The following variables can be used in the installation script:
+## - CMAKE: a variable that expands to the cmake binary
+## - SOURCE_DIR: the directory where the sources for the package were
+##   downloaded
+## - INSTALL_DIR: the directory where the package should be installed
+## - CORES: the number of cores to use when building
+## - COMPILER_NAME: the name of the compiler being used (e.g. g++, clang, etc.)
+## - COMPILER_FULL_VERSION: the compiler's full version (e.g. 9.3.0)
+## - COMPILER_MAJOR_VERSION: the compiler's major version (e.g. 9)
+## - PERFORM_TEST: whether tests for the package should be executed
+################################################################################
+
+pkg_install() {
+    ID="syscall_intercept"
+    CURR="${SOURCE_DIR}/${ID}"
+    prepare_build_dir "${CURR}"
+    cd "${CURR}"/build
+    $CMAKE -DCMAKE_PREFIX_PATH="${INSTALL_DIR}" \
+        -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \
+        -DCMAKE_BUILD_TYPE:STRING=Debug \
+        -DBUILD_EXAMPLES:BOOL=OFF \
+        -DBUILD_TESTS:BOOL=OFF ..
+ make -j"${CORES}" install +} + +pkg_check() { + : +} diff --git a/scripts/profiles/0.9.3-exp/marenostrum4.specs b/scripts/profiles/0.9.3-exp/marenostrum4.specs new file mode 100644 index 000000000..2fd272eff --- /dev/null +++ b/scripts/profiles/0.9.3-exp/marenostrum4.specs @@ -0,0 +1,83 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies for Marenostrum 4 supercomputer" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["psm2"]="11.2.185" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" + ["parallax"]="c130decd7a71c60c20b98d6a23924f05f754c3cd" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "psm2" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" "parallax" +) + +# Extra arguments passed to the installation script. 
As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( + ["libfabric"]="--enable-psm2=no --enable-sockets=yes" +) diff --git a/scripts/profiles/0.9.3-exp/mogon2.specs b/scripts/profiles/0.9.3-exp/mogon2.specs new file mode 100644 index 000000000..71ebf7a65 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/mogon2.specs @@ -0,0 +1,83 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies for Mogon 2 supercomputer" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["psm2"]="11.2.185" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" + ["parallax"]="c130decd7a71c60c20b98d6a23924f05f754c3cd" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "psm2" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" "parallax" +) + +# Extra arguments passed to the installation script. 
As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( + ["libfabric"]="--enable-psm2=yes --with-psm2-src=${SOURCE_DIR}/psm2" +) diff --git a/scripts/profiles/0.9.3-exp/ngio.specs b/scripts/profiles/0.9.3-exp/ngio.specs new file mode 100644 index 000000000..72c99cebd --- /dev/null +++ b/scripts/profiles/0.9.3-exp/ngio.specs @@ -0,0 +1,83 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies for NEXTGenIO prototype cluster" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["psm2"]="11.2.185" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" + ["parallax"]="c130decd7a71c60c20b98d6a23924f05f754c3cd" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "psm2" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" "parallax" +) + +# Extra arguments passed to the installation script. 
As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( + ["libfabric"]="--enable-psm2=yes --with-psm2-src=${SOURCE_DIR}/psm2" +) diff --git a/scripts/profiles/0.9.3-exp/omnipath_psm2.specs b/scripts/profiles/0.9.3-exp/omnipath_psm2.specs new file mode 100644 index 000000000..10163c302 --- /dev/null +++ b/scripts/profiles/0.9.3-exp/omnipath_psm2.specs @@ -0,0 +1,82 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies for Omnipath supercomputer" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["psm2"]="11.2.185" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="2c8765fa292bc9c28a22624c528580d54658813d" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "psm2" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" +) + +# Extra arguments passed to the installation script. 
As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( + ["libfabric"]="--enable-psm2=yes --with-psm2-src=${SOURCE_DIR}/psm2" +) diff --git a/scripts/profiles/0.9.3-exp/p9.specs b/scripts/profiles/0.9.3-exp/p9.specs new file mode 100644 index 000000000..6c78aa1ee --- /dev/null +++ b/scripts/profiles/0.9.3-exp/p9.specs @@ -0,0 +1,82 @@ +################################################################################ +# Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# vi: ft=bash + +# Variables to be imported into the scripts +declare -A wgetdeps clonedeps clonedeps_args clonedeps_patches extra_install_args +declare -a order + +# Comment that should be displayed when printing the profile +comment="Dependencies for PowerPC supercomputer" + +# Dependencies that must be downloaded directly +wgetdeps=( + ["lz4"]="1.9.3" + ["capstone"]="4.0.2" + ["argobots"]="1.1" + ["rocksdb"]="6.26.1" + ["psm2"]="11.2.185" + ["json-c"]="0.15-20200726" +) + +# Dependencies that must be cloned +clonedeps=( + ["libfabric"]="HEAD@v1.13.2" + ["mercury"]="v2.1.0" + ["margo"]="v0.9.6" + ["syscall_intercept"]="6eb27a9d2053bb2ac3bb9ce30e13b64ce055c19f" + ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" +) + +# Extra arguments for git clone +clonedeps_args=( + ["mercury"]="--recurse-submodules" +) + +# Patches that should be applied post-clone +clonedeps_patches=( + ["syscall_intercept"]="syscall_intercept.patch" +) + +# Ordering that MUST be followed when downloading +order=( + "lz4" "capstone" "json-c" "psm2" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date" +) + +# Extra arguments passed to the installation script. 
As such, they can +# reference the following variables: +# - CMAKE: a variable that expands to the cmake binary +# - SOURCE_DIR: the directory where the sources for the package were +# downloaded +# - INSTALL_DIR: the directory where the package should be installed +# - CORES: the number of cores to use when building +# - PERFORM_TEST: whether tests for the package should be executed +extra_install_args=( + ["libfabric"]="--enable-psm2=no --enable-sockets=yes" +) diff --git a/scripts/profiles/sources.list b/scripts/profiles/sources.list index 3c1082372..fcc2f4ed0 100644 --- a/scripts/profiles/sources.list +++ b/scripts/profiles/sources.list @@ -53,6 +53,8 @@ sources=( ["parallax"]="https://github.com/CARV-ICS-FORTH/parallax.git" ["prometheus-cpp"]="https://github.com/jupp0r/prometheus-cpp/releases/download/{{VERSION}}/prometheus-cpp-with-submodules.tar.gz" ["curl"]="https://curl.se/download/curl-{{VERSION}}.tar.gz" + ["gf-complete"]="https://github.com/ceph/gf-complete" + ["Jerasure"]="https://github.com/tsuraan/Jerasure" ) diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 414e54012..5ea7fd3ac 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -62,7 +62,8 @@ target_link_libraries( fmt::fmt Threads::Threads Date::TZ - Jerasure + GF_complete::GF_complete + Jerasure::Jerasure ) install( diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 79570ad9e..67a1169f3 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -46,7 +46,7 @@ extern "C" { } #include -#include +#include using namespace std; /* -- GitLab From cf99e85d78c5e5127cb6b3e9d5467ef28529259e Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 7 Jun 2023 07:36:37 +0200 Subject: [PATCH 08/17] Added GKFS_ENABLE_EC First Write - ECC cout -> LOG WIP Decode EC to restore chunk read recovery WIP --- CHANGELOG.md | 2 +- CMake/gkfs-options.cmake | 10 + CMakeLists.txt | 10 +- README.md | 5 + include/client/rpc/forward_data.hpp | 17 +- include/common/rpc/distributor.hpp | 1 - src/client/CMakeLists.txt | 16 +- src/client/gkfs_functions.cpp | 200 ++++++++++-------- src/client/preload.cpp | 2 +- src/client/rpc/forward_data.cpp | 305 ++++++++++++++++++++++++++-- 10 files changed, 453 insertions(+), 115 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f60ddb994..db3a8e823 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). replicas ([!166](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/141)). - Modified write and reads to use a bitset instead of the traditional hash per chunk in the server. - Added reattemp support in get_fs_config to other servers, when the initial server fails. - +- Added support for Erasure codes ([!1xx]) ### New - Additional tests to increase code coverage ([!141](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/141)). 
diff --git a/CMake/gkfs-options.cmake b/CMake/gkfs-options.cmake
index 598de903c..fbed15b8f 100644
--- a/CMake/gkfs-options.cmake
+++ b/CMake/gkfs-options.cmake
@@ -370,4 +370,14 @@ gkfs_define_option(
     DESCRIPTION "If GKFS_ENABLE_FORWARDING is ON, use AGIOS for scheduling I/Os"
 )
 
+################################################################################
+# Erasure codes
+################################################################################
+
+gkfs_define_option(
+    GKFS_ENABLE_EC
+    HELP_TEXT "Enable Erasure Code"
+    DEFAULT_VALUE OFF
+    DESCRIPTION "Use Jerasure for erasure-code reliability"
+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1aedca403..2070e9173 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,8 +203,14 @@ if(GKFS_ENABLE_PROMETHEUS)
     find_package(prometheus-cpp REQUIRED) # >= 1.0.0
 endif()
 
-find_package(GF_complete)
-find_package(Jerasure)
+### Jerasure: needed for the calculation of Erasure codes
+if(GKFS_ENABLE_EC)
+    message(STATUS "[${PROJECT_NAME}] Checking for Jerasure")
+    add_compile_definitions(GKFS_ENABLE_EC)
+    find_package(GF_complete REQUIRED)
+    find_package(Jerasure REQUIRED)
+endif()
+
 ### Other stuff that can be found out using find_package:
 # determine the thread library of the system
diff --git a/README.md b/README.md
index fff5deead..3ac2dba98 100644
--- a/README.md
+++ b/README.md
@@ -325,6 +325,11 @@ The user can enable the data replication feature by setting the replication envi
 The number of replicas should go from 0 to the number of servers-1.
 The replication environment variable can be set up for each client, independently.
 
+### Erasure codes
+The user can enable erasure-code resilience by compiling GekkoFS with `-DGKFS_ENABLE_EC`.
+Using `LIBGKFS_NUM_REPL=`, the user can define the number of EC (parity) servers.
+The total number of servers available for data is then the total number of servers minus `num_repl`.
+
 ## Acknowledgment
 
 This software was partially supported by the EC H2020 funded NEXTGenIO project
(Project ID: 671951, www.nextgenio.eu).
diff --git a/include/client/rpc/forward_data.hpp b/include/client/rpc/forward_data.hpp
index 072d4bc9f..28e05fa9f 100644
--- a/include/client/rpc/forward_data.hpp
+++ b/include/client/rpc/forward_data.hpp
@@ -43,7 +43,10 @@ struct ChunkStat {
 
 // TODO once we have LEAF, remove all the error code returns and throw them as
 // an exception.
- +std::pair +ecc_forward_write(const std::string& path, const void* buf, const size_t write_size, + const int8_t server); + std::pair forward_write(const std::string& path, const void* buf, off64_t offset, size_t write_size, const int8_t num_copy = 0); @@ -59,12 +62,12 @@ forward_truncate(const std::string& path, size_t current_size, size_t new_size, std::pair forward_get_chunk_stat(); -#define GKFS_USE_ECC_DISTRIBUTION 1 -#ifdef GKFS_USE_ECC_DISTRIBUTION -std::pair calc_op_chunks(const std::string& path, const bool append_flag, - const off64_t in_offset, const size_t write_size, - const int64_t updated_metadentry_size); -#endif + +std::pair +calc_op_chunks(const std::string& path, const bool append_flag, + const off64_t in_offset, const size_t write_size, + const int64_t updated_metadentry_size); + } // namespace gkfs::rpc diff --git a/include/common/rpc/distributor.hpp b/include/common/rpc/distributor.hpp index bbec815ce..697a1df6c 100644 --- a/include/common/rpc/distributor.hpp +++ b/include/common/rpc/distributor.hpp @@ -36,7 +36,6 @@ #include #include #include -#define GKFS_USE_ECC_DISTRIBUTION 1 namespace gkfs::rpc { using chunkid_t = unsigned int; diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 5ea7fd3ac..8c3f117e9 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -53,6 +53,19 @@ target_sources( ) target_link_libraries( + gkfs_intercept + PRIVATE metadata distributor env_util arithmetic path_util rpc_utils + PUBLIC Syscall_intercept::Syscall_intercept + dl + Mercury::Mercury + hermes + fmt::fmt + Threads::Threads + Date::TZ +) + +if(GKFS_ENABLE_EC) + target_link_libraries( gkfs_intercept PRIVATE metadata distributor env_util arithmetic path_util rpc_utils PUBLIC Syscall_intercept::Syscall_intercept @@ -64,7 +77,8 @@ target_link_libraries( Date::TZ GF_complete::GF_complete Jerasure::Jerasure -) + ) +endif() install( TARGETS gkfs_intercept diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 67a1169f3..08604a243 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -44,9 +44,12 @@ extern "C" { #include #include } - +#define GKFS_ENABLE_EC 1 +#ifdef GKFS_ENABLE_EC #include #include +#endif + using namespace std; /* @@ -860,63 +863,12 @@ gkfs_dup2(const int oldfd, const int newfd) { return CTX->file_map()->dup2(oldfd, newfd); } -/** - * Wrapper function for all gkfs write operations - * errno may be set - * @param file - * @param buf - * @param count - * @param offset - * @param update_pos pos should only be updated for some write operations (see - * man 2 pwrite) - * @return written size or -1 on error - */ -ssize_t -gkfs_pwrite(std::shared_ptr file, const char* buf, - size_t count, off64_t offset, bool update_pos) { - if(file->type() != gkfs::filemap::FileType::regular) { - assert(file->type() == gkfs::filemap::FileType::directory); - LOG(WARNING, "Cannot write to directory"); - errno = EISDIR; - return -1; - } - auto path = make_unique(file->path()); - auto is_append = file->get_flag(gkfs::filemap::OpenFile_flags::append); - auto write_size = 0; - auto num_replicas = CTX->get_replicas(); - - auto ret_offset = gkfs::rpc::forward_update_metadentry_size( - *path, count, offset, is_append, num_replicas); - auto err = ret_offset.first; - if(err) { - LOG(ERROR, "update_metadentry_size() failed with err '{}'", err); - errno = err; - return -1; - } - if(is_append) { - // When append is set the EOF is set to the offset - // forward_update_metadentry_size returns. 
This is because it is an - // atomic operation on the server and reserves the space for this append - if(ret_offset.second == -1) { - LOG(ERROR, - "update_metadentry_size() received -1 as starting offset. " - "This occurs when the staring offset could not be extracted " - "from RocksDB's merge operations. Inform GekkoFS devs."); - errno = EIO; - return -1; - } - offset = ret_offset.second; - } - - auto ret_write = gkfs::rpc::forward_write(*path, buf, offset, count, 0); - err = ret_write.first; - write_size = ret_write.second; -#define GKFS_USE_ECC_DISTRIBUTION 1 -#ifdef GKFS_USE_ECC_DISTRIBUTION - // Process ECC calculation - - // 0 - Involved chunks: +bool +gkfs_ecc_write(std::shared_ptr file, size_t count, + off64_t offset, off64_t updated_size) { + auto path = make_shared(file->path()); + auto append_flag = file->get_flag(gkfs::filemap::OpenFile_flags::append); auto chunks = gkfs::rpc::calc_op_chunks(*path, append_flag, offset, count, updated_size); @@ -943,25 +895,25 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, // 1 - Read data from the other chunks std::vector buffers( - CTX->hosts().size(), - (char*) malloc(gkfs::config::rpc::chunksize)); - std::cout << "OPERATION " - << " --- Size : " << updated_size - << " Chunks Range:" << chunks.first << " -- " << chunks.second - << " Data + Repliscas " << data_servers << " -- " - << CTX->get_replicas() << std::endl; + data_servers, (char*) malloc(gkfs::config::rpc::chunksize)); + LOG(DEBUG, + "Operation Size {} - Range {}-{} - data_servers {} replica_servers {}", + updated_size, chunks.first, chunks.second, data_servers, + CTX->get_replicas()); + // TODO : This could be optimised, with a single read loop for(auto i : chunk_set) { - std::cout << i << " --- Size : " << updated_size << std::endl; + for(auto j = i; j < i + data_servers; ++j) { std::set failed; - std::cout << " Reading chunk " - << " [" << i << "] --> " << j << std::endl; + LOG(DEBUG, "Reading Chunk {} -> {}", i, j); + auto out = gkfs::rpc::forward_read( *path, buffers[j - i], j * gkfs::config::rpc::chunksize, gkfs::config::rpc::chunksize, 0, failed); - std::cout << " Read Success " << out.first << " -- " - << out.second << std::endl; + if(out.first != 0) { + LOG(ERROR, "Read Parity Error: {}", out.first); + } } // We have all the data to process a EC @@ -976,15 +928,84 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, buffers.data(), coding.data(), gkfs::config::rpc::chunksize); - std::cout << " Parity computation done " << std::endl; + LOG(DEBUG, "EC computation finished"); + + // Write erasure + std::string ecc_path = file->path() + "_ecc_" + to_string(i) + "_" + + to_string(i + data_servers); + for(int i = 0; i < CTX->get_replicas(); i++) { + auto ecc_write = gkfs::rpc::ecc_forward_write( + ecc_path, coding[i], gkfs::config::rpc::chunksize, + i + data_servers); + if(ecc_write.first != 0) { + LOG(ERROR, "write Parity Error: {}", ecc_write.first); + return false; + } else { + LOG(DEBUG, "write Parity OK: {}", ecc_path); + } + } } } else { - std::cout << "No EC in small files" << std::endl; + LOG(DEBUG, "No EC in small files"); + return false; } - // 2 - Calc Erasure codes + return true; +} - // 3 - Write erasure codes +/** + * Wrapper function for all gkfs write operations + * errno may be set + * @param file + * @param buf + * @param count + * @param offset + * @param update_pos pos should only be updated for some write operations (see + * man 2 pwrite) + * @return written size or -1 on error + */ +ssize_t +gkfs_pwrite(std::shared_ptr file, const char* buf, + size_t 
count, off64_t offset, bool update_pos) {
+    if(file->type() != gkfs::filemap::FileType::regular) {
+        assert(file->type() == gkfs::filemap::FileType::directory);
+        LOG(WARNING, "Cannot write to directory");
+        errno = EISDIR;
+        return -1;
+    }
+    auto path = make_unique<string>(file->path());
+    auto is_append = file->get_flag(gkfs::filemap::OpenFile_flags::append);
+    auto write_size = 0;
+    auto num_replicas = CTX->get_replicas();
 
+    auto ret_offset = gkfs::rpc::forward_update_metadentry_size(
+            *path, count, offset, is_append, num_replicas);
+    auto err = ret_offset.first;
+    if(err) {
+        LOG(ERROR, "update_metadentry_size() failed with err '{}'", err);
+        errno = err;
+        return -1;
+    }
+    if(is_append) {
+        // When append is set the EOF is set to the offset
+        // forward_update_metadentry_size returns. This is because it is an
+        // atomic operation on the server and reserves the space for this append
+        if(ret_offset.second == -1) {
+            LOG(ERROR,
+                "update_metadentry_size() received -1 as starting offset. "
+                "This occurs when the starting offset could not be extracted "
+                "from RocksDB's merge operations. Inform GekkoFS devs.");
+            errno = EIO;
+            return -1;
+        }
+        offset = ret_offset.second;
+    }
+
+    auto ret_write = gkfs::rpc::forward_write(*path, buf, offset, count, 0);
+    err = ret_write.first;
+    write_size = ret_write.second;
+
+#ifdef GKFS_ENABLE_EC
+    // end of the file region touched by this write (offset already holds
+    // the resolved append offset at this point)
+    auto updated_size = offset + static_cast<off64_t>(count);
+    gkfs_ecc_write(file, count, offset, updated_size);
 #else
 
     if(num_replicas > 0) {
@@ -1124,8 +1145,8 @@ gkfs_pread(std::shared_ptr<gkfs::filemap::OpenFile> file, char* buf,
         return -1;
     }
 
-    // Zeroing buffer before read is only relevant for sparse files. Otherwise
-    // sparse regions contain invalid data.
+    // Zeroing buffer before read is only relevant for sparse files.
+    // Otherwise sparse regions contain invalid data.
     if constexpr(gkfs::config::io::zero_buffer_before_read) {
         memset(buf, 0, sizeof(char) * count);
     }
@@ -1136,10 +1157,14 @@
         ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, 0,
                                       failed);
         while(ret.first == EIO) {
+#ifdef GKFS_ENABLE_EC
+            LOG (WARNING,"failed to read");
+#else
             ret = gkfs::rpc::forward_read(file->path(), buf, offset, count,
                                           CTX->get_replicas(), failed);
             LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'",
                 ret.first);
+#endif
         }
 
     } else {
@@ -1356,11 +1381,11 @@ gkfs_getdents(unsigned int fd, struct linux_dirent* dirp, unsigned int count) {
          * Calculate the total dentry size within the kernel struct
          * `linux_dirent` depending on the file name size. The size is then
          * aligned to the size of `long` boundary. This line was originally
-         * defined in the linux kernel: fs/readdir.c in function filldir(): int
-         * reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
-         * sizeof(long)); However, since d_name is null-terminated and
-         * de.name().size() does not include space for the null-terminator, we
-         * add 1. Thus, + 3 in total.
+         * defined in the linux kernel: fs/readdir.c in function filldir():
+         * int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen
+         * + 2, sizeof(long)); However, since d_name is null-terminated and
+         * de.name().size() does not include space for the null-terminator,
+         * we add 1. Thus, + 3 in total.
          */
         auto total_size = ALIGN(offsetof(struct linux_dirent, d_name) +
                                         de.name().size() + 3,
@@ -1427,13 +1452,14 @@ gkfs_getdents64(unsigned int fd, struct linux_dirent64* dirp,
          * `linux_dirent` depending on the file name size. The size is then
          * aligned to the size of `long` boundary. 
* - * This line was originally defined in the linux kernel: fs/readdir.c in - * function filldir64(): int reclen = ALIGN(offsetof(struct - * linux_dirent64, d_name) + namlen + 1, sizeof(u64)); We keep + 1 - * because: Since d_name is null-terminated and de.name().size() does - * not include space for the null-terminator, we add 1. Since d_name in - * our `struct linux_dirent64` definition is not a zero-size array (as - * opposed to the kernel version), we subtract 1. Thus, it stays + 1. + * This line was originally defined in the linux kernel: + * fs/readdir.c in function filldir64(): int reclen = + * ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, + * sizeof(u64)); We keep + 1 because: Since d_name is + * null-terminated and de.name().size() does not include space for + * the null-terminator, we add 1. Since d_name in our `struct + * linux_dirent64` definition is not a zero-size array (as opposed + * to the kernel version), we subtract 1. Thus, it stays + 1. */ auto total_size = ALIGN(offsetof(struct linux_dirent64, d_name) + de.name().size() + 1, diff --git a/src/client/preload.cpp b/src/client/preload.cpp index c4d1a2490..25056a86d 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -223,7 +223,7 @@ init_environment() { #ifdef GKFS_USE_GUIDED_DISTRIBUTION auto distributor = std::make_shared( CTX->local_host_id(), CTX->hosts().size()); -#elif GKFS_USE_ECC_DISTRIBUTION +#elif GKFS_ENABLE_EC auto distributor = std::make_shared( CTX->local_host_id(), CTX->hosts().size(), CTX->get_replicas()); #else diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 26de8270f..9cd61be2d 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -38,6 +38,12 @@ #include +#define GKFS_ENABLE_EC 1 +#ifdef GKFS_ENABLE_EC +#include +#include +#endif + using namespace std; namespace gkfs::rpc { @@ -47,17 +53,17 @@ namespace gkfs::rpc { * NOTE: No errno is defined here! */ -#ifdef GKFS_USE_ECC_DISTRIBUTION /** - * @brief Calculate the chunk start and end that will be affected by the operation. - * - * @param path - * @param append_flag - * @param in_offset - * @param write_size - * @param updated_metadentry_size - * @param num_copies - * @return pair + * @brief Calculate the chunk start and end that will be affected by the + * operation. + * + * @param path + * @param append_flag + * @param in_offset + * @param write_size + * @param updated_metadentry_size + * @param num_copies + * @return pair */ std::pair calc_op_chunks(const std::string& path, const bool append_flag, @@ -72,10 +78,139 @@ calc_op_chunks(const std::string& path, const bool append_flag, gkfs::config::rpc::chunksize); - return make_pair(chnk_start, chnk_end); + return make_pair(chnk_start, chnk_end); } -#endif +// #ifdef GKFS_ENABLE_EC +/** + * Send an RPC request to write from a buffer. + * There is a bitset of 1024 chunks to tell the server + * which chunks to process. Exceeding this value will work without + * replication. Another way is to leverage mercury segments. 
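+ * Example (assuming the default 512 KiB chunk size): the 1024-chunk
+ * bitset covers up to 512 MiB per target and RPC; bit n is set as
+ * vect[n / 8] |= 1 << (n % 8), which is what the set_bitset()/get_bitset()
+ * helpers do.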
+ * TODO: Decide how to manage a write to a replica that doesn't exist + * @param path + * @param buf + * @param append_flag + * @param in_offset + * @param write_size + * @param updated_metadentry_size + * @param num_copies number of replicas + * @return pair + */ +pair +ecc_forward_write(const string& path, const void* buf, const size_t write_size, + const int8_t server) { + + // import pow2-optimized arithmetic functions + using namespace gkfs::utils::arithmetic; + + assert(write_size > 0); + + std::vector write_ops_vect(8); + gkfs::rpc::set_bitset(write_ops_vect, 0); + + // some helper variables for async RPC + std::vector bufseq{ + hermes::mutable_buffer{const_cast(buf), write_size}, + }; + + // expose user buffers so that they can serve as RDMA data sources + // (these are automatically "unexposed" when the destructor is called) + hermes::exposed_memory local_buffers; + + try { + local_buffers = ld_network_service->expose( + bufseq, hermes::access_mode::read_only); + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to expose buffers for RMA"); + return make_pair(EBUSY, 0); + } + + std::vector> handles; + + // Issue non-blocking RPC requests and wait for the result later + // + // TODO(amiranda): This could be simplified by adding a vector of inputs + // to async_engine::broadcast(). This would allow us to avoid manually + // looping over handles as we do below + auto target = server; + + // total chunk_size for target + auto total_chunk_size = gkfs::config::rpc::chunksize; + + auto endp = CTX->hosts().at(target); + + try { + LOG(DEBUG, "Sending RPC ..."); + + gkfs::rpc::write_data::input in( + path, + // first offset in targets is the chunk with + // a potential offset + 0, target, CTX->hosts().size(), + // number of chunks handled by that destination + gkfs::rpc::compressBitset(write_ops_vect), 1, + // chunk start id of this write + 0, + // chunk end id of this write + 0, + // total size to write + total_chunk_size, local_buffers); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + LOG(DEBUG, "host: {}, path: \"{}\", size: {}", target, path, + total_chunk_size); + } catch(const std::exception& ex) { + LOG(ERROR, + "Unable to send non-blocking rpc for " + "path \"{}\" [peer: {}]", + path, target); + + return make_pair(EBUSY, 0); + } + + auto err = 0; + ssize_t out_size = 0; + std::size_t idx = 0; + + for(const auto& h : handles) { + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); + + if(out.err() != 0) { + LOG(ERROR, "Daemon reported error: {}", out.err()); + err = out.err(); + } else { + out_size += static_cast(out.io_size()); + } + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to get rpc output for path \"{}\"", path); + err = EIO; + } + idx++; + } + + /* + * Typically file systems return the size even if only a part of it was + * written. In our case, we do not keep track which daemon fully wrote its + * workload. Thus, we always return size 0 on error. + */ + if(err) + return make_pair(err, 0); + else + return make_pair(0, out_size); +} +// #endif /** * Send an RPC request to write from a buffer. 
@@ -313,6 +448,84 @@ forward_write(const string& path, const void* buf, const off64_t offset, return make_pair(0, out_size); } + +// To recover a missing chunk, we need to read all the remaining +// And apply the reconstruction function. +// This function is similar to the creation function +bool +gkfs_ecc_recover(const std::string& path, std::vector buffer_recover, + uint64_t chunk_candidate, uint64_t failed_server) { + + std::vector buffers(CTX->hosts().size(), + (char*) malloc(gkfs::config::rpc::chunksize)); + + auto data_servers = CTX->hosts().size() - CTX->get_replicas(); + + auto initial_row_chunk = (chunk_candidate / data_servers) * data_servers; + + + // Parity Stored in : parity1 .. parity2, as name = + // [PARITY][Path][Initial row chunk] + + // 1 - Read data from the other chunks plus the parity + + LOG(DEBUG, "Operation Size - Range {} - data_servers {} replica_servers {}", + initial_row_chunk, data_servers, CTX->get_replicas()); + + vector erased(CTX->hosts().size(), 1); + + auto i = initial_row_chunk; + + for(auto j = i; j < i + data_servers; ++j) { + std::set failed; + LOG(DEBUG, "Reading Chunk {} -> {}, from server {} ", i, j, j - i); + + auto out = gkfs::rpc::forward_read( + path, buffers[j - i], j * gkfs::config::rpc::chunksize, + gkfs::config::rpc::chunksize, 0, failed); + if(out.first != 0) { + LOG(ERROR, "Read Parity Error: {}", out.first); + erased[j - i] = 0; + } + } + + std::string ecc_path = + path + "_ecc_" + to_string(i) + "_" + to_string(i + data_servers); + + for(auto j = i + data_servers; j < i + CTX->hosts().size(); ++j) { + std::set failed; + LOG(DEBUG, "Reading EC Chunk {} {} -> {}, from server {} ", ecc_path, + i + data_servers, j, j - i + data_servers); + + auto out = gkfs::rpc::forward_read( + ecc_path, buffers[j - i + data_servers], + j * gkfs::config::rpc::chunksize, gkfs::config::rpc::chunksize, + 0, failed); + if(out.first != 0) { + LOG(ERROR, "Read Parity Error: {}", out.first); + erased[j - i + data_servers] = 0; + } + } + + + // We have all the data to recover the buffer + auto matrix = reed_sol_vandermonde_coding_matrix(data_servers, + CTX->get_replicas(), 8); + jerasure_matrix_decode(data_servers, CTX->get_replicas(), 8, matrix, 0, + erased.data(), buffers.data(), + buffers.data() + + gkfs::config::rpc::chunksize * data_servers, + gkfs::config::rpc::chunksize); + + memcpy(buffer_recover.data(), + buffers.data() + erased.front() * gkfs::config::rpc::chunksize, + gkfs::config::rpc::chunksize); + LOG(DEBUG, "EC computation finished"); + + return true; +} // namespace gkfs::rpc + + /** * Send an RPC request to read to a buffer. 
 * @param path
+ * @param buf
+ * @param offset
+ * @param read_size
+ * @param num_copies number of copies available (0 is no replication)
+ * @param failed nodes failed that should not be used
+ * @return pair
+ */
@@ -359,12 +572,11 @@ forward_read(const string& path, void* buf, const off64_t offset,
                             rand() % num_copies);
             }
         }
-
         if(read_bitset_vect.find(target) == read_bitset_vect.end())
             read_bitset_vect[target] =
                     std::vector<uint8_t>(((chnk_total + 7) / 8));
-        read_bitset_vect[target][(chnk_id - chnk_start) / 8] |=
-                1 << ((chnk_id - chnk_start) % 8); // set
+
+        gkfs::rpc::set_bitset(read_bitset_vect[target], chnk_id - chnk_start);
 
         if(target_chnks.count(target) == 0) {
             target_chnks.insert(
@@ -507,6 +719,69 @@ forward_read(const string& path, void* buf, const off64_t offset,
             failed.insert(targets[idx]);
             // Then repeat the read with another peer (We repeat the full
             // read; this can be optimised but it is a corner case)
+
+#ifdef GKFS_ENABLE_EC
+            // We try to recover the missing data from the failed server:
+            // obtain the full chunk from all the other servers,
+            // decode the data,
+            // fill the gaps, and then remove the failed server while
+            // keeping the variables consistent.
+            auto failed_server = targets[idx];
+
+            // For all the chunks activated in the bitset, recover and fill the
+            // buffer.
+            for(auto chnk_id_file = chnk_start; chnk_id_file <= chnk_end;
+                chnk_id_file++) {
+                // Continue if chunk does not hash to this host
+                // We only check if we are not using replicas
+
+                if(!(gkfs::rpc::get_bitset(read_bitset_vect[failed_server],
+                                           chnk_id_file - chnk_start))) {
+
+                    continue;
+                }
+
+                // We have a chunk to recover
+                // We don't need to worry about offset etc... just use the chunk
+                // number
+                std::vector recovered_chunk(
+                        1, (char*) malloc(gkfs::config::rpc::chunksize));
+                gkfs::rpc::gkfs_ecc_recover(path, recovered_chunk, chnk_id_file,
+                                            failed_server);
+
+                // Move recovered_chunk to the buffer; the first and last chunk
+                // may need to subtract an offset or shrink the size...
+                auto recover_size = gkfs::config::rpc::chunksize;
+                auto recover_offt = chnk_id_file * gkfs::config::rpc::chunksize;
+                auto recover_offt_chunk = (chnk_id_file - chnk_start) *
+                                          gkfs::config::rpc::chunksize;
+
+                if(chnk_id_file == chnk_start) {
+                    // We may need to move the offset of both buffers and reduce
+                    // the recover size
+                    auto offset_fc =
+                            block_overrun(offset, gkfs::config::rpc::chunksize);
+                    recover_offt += offset_fc;
+                    recover_offt_chunk += offset_fc;
+                    recover_size -= offset_fc;
+                }
+                if(chnk_id_file == chnk_end) {
+                    // We may need to reduce the recover size.
+                    if(!is_aligned(offset + read_size,
+                                   gkfs::config::rpc::chunksize)) {
+                        recover_size -=
+                                block_underrun(offset + read_size,
+                                               gkfs::config::rpc::chunksize);
+                    }
+                }
+                LOG(DEBUG,
+                    "Recovered chunk : Start Offset {}/OffsetChunk {} - Size {}",
+                    recover_offt, recover_offt_chunk, recover_size);
+                memcpy((char*)buf + recover_offt,
+                       recovered_chunk.data() + recover_offt_chunk,
+                       recover_size);
+            }
+#endif
         }
         idx++;
     }
-- 
GitLab


From 62eeacb4e0fca6ec2cdf554c558edef8ae4dc8d5 Mon Sep 17 00:00:00 2001
From: Ramon Nou
Date: Mon, 12 Jun 2023 15:09:03 +0200
Subject: [PATCH 09/17] Testing EC recovery

---
 src/client/rpc/forward_data.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp
index 9cd61be2d..4a05ec3b9 100644
--- a/src/client/rpc/forward_data.cpp
+++ b/src/client/rpc/forward_data.cpp
@@ -453,7 +453,7 @@ forward_write(const string& path, const void* buf, const off64_t offset,
 // And apply the reconstruction function.
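 // Concretely: read the surviving data chunks of the stripe row, read the
 // parity chunks stored under the "_ecc_" path, and let
 // jerasure_matrix_decode() rebuild the chunk of the failed server.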
// This function is similar to the creation function bool -gkfs_ecc_recover(const std::string& path, std::vector buffer_recover, +gkfs_ecc_recover(const std::string& path, void* buffer_recover, uint64_t chunk_candidate, uint64_t failed_server) { std::vector buffers(CTX->hosts().size(), @@ -506,7 +506,8 @@ gkfs_ecc_recover(const std::string& path, std::vector buffer_recover, erased[j - i + data_servers] = 0; } } - + // We force a failure + erased[failed_server] = 1; // We have all the data to recover the buffer auto matrix = reed_sol_vandermonde_coding_matrix(data_servers, @@ -517,7 +518,7 @@ gkfs_ecc_recover(const std::string& path, std::vector buffer_recover, gkfs::config::rpc::chunksize * data_servers, gkfs::config::rpc::chunksize); - memcpy(buffer_recover.data(), + memcpy(buffer_recover, buffers.data() + erased.front() * gkfs::config::rpc::chunksize, gkfs::config::rpc::chunksize); LOG(DEBUG, "EC computation finished"); @@ -708,7 +709,9 @@ forward_read(const string& path, void* buf, const off64_t offset, LOG(ERROR, "Daemon reported error: {}", out.err()); err = out.err(); } - + if(rand() % 2 == 0) { + throw std::exception(); + } out_size += static_cast(out.io_size()); } catch(const std::exception& ex) { @@ -744,8 +747,7 @@ forward_read(const string& path, void* buf, const off64_t offset, // We have a chunk to recover // We don't need to worry about offset etc... just use the chunk // number - std::vector recovered_chunk( - 1, (char*) malloc(gkfs::config::rpc::chunksize)); + void* recovered_chunk = malloc(gkfs::config::rpc::chunksize); gkfs::rpc::gkfs_ecc_recover(path, recovered_chunk, chnk_id_file, failed_server); @@ -777,8 +779,8 @@ forward_read(const string& path, void* buf, const off64_t offset, LOG(DEBUG, "Recovered chunk : Start Offset {}/OffsetChunk {} - Size {}", recover_offt, recover_offt_chunk, recover_size); - memcpy((char*)buf + recover_offt, - recovered_chunk.data() + recover_offt_chunk, + memcpy((char*) buf + recover_offt, + (char*) recovered_chunk + recover_offt_chunk, recover_size); } #endif -- GitLab From ddc502bde86ceb562444b4b7ab20cc81757076c5 Mon Sep 17 00:00:00 2001 From: rnou Date: Tue, 13 Jun 2023 09:09:10 +0200 Subject: [PATCH 10/17] Updated Jerasure installation, fixing includes WIP Missing write with char** --- CMake/FindJerasure.cmake | 9 +- include/client/rpc/forward_data.hpp | 11 +- .../0.9.3-exp/install/Jerasure.install | 1 + src/client/gkfs_functions.cpp | 8 +- src/client/rpc/forward_data.cpp | 231 +++++++++++++++--- 5 files changed, 215 insertions(+), 45 deletions(-) diff --git a/CMake/FindJerasure.cmake b/CMake/FindJerasure.cmake index ffd0ece1e..8117b47e6 100644 --- a/CMake/FindJerasure.cmake +++ b/CMake/FindJerasure.cmake @@ -34,13 +34,8 @@ # Jerasure_LIBRARIES # -# - Try to find galois as Jerasure.h is installed in the root include find_path(Jerasure_INCLUDE_DIR - NAMES jerasure.h - ) - -find_path(Jerasure2_INCLUDE_DIR - NAMES galois.h + NAMES jerasure/jerasure.h ) find_library(Jerasure_LIBRARY @@ -48,7 +43,7 @@ find_library(Jerasure_LIBRARY ) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( Jerasure +find_package_handle_standard_args(Jerasure DEFAULT_MSG Jerasure_INCLUDE_DIR Jerasure_LIBRARY diff --git a/include/client/rpc/forward_data.hpp b/include/client/rpc/forward_data.hpp index 28e05fa9f..5248e90bd 100644 --- a/include/client/rpc/forward_data.hpp +++ b/include/client/rpc/forward_data.hpp @@ -43,10 +43,15 @@ struct ChunkStat { // TODO once we have LEAF, remove all the error code returns and throw them as // an exception. 
+ +std::pair +ecc_forward_read(const std::string& path, void* buf, const size_t read_size, + const int8_t server); + std::pair -ecc_forward_write(const std::string& path, const void* buf, const size_t write_size, - const int8_t server); - +ecc_forward_write(const std::string& path, const void* buf, + const size_t write_size, const int8_t server); + std::pair forward_write(const std::string& path, const void* buf, off64_t offset, size_t write_size, const int8_t num_copy = 0); diff --git a/scripts/profiles/0.9.3-exp/install/Jerasure.install b/scripts/profiles/0.9.3-exp/install/Jerasure.install index 4b6f160bf..5eda7d3f3 100644 --- a/scripts/profiles/0.9.3-exp/install/Jerasure.install +++ b/scripts/profiles/0.9.3-exp/install/Jerasure.install @@ -51,6 +51,7 @@ pkg_install() { ./configure --prefix="${INSTALL_DIR}" make -j"${CORES}" make install + mv ${INSTALL_DIR}/include/jerasure.h ${INSTALL_DIR}/include/jerasure/jerasure.h } pkg_check() { diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 08604a243..6ccb86eea 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -46,7 +46,7 @@ extern "C" { } #define GKFS_ENABLE_EC 1 #ifdef GKFS_ENABLE_EC -#include +#include #include #endif @@ -932,7 +932,7 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, // Write erasure std::string ecc_path = file->path() + "_ecc_" + to_string(i) + "_" + - to_string(i + data_servers); + to_string(i + data_servers - 1); for(int i = 0; i < CTX->get_replicas(); i++) { auto ecc_write = gkfs::rpc::ecc_forward_write( ecc_path, coding[i], gkfs::config::rpc::chunksize, @@ -1154,11 +1154,11 @@ gkfs_pread(std::shared_ptr file, char* buf, std::set failed; // set with failed targets. if(CTX->get_replicas() != 0) { - ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, 0, + ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, CTX->get_replicas(), failed); while(ret.first == EIO) { #ifdef GKFS_ENABLE_EC - LOG (WARNING,"failed to read"); + LOG(WARNING, "failed to read"); #else ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, CTX->get_replicas(), failed); diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 4a05ec3b9..1ba9b0576 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -40,7 +40,7 @@ #define GKFS_ENABLE_EC 1 #ifdef GKFS_ENABLE_EC -#include +#include #include #endif @@ -210,6 +210,130 @@ ecc_forward_write(const string& path, const void* buf, const size_t write_size, else return make_pair(0, out_size); } +/** + * Send an RPC request to read to a buffer. 
+ * @param path + * @param buf + * @param offset + * @param read_size + * @param num_copies number of copies available (0 is no replication) + * @param failed nodes failed that should not be used + * @return pair + */ +pair +ecc_forward_read(const string& path, void* buf, const size_t read_size, + const int8_t server) { + + + std::vector read_bitset_vect(8, 0); + gkfs::rpc::set_bitset(read_bitset_vect, 0); + + // some helper variables for async RPCs + std::vector bufseq{ + hermes::mutable_buffer{buf, read_size}, + }; + + // expose user buffers so that they can serve as RDMA data targets + // (these are automatically "unexposed" when the destructor is called) + hermes::exposed_memory local_buffers; + + try { + local_buffers = ld_network_service->expose( + bufseq, hermes::access_mode::write_only); + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to expose buffers for RMA"); + return make_pair(EBUSY, 0); + } + + std::vector> handles; + + // Issue non-blocking RPC requests and wait for the result later + // + // TODO(amiranda): This could be simplified by adding a vector of inputs + // to async_engine::broadcast(). This would allow us to avoid manually + // looping over handles as we do below + + auto target = server; + + auto endp = CTX->hosts().at(target); + + try { + + LOG(DEBUG, "Sending RPC ..."); + + gkfs::rpc::read_data::input in( + path, + // first offset in targets is the chunk with + // a potential offset + 0, target, CTX->hosts().size(), + gkfs::rpc::compressBitset(read_bitset_vect), + // number of chunks handled by that destination + 1, + // chunk start id of this write + 0, + // chunk end id of this write + 0, + // total size to write + read_size, local_buffers); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so + // that we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a + // post(endpoint) returning one result and a + // broadcast(endpoint_set) returning a result_set. When that + // happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + } catch(const std::exception& ex) { + LOG(ERROR, + "Unable to send non-blocking rpc for path \"{}\" " + "[peer: {}]", + path, target); + return make_pair(EBUSY, 0); + } + + // Wait for RPC responses and then get response and add it to out_size + // which is the read size. All potential outputs are served to free + // resources regardless of errors, although an errorcode is set. + auto err = 0; + ssize_t out_size = 0; + std::size_t idx = 0; + + for(const auto& h : handles) { + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); + + if(out.err() != 0) { + LOG(ERROR, "Daemon reported error: {}", out.err()); + err = out.err(); + } + + out_size += static_cast(out.io_size()); + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to get rpc output for path \"{}\" [peer: {}]", + path, target); + err = EIO; + } + idx++; + } + + + /* + * Typically file systems return the size even if only a part of it was + * read. In our case, we do not keep track which daemon fully read its + * workload. Thus, we always return size 0 on error. 
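+ * A caller can therefore treat any non-zero error as a whole-chunk
+ * failure and fall back to reconstruction (see gkfs_ecc_recover below).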
+ */ + if(err) + return make_pair(err, 0); + else + return make_pair(0, out_size); +} + // #endif /** @@ -456,11 +580,19 @@ bool gkfs_ecc_recover(const std::string& path, void* buffer_recover, uint64_t chunk_candidate, uint64_t failed_server) { - std::vector buffers(CTX->hosts().size(), - (char*) malloc(gkfs::config::rpc::chunksize)); auto data_servers = CTX->hosts().size() - CTX->get_replicas(); + char** data = (char**) malloc(sizeof(char*) * data_servers); + char** coding = (char**) malloc(sizeof(char*) * CTX->get_replicas()); + + for(auto i = 0; i < data_servers; ++i) { + data[i] = (char*) malloc(gkfs::config::rpc::chunksize); + } + for(auto i = 0; i < CTX->get_replicas(); ++i) { + coding[i] = (char*) malloc(gkfs::config::rpc::chunksize); + } + auto initial_row_chunk = (chunk_candidate / data_servers) * data_servers; @@ -472,55 +604,86 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, LOG(DEBUG, "Operation Size - Range {} - data_servers {} replica_servers {}", initial_row_chunk, data_servers, CTX->get_replicas()); - vector erased(CTX->hosts().size(), 1); + vector erased; auto i = initial_row_chunk; - for(auto j = i; j < i + data_servers; ++j) { + for(uint64_t j = 0; j < data_servers; ++j) { std::set failed; - LOG(DEBUG, "Reading Chunk {} -> {}, from server {} ", i, j, j - i); + LOG(DEBUG, "Reading Chunk {} -> {}, from server {}", i, i + j, j); auto out = gkfs::rpc::forward_read( - path, buffers[j - i], j * gkfs::config::rpc::chunksize, + path, data[j], (j + i) * gkfs::config::rpc::chunksize, gkfs::config::rpc::chunksize, 0, failed); if(out.first != 0) { LOG(ERROR, "Read Parity Error: {}", out.first); - erased[j - i] = 0; + erased.push_back(j); + } + } + { + uint64_t md5 = 0; + for(auto k = 0; k < gkfs::config::rpc::chunksize; k++) { + md5 += data[failed_server][k]; } + std::cout << "Content of the failed server? 
" << failed_server << " --> " + << md5 << std::endl; } - std::string ecc_path = - path + "_ecc_" + to_string(i) + "_" + to_string(i + data_servers); + // memset(data[failed_server], 3, gkfs::config::rpc::chunksize); + std::string ecc_path = path + "_ecc_" + to_string(i) + "_" + + to_string(i + data_servers - 1); - for(auto j = i + data_servers; j < i + CTX->hosts().size(); ++j) { - std::set failed; - LOG(DEBUG, "Reading EC Chunk {} {} -> {}, from server {} ", ecc_path, - i + data_servers, j, j - i + data_servers); + for(auto ecc_num = 0; ecc_num < CTX->get_replicas(); ++ecc_num) { + LOG(DEBUG, "Reading EC Chunk {} from server {} ", ecc_path, + ecc_num + data_servers); + auto out = gkfs::rpc::ecc_forward_read(ecc_path, coding[ecc_num], + gkfs::config::rpc::chunksize, + ecc_num + data_servers); - auto out = gkfs::rpc::forward_read( - ecc_path, buffers[j - i + data_servers], - j * gkfs::config::rpc::chunksize, gkfs::config::rpc::chunksize, - 0, failed); if(out.first != 0) { LOG(ERROR, "Read Parity Error: {}", out.first); - erased[j - i + data_servers] = 0; + erased.push_back(ecc_num + data_servers); + } else { + LOG(DEBUG, "Read EC Success"); } } + // We force a failure - erased[failed_server] = 1; + erased.push_back(failed_server); + erased.push_back(-1); + + + int res = 0; // We have all the data to recover the buffer auto matrix = reed_sol_vandermonde_coding_matrix(data_servers, CTX->get_replicas(), 8); - jerasure_matrix_decode(data_servers, CTX->get_replicas(), 8, matrix, 0, - erased.data(), buffers.data(), - buffers.data() + - gkfs::config::rpc::chunksize * data_servers, - gkfs::config::rpc::chunksize); - - memcpy(buffer_recover, - buffers.data() + erased.front() * gkfs::config::rpc::chunksize, - gkfs::config::rpc::chunksize); + + res = jerasure_matrix_decode(data_servers, CTX->get_replicas(), 8, matrix, + 1, erased.data(), data, coding, + gkfs::config::rpc::chunksize); + + std::cout << "recovered? Fails? " << failed_server << " -- " << res + << std::endl; + +{ + uint64_t md5 = 0; + for(auto k = 0; k < gkfs::config::rpc::chunksize; k++) { + md5 += data[failed_server][k]; + } + std::cout << "Content of the recovered server? " << failed_server << " --> " + << md5 << std::endl; + } + + memcpy(buffer_recover, data[failed_server], gkfs::config::rpc::chunksize); + + { + uint64_t md5 = 0; + for(auto i = 0; i < gkfs::config::rpc::chunksize; i++) { + md5 += ((char*) buffer_recover)[i]; + } + std::cout << "md5 recovered? " << md5 << std::endl; + } LOG(DEBUG, "EC computation finished"); return true; @@ -709,7 +872,7 @@ forward_read(const string& path, void* buf, const off64_t offset, LOG(ERROR, "Daemon reported error: {}", out.err()); err = out.err(); } - if(rand() % 2 == 0) { + if(rand() % 2 == 0 and num_copies > 0) { throw std::exception(); } out_size += static_cast(out.io_size()); @@ -747,7 +910,8 @@ forward_read(const string& path, void* buf, const off64_t offset, // We have a chunk to recover // We don't need to worry about offset etc... 
just use the chunk // number - void* recovered_chunk = malloc(gkfs::config::rpc::chunksize); + char* recovered_chunk = + (char*) malloc(gkfs::config::rpc::chunksize); gkfs::rpc::gkfs_ecc_recover(path, recovered_chunk, chnk_id_file, failed_server); @@ -779,10 +943,15 @@ forward_read(const string& path, void* buf, const off64_t offset, LOG(DEBUG, "Recovered chunk : Start Offset {}/OffsetChunk {} - Size {}", recover_offt, recover_offt_chunk, recover_size); + std::cout << "Recovered " << recover_offt << " -- " + << recover_offt_chunk << " --- size " << recover_size + << std::endl; memcpy((char*) buf + recover_offt, (char*) recovered_chunk + recover_offt_chunk, recover_size); + free(recovered_chunk); } + #endif } idx++; -- GitLab From 241a9a8eec0225fcba27a0d7d04692cd28d743ad Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Tue, 27 Jun 2023 08:40:37 +0200 Subject: [PATCH 11/17] Update scripts tests --- src/client/gkfs_functions.cpp | 10 ++++----- src/client/rpc/forward_data.cpp | 21 +++++++++---------- .../compile_dep.sh/0.9.3-exp/agios.out | 12 +++++++++++ .../scripts/compile_dep.sh/0.9.3-exp/all.out | 17 +++++++++++++++ tests/scripts/compile_dep.sh/0.9.3-exp/ci.out | 9 ++++++++ .../compile_dep.sh/0.9.3-exp/default.out | 10 +++++++++ .../compile_dep.sh/0.9.3-exp/direct.out | 6 ++++++ .../0.9.3-exp/infiniband_verbs.out | 10 +++++++++ .../compile_dep.sh/0.9.3-exp/marenostrum4.out | 13 ++++++++++++ .../compile_dep.sh/0.9.3-exp/mogon2.out | 13 ++++++++++++ .../scripts/compile_dep.sh/0.9.3-exp/ngio.out | 13 ++++++++++++ .../0.9.3-exp/omnipath_psm2.out | 12 +++++++++++ tests/scripts/compile_dep.sh/0.9.3-exp/p9.out | 12 +++++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/agios.out | 12 +++++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/all.out | 16 ++++++++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/ci.out | 10 +++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/default.out | 11 ++++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/direct.out | 7 +++++++ .../dl_dep.sh/0.9.3-exp/infiniband_verbs.out | 11 ++++++++++ .../dl_dep.sh/0.9.3-exp/marenostrum4.out | 13 ++++++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/mogon2.out | 13 ++++++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/ngio.out | 13 ++++++++++++ .../dl_dep.sh/0.9.3-exp/omnipath_psm2.out | 12 +++++++++++ tests/scripts/dl_dep.sh/0.9.3-exp/p9.out | 12 +++++++++++ 24 files changed, 272 insertions(+), 16 deletions(-) create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/agios.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/all.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/ci.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/default.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/direct.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/infiniband_verbs.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/marenostrum4.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/mogon2.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/ngio.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/omnipath_psm2.out create mode 100644 tests/scripts/compile_dep.sh/0.9.3-exp/p9.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/agios.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/all.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/ci.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/default.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/direct.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/infiniband_verbs.out create mode 100644 
tests/scripts/dl_dep.sh/0.9.3-exp/marenostrum4.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/mogon2.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/ngio.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/omnipath_psm2.out create mode 100644 tests/scripts/dl_dep.sh/0.9.3-exp/p9.out diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 6ccb86eea..7893b5758 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -44,7 +44,7 @@ extern "C" { #include #include } -#define GKFS_ENABLE_EC 1 + #ifdef GKFS_ENABLE_EC #include #include @@ -863,7 +863,7 @@ gkfs_dup2(const int oldfd, const int newfd) { return CTX->file_map()->dup2(oldfd, newfd); } - +#ifdef GKFS_ENABLE_EC bool gkfs_ecc_write(std::shared_ptr file, size_t count, off64_t offset, off64_t updated_size) { @@ -951,7 +951,7 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, } return true; } - +#endif /** * Wrapper function for all gkfs write operations * errno may be set @@ -1154,8 +1154,8 @@ gkfs_pread(std::shared_ptr file, char* buf, std::set failed; // set with failed targets. if(CTX->get_replicas() != 0) { - ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, CTX->get_replicas(), - failed); + ret = gkfs::rpc::forward_read(file->path(), buf, offset, count, + CTX->get_replicas(), failed); while(ret.first == EIO) { #ifdef GKFS_ENABLE_EC LOG(WARNING, "failed to read"); diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 1ba9b0576..0368edd1c 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -38,7 +38,6 @@ #include -#define GKFS_ENABLE_EC 1 #ifdef GKFS_ENABLE_EC #include #include @@ -572,7 +571,7 @@ forward_write(const string& path, const void* buf, const off64_t offset, return make_pair(0, out_size); } - +#ifdef GKFS_ENABLE_EC // To recover a missing chunk, we need to read all the remaining // And apply the reconstruction function. // This function is similar to the creation function @@ -586,7 +585,7 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, char** data = (char**) malloc(sizeof(char*) * data_servers); char** coding = (char**) malloc(sizeof(char*) * CTX->get_replicas()); - for(auto i = 0; i < data_servers; ++i) { + for(unsigned int i = 0; i < data_servers; ++i) { data[i] = (char*) malloc(gkfs::config::rpc::chunksize); } for(auto i = 0; i < CTX->get_replicas(); ++i) { @@ -625,11 +624,11 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, for(auto k = 0; k < gkfs::config::rpc::chunksize; k++) { md5 += data[failed_server][k]; } - std::cout << "Content of the failed server? " << failed_server << " --> " - << md5 << std::endl; + std::cout << "Content of the failed server? " << failed_server + << " --> " << md5 << std::endl; } - // memset(data[failed_server], 3, gkfs::config::rpc::chunksize); + // memset(data[failed_server], 3, gkfs::config::rpc::chunksize); std::string ecc_path = path + "_ecc_" + to_string(i) + "_" + to_string(i + data_servers - 1); @@ -666,13 +665,13 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, std::cout << "recovered? Fails? " << failed_server << " -- " << res << std::endl; -{ + { uint64_t md5 = 0; for(auto k = 0; k < gkfs::config::rpc::chunksize; k++) { md5 += data[failed_server][k]; } - std::cout << "Content of the recovered server? " << failed_server << " --> " - << md5 << std::endl; + std::cout << "Content of the recovered server? 
" << failed_server + << " --> " << md5 << std::endl; } memcpy(buffer_recover, data[failed_server], gkfs::config::rpc::chunksize); @@ -687,8 +686,8 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, LOG(DEBUG, "EC computation finished"); return true; -} // namespace gkfs::rpc - +} +#endif /** * Send an RPC request to read to a buffer. diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/agios.out b/tests/scripts/compile_dep.sh/0.9.3-exp/agios.out new file mode 100644 index 000000000..b098aecfa --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/agios.out @@ -0,0 +1,12 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### +######## Installing: agios ############################### + diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/all.out b/tests/scripts/compile_dep.sh/0.9.3-exp/all.out new file mode 100644 index 000000000..4e2d61993 --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/all.out @@ -0,0 +1,17 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: psm2 ############################### +WARNING: Install script for 'psm2' not found. Skipping. 
+######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### +######## Installing: agios ############################### +######## Installing: parallax ############################### +######## Installing: gf-complete ############################### +######## Installing: Jerasure ############################### +Done \ No newline at end of file diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/ci.out b/tests/scripts/compile_dep.sh/0.9.3-exp/ci.out new file mode 100644 index 000000000..d3c276800 --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/ci.out @@ -0,0 +1,9 @@ +######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### +######## Installing: agios ############################### +######## Installing: parallax ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/default.out b/tests/scripts/compile_dep.sh/0.9.3-exp/default.out new file mode 100644 index 000000000..787e4a08f --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/default.out @@ -0,0 +1,10 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/direct.out b/tests/scripts/compile_dep.sh/0.9.3-exp/direct.out new file mode 100644 index 000000000..816266773 --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/direct.out @@ -0,0 +1,6 @@ +######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/infiniband_verbs.out b/tests/scripts/compile_dep.sh/0.9.3-exp/infiniband_verbs.out new file mode 100644 index 000000000..e82dd020f --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/infiniband_verbs.out @@ -0,0 +1,10 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: libfabric%verbs 
############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/marenostrum4.out b/tests/scripts/compile_dep.sh/0.9.3-exp/marenostrum4.out new file mode 100644 index 000000000..38f67ce3d --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/marenostrum4.out @@ -0,0 +1,13 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: psm2 ############################### +WARNING: Install script for 'psm2' not found. Skipping. +######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### +######## Installing: parallax ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/mogon2.out b/tests/scripts/compile_dep.sh/0.9.3-exp/mogon2.out new file mode 100644 index 000000000..38f67ce3d --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/mogon2.out @@ -0,0 +1,13 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: psm2 ############################### +WARNING: Install script for 'psm2' not found. Skipping. +######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### +######## Installing: parallax ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/ngio.out b/tests/scripts/compile_dep.sh/0.9.3-exp/ngio.out new file mode 100644 index 000000000..38f67ce3d --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/ngio.out @@ -0,0 +1,13 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: psm2 ############################### +WARNING: Install script for 'psm2' not found. Skipping. 
+######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### +######## Installing: parallax ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/omnipath_psm2.out b/tests/scripts/compile_dep.sh/0.9.3-exp/omnipath_psm2.out new file mode 100644 index 000000000..7138da9e2 --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/omnipath_psm2.out @@ -0,0 +1,12 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: psm2 ############################### +WARNING: Install script for 'psm2' not found. Skipping. +######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### diff --git a/tests/scripts/compile_dep.sh/0.9.3-exp/p9.out b/tests/scripts/compile_dep.sh/0.9.3-exp/p9.out new file mode 100644 index 000000000..7138da9e2 --- /dev/null +++ b/tests/scripts/compile_dep.sh/0.9.3-exp/p9.out @@ -0,0 +1,12 @@ +######## Installing: lz4 ############################### +######## Installing: capstone ############################### +######## Installing: json-c ############################### +######## Installing: psm2 ############################### +WARNING: Install script for 'psm2' not found. Skipping. 
+######## Installing: libfabric ############################### +######## Installing: mercury ############################### +######## Installing: argobots ############################### +######## Installing: margo ############################### +######## Installing: rocksdb ############################### +######## Installing: syscall_intercept ############################### +######## Installing: date ############################### diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/agios.out b/tests/scripts/dl_dep.sh/0.9.3-exp/agios.out new file mode 100644 index 000000000..bf5455ca2 --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/agios.out @@ -0,0 +1,12 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Cloned 'https://github.com/francielizanon/agios.git' to 'agios' with commit '[c26a6544200f823ebb8f890dd94e653d148bf226]' and flags '--branch=development' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/all.out b/tests/scripts/dl_dep.sh/0.9.3-exp/all.out new file mode 100644 index 000000000..726ea69af --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/all.out @@ -0,0 +1,16 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Downloaded 'https://github.com/intel/opa-psm2/archive/PSM2_11.2.185.tar.gz' to 'psm2' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Cloned 'https://github.com/francielizanon/agios.git' to 'agios' with commit '[c26a6544200f823ebb8f890dd94e653d148bf226]' and flags '--branch=development' +Cloned 'https://github.com/CARV-ICS-FORTH/parallax.git' to 'parallax' with commit '[ffdea6e820f5c4c2d33e60d9a4b15ef9e6bbcfdd]' and flags '' +Cloned 
'https://github.com/ceph/gf-complete' to 'gf-complete' with commit '[a6862d10c9db467148f20eef2c6445ac9afd94d8]' +Cloned 'https://github.com/tsuraan/Jerasure' to 'Jerasure' with commit '[414c96ef2b9934953b6facb31d803d79b1dd1405]' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/ci.out b/tests/scripts/dl_dep.sh/0.9.3-exp/ci.out new file mode 100644 index 000000000..4ee089ef7 --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/ci.out @@ -0,0 +1,10 @@ +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Cloned 'https://github.com/francielizanon/agios.git' to 'agios' with commit '[c26a6544200f823ebb8f890dd94e653d148bf226]' and flags '--branch=development' +Cloned 'https://github.com/CARV-ICS-FORTH/parallax.git' to 'parallax' with commit '[ffdea6e820f5c4c2d33e60d9a4b15ef9e6bbcfdd]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/default.out b/tests/scripts/dl_dep.sh/0.9.3-exp/default.out new file mode 100644 index 000000000..a68701cf0 --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/default.out @@ -0,0 +1,11 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/direct.out b/tests/scripts/dl_dep.sh/0.9.3-exp/direct.out new file mode 100644 index 000000000..e27eae530 --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/direct.out @@ -0,0 +1,7 @@ +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' 
+Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/infiniband_verbs.out b/tests/scripts/dl_dep.sh/0.9.3-exp/infiniband_verbs.out new file mode 100644 index 000000000..6ebc289ff --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/infiniband_verbs.out @@ -0,0 +1,11 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric%verbs' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/marenostrum4.out b/tests/scripts/dl_dep.sh/0.9.3-exp/marenostrum4.out new file mode 100644 index 000000000..657154fdc --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/marenostrum4.out @@ -0,0 +1,13 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Downloaded 'https://github.com/intel/opa-psm2/archive/PSM2_11.2.185.tar.gz' to 'psm2' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Cloned 'https://github.com/CARV-ICS-FORTH/parallax.git' to 'parallax' with commit '[c130decd7a71c60c20b98d6a23924f05f754c3cd]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/mogon2.out b/tests/scripts/dl_dep.sh/0.9.3-exp/mogon2.out new file mode 100644 index 000000000..657154fdc --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/mogon2.out @@ -0,0 +1,13 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' 
+Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Downloaded 'https://github.com/intel/opa-psm2/archive/PSM2_11.2.185.tar.gz' to 'psm2' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Cloned 'https://github.com/CARV-ICS-FORTH/parallax.git' to 'parallax' with commit '[c130decd7a71c60c20b98d6a23924f05f754c3cd]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/ngio.out b/tests/scripts/dl_dep.sh/0.9.3-exp/ngio.out new file mode 100644 index 000000000..657154fdc --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/ngio.out @@ -0,0 +1,13 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Downloaded 'https://github.com/intel/opa-psm2/archive/PSM2_11.2.185.tar.gz' to 'psm2' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Cloned 'https://github.com/CARV-ICS-FORTH/parallax.git' to 'parallax' with commit '[c130decd7a71c60c20b98d6a23924f05f754c3cd]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/omnipath_psm2.out b/tests/scripts/dl_dep.sh/0.9.3-exp/omnipath_psm2.out new file mode 100644 index 000000000..d26b498ca --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/omnipath_psm2.out @@ -0,0 +1,12 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Downloaded 'https://github.com/intel/opa-psm2/archive/PSM2_11.2.185.tar.gz' to 'psm2' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 
'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[2c8765fa292bc9c28a22624c528580d54658813d]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Done \ No newline at end of file diff --git a/tests/scripts/dl_dep.sh/0.9.3-exp/p9.out b/tests/scripts/dl_dep.sh/0.9.3-exp/p9.out new file mode 100644 index 000000000..3a99bb523 --- /dev/null +++ b/tests/scripts/dl_dep.sh/0.9.3-exp/p9.out @@ -0,0 +1,12 @@ +Downloaded 'https://github.com/lz4/lz4/archive/v1.9.3.tar.gz' to 'lz4' +Downloaded 'https://github.com/aquynh/capstone/archive/4.0.2.tar.gz' to 'capstone' +Downloaded 'https://github.com/json-c/json-c/archive/json-c-0.15-20200726.tar.gz' to 'json-c' +Downloaded 'https://github.com/intel/opa-psm2/archive/PSM2_11.2.185.tar.gz' to 'psm2' +Cloned 'https://github.com/ofiwg/libfabric.git' to 'libfabric' with commit '[HEAD]' and flags '--branch=v1.13.2' +Cloned 'https://github.com/mercury-hpc/mercury' to 'mercury' with commit '[v2.1.0]' and flags '--recurse-submodules' +Downloaded 'https://github.com/pmodels/argobots/archive/v1.1.tar.gz' to 'argobots' +Cloned 'https://github.com/mochi-hpc/mochi-margo' to 'margo' with commit '[v0.9.6]' and flags '' +Downloaded 'https://github.com/facebook/rocksdb/archive/v6.26.1.tar.gz' to 'rocksdb' +Cloned 'https://github.com/GekkoFS/syscall_intercept.git' to 'syscall_intercept' with commit '[6eb27a9d2053bb2ac3bb9ce30e13b64ce055c19f]' and flags '' +Cloned 'https://github.com/HowardHinnant/date.git' to 'date' with commit '[e7e1482087f58913b80a20b04d5c58d9d6d90155]' and flags '' +Done -- GitLab From 0464d15923ac07c63808603b54553e2e6bc26908 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Tue, 27 Jun 2023 11:19:26 +0200 Subject: [PATCH 12/17] Update CI and dockers for EC Working recovery EC Polishing touches for Erasure codes --- .gitlab-ci.yml | 22 +++---- CHANGELOG.md | 2 +- CMake/gkfs-options.cmake | 6 ++ CMakePresets.json | 3 +- README.md | 5 +- docker/0.9.3-exp/core/Dockerfile | 40 +++++++++++++ docker/0.9.3-exp/core/Makefile | 10 ++++ docker/0.9.3-exp/deps/Dockerfile | 38 ++++++++++++ docker/0.9.3-exp/deps/Makefile | 23 +++++++ docker/0.9.3-exp/docs/Dockerfile | 34 +++++++++++ docker/0.9.3-exp/docs/Makefile | 10 ++++ docker/0.9.3-exp/linter/Dockerfile | 19 ++++++ docker/0.9.3-exp/linter/Makefile | 10 ++++ docker/0.9.3-exp/testing/Dockerfile | 32 ++++++++++ docker/0.9.3-exp/testing/Makefile | 10 ++++ scripts/profiles/0.9.3-exp/ci.specs | 4 +- src/client/gkfs_functions.cpp | 61 ++++++++++++------- src/client/rpc/forward_data.cpp | 93 ++++++++++++----------------- 18 files changed, 331 insertions(+), 91 deletions(-) create mode 100644 docker/0.9.3-exp/core/Dockerfile create mode 100644 docker/0.9.3-exp/core/Makefile create mode 100644 docker/0.9.3-exp/deps/Dockerfile create mode 100644 docker/0.9.3-exp/deps/Makefile create mode 100644 docker/0.9.3-exp/docs/Dockerfile create mode 100644 docker/0.9.3-exp/docs/Makefile create mode 100644 docker/0.9.3-exp/linter/Dockerfile create mode 100644 docker/0.9.3-exp/linter/Makefile create mode 100644 docker/0.9.3-exp/testing/Dockerfile create mode 100644 docker/0.9.3-exp/testing/Makefile diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 
68eaca4ed..3786a35f7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -24,14 +24,14 @@ variables: GIT_SUBMODULE_STRATEGY: recursive # base image -image: gekkofs/core:0.9.2 +image: gekkofs/core:0.9.3-exp ################################################################################ ## Validating ################################################################################ check format: stage: lint - image: gekkofs/linter:0.9.2 + image: gekkofs/linter:0.9.3-exp needs: [] script: - ${SCRIPTS_DIR}/check_format.sh @@ -45,7 +45,7 @@ check format: ################################################################################ gkfs: stage: build - image: gekkofs/deps:0.9.2 + image: gekkofs/deps:0.9.3-exp interruptible: true needs: [] script: @@ -67,7 +67,7 @@ gkfs: gkfwd: stage: build - image: gekkofs/deps:0.9.2 + image: gekkofs/deps:0.9.3-exp interruptible: true needs: [] script: @@ -89,7 +89,7 @@ gkfwd: ## == tests for scripts ==================== scripts: stage: test - image: gekkofs/testing:0.9.2 + image: gekkofs/testing:0.9.3-exp needs: [] script: - mkdir -p ${BUILD_PATH}/tests/scripts @@ -105,7 +105,7 @@ scripts: ## == integration tests for gkfs =========== gkfs:integration: stage: test - image: gekkofs/testing:0.9.2 + image: gekkofs/testing:0.9.3-exp interruptible: true needs: ['gkfs'] parallel: @@ -154,7 +154,7 @@ gkfs:integration: ## == integration tests for gkfwd ========== gkfwd:integration: stage: test - image: gekkofs/testing:0.9.2 + image: gekkofs/testing:0.9.3-exp interruptible: true needs: ['gkfwd'] parallel: @@ -204,7 +204,7 @@ gkfwd:integration: ## == unit tests for gkfs ================== gkfs:unit: stage: test - image: gekkofs/testing:0.9.2 + image: gekkofs/testing:0.9.3-exp needs: ['gkfs'] script: ## Add path to mkfs.kreon @@ -242,7 +242,7 @@ gkfs:unit: ################################################################################ documentation: stage: docs - image: gekkofs/docs:0.9.2 + image: gekkofs/docs:0.9.3-exp needs: [] rules: # we only build the documentation automatically if we are on the @@ -272,7 +272,7 @@ documentation: ## == coverage baseline ==================== coverage:baseline: stage: report - image: gekkofs/testing:0.9.2 + image: gekkofs/testing:0.9.3-exp interruptible: true needs: ['gkfs', 'gkfwd'] @@ -298,7 +298,7 @@ coverage:baseline: coverage: stage: report - image: gekkofs/testing:0.9.2 + image: gekkofs/testing:0.9.3-exp # needs: [ 'coverage:baseline', 'gkfs:integration', 'gkfwd:integration', # 'gkfs:unit' ] needs: [ 'coverage:baseline', 'gkfs:integration', 'gkfs:unit' ] diff --git a/CHANGELOG.md b/CHANGELOG.md index db3a8e823..61982e61d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). replicas ([!166](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/141)). - Modified write and reads to use a bitset instead of the traditional hash per chunk in the server. - Added reattemp support in get_fs_config to other servers, when the initial server fails. -- Added support for Erasure codes ([!1xx]) +- Added support for Erasure codes ([!168]) using Jerasure lib and adding support for Read error injection. ### New - Additional tests to increase code coverage ([!141](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/141)). 
diff --git a/CMake/gkfs-options.cmake b/CMake/gkfs-options.cmake index fbed15b8f..775b9275c 100644 --- a/CMake/gkfs-options.cmake +++ b/CMake/gkfs-options.cmake @@ -381,3 +381,9 @@ gkfs_define_option( DESCRIPTION "Use Jerasure for erasure codes reliability" ) +gkfs_define_option( + GKFS_ENABLE_READ_ERRORS + HELP_TEXT "Enable Read Errors using replication" + DEFAULT_VALUE OFF + DESCRIPTION "Inject read errors" +) diff --git a/CMakePresets.json b/CMakePresets.json index 88fc9ce80..819894f00 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -86,7 +86,8 @@ "GKFS_ENABLE_PROMETHEUS": true, "GKFS_RENAME_SUPPORT": true, "MAX_OPEN_FDS": "10000", - "MAX_INTERNAL_FDS": "1024" + "MAX_INTERNAL_FDS": "1024", + "GKFS_ENABLE_EC": true } }, { diff --git a/README.md b/README.md index 3ac2dba98..e7555635f 100644 --- a/README.md +++ b/README.md @@ -326,10 +326,13 @@ The number of replicas should go from 0 to the number of servers-1. The replication environment variable can be set up for each client, independently. ### Erasure codes -The user can enable resilience with erasure codes with -DGKFS_ENABLE_EC +The user can enable resilience with erasure codes with `-DGKFS_ENABLE_EC`. Using `LIBGKFS_NUM_REPL=`, the user can define the number of EC servers. The total number of servers available for data is then `total servers - num_repl`. +Additionally, the user can enable `-DGKFS_ENABLE_READ_ERRORS` to inject +50% read errors into the workflow. + ## Acknowledgment This software was partially supported by the EC H2020 funded NEXTGenIO project (Project ID: 671951, www.nextgenio.eu). diff --git a/docker/0.9.3-exp/core/Dockerfile b/docker/0.9.3-exp/core/Dockerfile new file mode 100644 index 000000000..a55434924 --- /dev/null +++ b/docker/0.9.3-exp/core/Dockerfile @@ -0,0 +1,40 @@ +FROM debian:bullseye-slim + +LABEL Description="Debian-based environment suitable to build GekkoFS and its dependencies" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + curl \ + ca-certificates \ + libtool \ + pkg-config \ + make \ + automake \ + gcc \ + g++ \ + ninja-build \ + procps \ + # AGIOS dependencies + libconfig-dev \ + # Mercury dependencies + libltdl-dev \ + lbzip2 \ + # Margo dependencies \ + libjson-c-dev \ + # RocksDB dependencies + liblz4-dev \ + # syscall_intercept dependencies + libcapstone-dev \ + # GekkoFS dependencies + libboost-program-options-dev \ + uuid-dev && \ + # install cmake 3.14+ since it's needed for some dependencies + curl -OL https://github.com/Kitware/CMake/releases/download/v3.25.2/cmake-3.25.2-Linux-x86_64.sh && \ + chmod u+x ./cmake-3.25.2-Linux-x86_64.sh && \ + ./cmake-3.25.2-Linux-x86_64.sh --skip-license --prefix=/usr && \ + # Clean apt cache to reduce image layer size + rm -rf /var/lib/apt/lists/* && \ + # Clean apt caches of packages + apt-get clean && apt-get autoclean && \ + rm ./cmake-3.25.2-Linux-x86_64.sh diff --git a/docker/0.9.3-exp/core/Makefile b/docker/0.9.3-exp/core/Makefile new file mode 100644 index 000000000..e11d6cd4b --- /dev/null +++ b/docker/0.9.3-exp/core/Makefile @@ -0,0 +1,10 @@ +.PHONY: all + +amd64: + docker build --platform amd64 -t gekkofs/core:0.9.3-exp . + +aarch64: + docker build --platform aarch64 -t gekkofs/core:0.9.3-exp . + +all: + docker build -t gekkofs/core:0.9.3-exp .
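To make the arithmetic behind these options concrete, here is a minimal standalone sketch (illustration only, not part of the patch): `num_servers` and `num_repl` are hypothetical stand-ins for the client's host count and `LIBGKFS_NUM_REPL`, and the row computation mirrors the `initial_row_chunk` expression used by `gkfs_ecc_write()`/`gkfs_ecc_recover()` further below.

    #include <cstdint>

    // Hypothetical helper, not GekkoFS API: with m = num_repl parity servers,
    // only k = num_servers - num_repl servers hold data chunks.
    struct EcLayout {
        uint64_t data_servers;   // k: servers holding data chunks
        uint64_t parity_servers; // m: servers holding EC (parity) chunks
    };

    inline EcLayout
    make_ec_layout(uint64_t num_servers, uint64_t num_repl) {
        return {num_servers - num_repl, num_repl};
    }

    // First chunk of the "chunk line" (row) that a given chunk belongs to;
    // each row of data_servers data chunks is encoded into parity_servers
    // parity chunks.
    inline uint64_t
    initial_row_chunk(uint64_t chunk, uint64_t data_servers) {
        return (chunk / data_servers) * data_servers;
    }

For example, with 6 servers and `LIBGKFS_NUM_REPL=2`, chunks are grouped into rows of 4, and chunk 5 belongs to the row starting at chunk 4.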
diff --git a/docker/0.9.3-exp/deps/Dockerfile b/docker/0.9.3-exp/deps/Dockerfile new file mode 100644 index 000000000..0ea72dfa9 --- /dev/null +++ b/docker/0.9.3-exp/deps/Dockerfile @@ -0,0 +1,38 @@ +FROM gekkofs/core:0.9.3-exp + +LABEL Description="Debian-based environment to build GekkoFS" + +ENV GKFS_PATH /opt/gkfs +ENV GKFS_VERSION 0.9.3-exp + +ENV SCRIPTS_PATH ${GKFS_PATH}/scripts +ENV DEPS_SRC_PATH ${GKFS_PATH}/deps_src +ENV INSTALL_PATH /usr/local + +COPY scripts/dl_dep.sh ${SCRIPTS_PATH}/ +COPY scripts/compile_dep.sh ${SCRIPTS_PATH}/ +COPY scripts/patches ${SCRIPTS_PATH}/patches +COPY scripts/profiles ${SCRIPTS_PATH}/profiles + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + python3-dev \ + python3-venv \ + python3-setuptools \ + libnuma-dev libyaml-dev libcurl4-openssl-dev \ + procps && \ + python3 -m pip install --upgrade pip && \ + rm -rf /var/lib/apt/lists/* && \ + apt-get clean && apt-get autoclean + +# Download and build dependencies +RUN cd ${SCRIPTS_PATH} && \ + /bin/bash ./dl_dep.sh -p ci:${GKFS_VERSION} ${DEPS_SRC_PATH} && \ + /bin/bash ./compile_dep.sh -j 8 -p ci:${GKFS_VERSION} ${DEPS_SRC_PATH} ${INSTALL_PATH} && \ + cp ${DEPS_SRC_PATH}/parallax/lib/include/parallax/structures.h ${INSTALL_PATH}/include/ &&\ + rm -rf ${DEPS_SRC_PATH} && \ + rm -rf ${SCRIPTS_PATH} && \ + rmdir ${GKFS_PATH} && \ + ldconfig diff --git a/docker/0.9.3-exp/deps/Makefile b/docker/0.9.3-exp/deps/Makefile new file mode 100644 index 000000000..41a10361e --- /dev/null +++ b/docker/0.9.3-exp/deps/Makefile @@ -0,0 +1,23 @@ +CWD:=$(shell pwd) +GIT_ROOT:=$(shell git rev-parse --show-toplevel) + +.PHONY: all build mount-scripts umount-scripts + +all: build mount-scripts remove-scripts +amd64: build-amd64 mount-scripts remove-scripts +aarch64: build-aarch64 mount-scripts remove-scripts + +copy-scripts: + cp -R $(GIT_ROOT)/scripts $(CWD)/scripts + +build: copy-scripts + docker build -t gekkofs/deps:0.9.3-exp . + +build-amd64: copy-scripts + docker build --platform amd64 -t gekkofs/deps:0.9.3-exp . + +build-aarch64: copy-scripts + docker build --platform aarch64 -t gekkofs/deps:0.9.3-exp . + +remove-scripts: + - rm -rf $(CWD)/scripts diff --git a/docker/0.9.3-exp/docs/Dockerfile b/docker/0.9.3-exp/docs/Dockerfile new file mode 100644 index 000000000..55a44fdc1 --- /dev/null +++ b/docker/0.9.3-exp/docs/Dockerfile @@ -0,0 +1,34 @@ +FROM gekkofs/deps:0.9.3-exp + +LABEL Description="Debian-based environment suitable to build GekkoFS' documentation" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + # install dependencies for Doxygen + python \ + flex \ + bison \ + graphviz && \ + # install doxygen (repo version is kind of old) + cd /tmp && curl -L https://sourceforge.net/projects/doxygen/files/rel-1.9.2/doxygen-1.9.2.src.tar.gz/download --output doxygen-1.9.2.src.tar.gz && \ + tar xvfz /tmp/doxygen-1.9.2.src.tar.gz && \ + mkdir -p /tmp/doxygen-1.9.2/build && \ + cd /tmp/doxygen-1.9.2/build && \ + cmake -G "Unix Makefiles" .. 
&& \ + make -j8 install && \ + # install sphinx, breathe and exhale + pip3 install \ + 'sphinx==4.4.0' \ + sphinx_rtd_theme \ + 'breathe==4.33.1' \ + 'exhale==0.3.1' \ + 'sphinx-copybutton==0.5.0' \ + 'sphinx-multiversion==0.2.4' \ + 'myst_parser==0.17.0' \ + attrs && \ + # Clean apt cache to reduce image layer size + rm -rf /var/lib/apt/lists/* && \ + rm -rf /tmp/doxygen-1.9.2 && \ + rm /tmp/doxygen-1.9.2.src.tar.gz && \ + # Clean apt caches of packages + apt-get clean && apt-get autoclean diff --git a/docker/0.9.3-exp/docs/Makefile b/docker/0.9.3-exp/docs/Makefile new file mode 100644 index 000000000..e7608ea74 --- /dev/null +++ b/docker/0.9.3-exp/docs/Makefile @@ -0,0 +1,10 @@ +.PHONY: all + +amd64: + docker build --platform amd64 -t gekkofs/docs:0.9.3-exp . + +aarch64: + docker build --platform aarch64 -t gekkofs/docs:0.9.3-exp . + +all: + docker build -t gekkofs/docs:0.9.3-exp . \ No newline at end of file diff --git a/docker/0.9.3-exp/linter/Dockerfile b/docker/0.9.3-exp/linter/Dockerfile new file mode 100644 index 000000000..82d87f86d --- /dev/null +++ b/docker/0.9.3-exp/linter/Dockerfile @@ -0,0 +1,19 @@ +FROM gekkofs/core:0.9.3-exp + +LABEL Description="Debian-based environment to check the formatting of GekkoFS code" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + # clang 15 deps for clang-format + lsb-release \ + wget \ + software-properties-common \ + gnupg2 && \ + # add clang-15 repos + wget https://apt.llvm.org/llvm.sh -P /tmp && chmod +x /tmp/llvm.sh && /tmp/llvm.sh 15 && \ + # install clang-format + apt-get update && apt-get install -y --no-install-recommends clang-format-15 && \ + # Clean apt cache to reduce image layer size + rm -rf /var/lib/apt/lists/* && rm /tmp/llvm.sh && \ + # Clean apt caches of packages + apt-get clean && apt-get autoclean diff --git a/docker/0.9.3-exp/linter/Makefile b/docker/0.9.3-exp/linter/Makefile new file mode 100644 index 000000000..98f344150 --- /dev/null +++ b/docker/0.9.3-exp/linter/Makefile @@ -0,0 +1,10 @@ +.PHONY: all + +amd64: + docker build --platform amd64 -t gekkofs/linter:0.9.3-exp . + +aarch64: + docker build --platform aarch64 -t gekkofs/linter:0.9.3-exp . + +all: + docker build -t gekkofs/linter:0.9.3-exp . diff --git a/docker/0.9.3-exp/testing/Dockerfile b/docker/0.9.3-exp/testing/Dockerfile new file mode 100644 index 000000000..250fea63c --- /dev/null +++ b/docker/0.9.3-exp/testing/Dockerfile @@ -0,0 +1,32 @@ +FROM gekkofs/deps:0.9.3-exp + +LABEL Description="Debian-based environment to test GekkoFS" + +RUN \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + # required by lcov's genhtml + libgd-perl && \ + # install cmake 3.21+ since we need to produce JUnit XML files + curl -OL https://github.com/Kitware/CMake/releases/download/v3.25.2/cmake-3.25.2-Linux-x86_64.sh && \ + chmod u+x ./cmake-3.25.2-Linux-x86_64.sh && \ + ./cmake-3.25.2-Linux-x86_64.sh --skip-license --prefix=/usr && \ + # install loguru + # (required by several of our scripts) + pip3 install loguru && \ + # install lcov_cobertura + # (required to produce Cobertura XML reports) + pip3 install lcov_cobertura && \ + # install lcov + # (required to produce partial coverage reports in parallel runs) + curl -OL https://github.com/linux-test-project/lcov/releases/download/v1.16/lcov-1.16.tar.gz && \ + tar xfz lcov-1.16.tar.gz && \ + cd lcov-1.16 && \ + make install && \ + cd .. 
&& \ + # cleanup + rm -rf /var/lib/apt/lists/* && \ + apt-get clean && \ + apt-get autoclean && \ + rm ./cmake-3.25.2-Linux-x86_64.sh && \ + rm -rf ./lcov-1.16.* diff --git a/docker/0.9.3-exp/testing/Makefile b/docker/0.9.3-exp/testing/Makefile new file mode 100644 index 000000000..26551ad43 --- /dev/null +++ b/docker/0.9.3-exp/testing/Makefile @@ -0,0 +1,10 @@ +.PHONY: all + +amd64: + docker build --platform amd64 -t gekkofs/testing:0.9.3-exp . + +aarch64: + docker build --platform aarch64 -t gekkofs/testing:0.9.3-exp . + +all: + docker build -t gekkofs/testing:0.9.3-exp . diff --git a/scripts/profiles/0.9.3-exp/ci.specs b/scripts/profiles/0.9.3-exp/ci.specs index aec646112..1266c4f33 100644 --- a/scripts/profiles/0.9.3-exp/ci.specs +++ b/scripts/profiles/0.9.3-exp/ci.specs @@ -51,6 +51,8 @@ clonedeps=( ["date"]="e7e1482087f58913b80a20b04d5c58d9d6d90155" ["agios"]="c26a6544200f823ebb8f890dd94e653d148bf226@development" ["parallax"]="ffdea6e820f5c4c2d33e60d9a4b15ef9e6bbcfdd" + ["gf-complete"]="a6862d10c9db467148f20eef2c6445ac9afd94d8" + ["Jerasure"]="414c96ef2b9934953b6facb31d803d79b1dd1405" ) # Extra arguments for git clone @@ -66,7 +68,7 @@ clonedeps_patches=( # Ordering that MUST be followed when downloading order=( "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" - "date" "agios" "parallax" "prometheus-cpp" + "date" "agios" "parallax" "prometheus-cpp" "gf-complete" "Jerasure" ) # Extra arguments passed to the installation script. As such, they can diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 7893b5758..2d9612f42 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -864,6 +864,24 @@ gkfs_dup2(const int oldfd, const int newfd) { } #ifdef GKFS_ENABLE_EC +/** + * @brief Compute and store the erasure codes of a chunk line; smaller files are + ignored. The erasure codes are stored in [path]_ecc_[chunkstart]_[chunkend] files + * + * @param file + * @param count + * @param offset + * @param updated_size + * @return true if the computation was successful + * @return false otherwise + * + * For each chunk we will have a set of chunks involved in that calculation + [0] [1] [2] [3] [4] [n-p] [p1] [p2] + [n-p+1] .... + i.e. : [0] -> 1,2,3,4,n-p + i.e : [4] -> 0,1,2,3,n-p + i.e : [n-p+1] +*/ bool gkfs_ecc_write(std::shared_ptr file, size_t count, off64_t offset, off64_t updated_size) { @@ -874,15 +892,7 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, std::set chunk_set; - // For each chunk we will have a set of chunks involved on that calculation - // [0] [1] [2] [3] [4] [n-p] [p1] [p2] - // [n-p+1] .... - // i.e. : [0] -> 1,2,3,4,n-p - // i.e : [4] -> 0,1,2,3,n-p - // i.e : [n-p+1] -> - // 3 data serv - // (chunk / data_servers)*data_servers --> Initial row chunk - // Involved : From initial to ... initial + data_servers + if((uint64_t) updated_size >= (uint64_t) CTX->hosts().size() * gkfs::config::rpc::chunksize) { auto data_servers = CTX->hosts().size() - CTX->get_replicas(); @@ -890,12 +900,21 @@ auto initial_row_chunk = (i / data_servers) * data_servers; chunk_set.insert(initial_row_chunk); } - // Parity Stored in : parity1 ..
parity2, as name = - // [PARITY][Path][Initial row chunk] // 1 - Read data from the other chunks - std::vector buffers( - data_servers, (char*) malloc(gkfs::config::rpc::chunksize)); + + char** data = (char**) malloc(sizeof(char*) * data_servers); + char** coding = (char**) malloc(sizeof(char*) * CTX->get_replicas()); + + for(unsigned int i = 0; i < data_servers; ++i) { + data[i] = (char*) malloc(gkfs::config::rpc::chunksize); + // memset(data[i], 0, gkfs::config::rpc::chunksize); + } + for(auto i = 0; i < CTX->get_replicas(); ++i) { + coding[i] = (char*) malloc(gkfs::config::rpc::chunksize); + // memset(coding[i], 0, gkfs::config::rpc::chunksize); + } + LOG(DEBUG, "Operation Size {} - Range {}-{} - data_servers {} replica_servers {}", updated_size, chunks.first, chunks.second, data_servers, @@ -909,7 +928,7 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, LOG(DEBUG, "Reading Chunk {} -> {}", i, j); auto out = gkfs::rpc::forward_read( - *path, buffers[j - i], j * gkfs::config::rpc::chunksize, + *path, data[j - i], j * gkfs::config::rpc::chunksize, gkfs::config::rpc::chunksize, 0, failed); if(out.first != 0) { LOG(ERROR, "Read Parity Error: {}", out.first); @@ -918,15 +937,10 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, // We have all the data to process a EC - std::vector coding( - CTX->get_replicas(), - (char*) malloc(gkfs::config::rpc::chunksize)); - auto matrix = reed_sol_vandermonde_coding_matrix( data_servers, CTX->get_replicas(), 8); jerasure_matrix_encode(data_servers, CTX->get_replicas(), 8, matrix, - buffers.data(), coding.data(), - gkfs::config::rpc::chunksize); + data, coding, gkfs::config::rpc::chunksize); LOG(DEBUG, "EC computation finished"); @@ -945,6 +959,8 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, } } } + free(coding); + free(data); } else { LOG(DEBUG, "No EC in small files"); return false; @@ -1005,7 +1021,10 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, write_size = ret_write.second; #ifdef GKFS_ENABLE_EC - gkfs_ecc_write(file, count, offset, updated_size); + auto res = gkfs_ecc_write(file, count, offset, updated_size); + if(res) { + LOG(ERROR, "erasure code writing failed"); + } #else if(num_replicas > 0) { diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 0368edd1c..bb10cd0cd 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -82,18 +82,11 @@ calc_op_chunks(const std::string& path, const bool append_flag, // #ifdef GKFS_ENABLE_EC /** - * Send an RPC request to write from a buffer. - * There is a bitset of 1024 chunks to tell the server - * which chunks to process. Exceeding this value will work without - * replication. Another way is to leverage mercury segments. - * TODO: Decide how to manage a write to a replica that doesn't exist + * Send an RPC request to write a single chunk of data to a given server * @param path * @param buf - * @param append_flag - * @param in_offset * @param write_size - * @param updated_metadentry_size - * @param num_copies number of replicas + * @param server destination server * @return pair */ pair @@ -572,9 +565,18 @@ forward_write(const string& path, const void* buf, const off64_t offset, } #ifdef GKFS_ENABLE_EC -// To recover a missing chunk, we need to read all the remaining -// And apply the reconstruction function. 
-// This function is similar to the creation function +/** + * @brief Process a chunk line to recover a missing server chunk; can be used + * for multiple failures + * + * @param path + * @param buffer_recover + * @param chunk_candidate + * @param failed_server hint indicating which server's chunk is suspected to be + * corrupted (it is not necessarily corrupted) + * @return true if the recovery completed + * @return false otherwise + */ bool gkfs_ecc_recover(const std::string& path, void* buffer_recover, uint64_t chunk_candidate, uint64_t failed_server) { @@ -587,18 +589,17 @@ ... for(unsigned int i = 0; i < data_servers; ++i) { data[i] = (char*) malloc(gkfs::config::rpc::chunksize); + memset(data[i], 0, gkfs::config::rpc::chunksize); } for(auto i = 0; i < CTX->get_replicas(); ++i) { coding[i] = (char*) malloc(gkfs::config::rpc::chunksize); + memset(coding[i], 0, gkfs::config::rpc::chunksize); } auto initial_row_chunk = (chunk_candidate / data_servers) * data_servers; - // Parity Stored in : parity1 .. parity2, as name = - // [PARITY][Path][Initial row chunk] - - // 1 - Read data from the other chunks plus the parity + // 1 - Read data from the other chunks (failures allowed) LOG(DEBUG, "Operation Size - Range {} - data_servers {} replica_servers {}", initial_row_chunk, data_servers, CTX->get_replicas()); @@ -619,16 +620,9 @@ ... erased.push_back(j); } } - { - uint64_t md5 = 0; - for(auto k = 0; k < gkfs::config::rpc::chunksize; k++) { - md5 += data[failed_server][k]; - } - std::cout << "Content of the failed server? " << failed_server - << " --> " << md5 << std::endl; - } - // memset(data[failed_server], 3, gkfs::config::rpc::chunksize); + // Read EC codes. TODO: delete this file once the original is gone or the + // file shrinks std::string ecc_path = path + "_ecc_" + to_string(i) + "_" + to_string(i + data_servers - 1); @@ -659,32 +653,16 @@ ... matrix ... res = jerasure_matrix_decode(data_servers, CTX->get_replicas(), 8, matrix, - 1, erased.data(), data, coding, + 0, erased.data(), data, coding, gkfs::config::rpc::chunksize); - std::cout << "recovered? Fails? " << failed_server << " -- " << res - << std::endl; - - { - uint64_t md5 = 0; - for(auto k = 0; k < gkfs::config::rpc::chunksize; k++) { - md5 += data[failed_server][k]; - } - std::cout << "Content of the recovered server? " << failed_server - << " --> " << md5 << std::endl; - } - + LOG(DEBUG, "EC recovered {}, with result {}", failed_server, res); memcpy(buffer_recover, data[failed_server], gkfs::config::rpc::chunksize); - { - uint64_t md5 = 0; - for(auto i = 0; i < gkfs::config::rpc::chunksize; i++) { - md5 += ((char*) buffer_recover)[i]; - } - std::cout << "md5 recovered? " << md5 << std::endl; - } LOG(DEBUG, "EC computation finished"); + free(data); + free(coding); return true; } #endif @@ -871,9 +849,11 @@ forward_read(const string& path, void* buf, const off64_t offset, LOG(ERROR, "Daemon reported error: {}", out.err()); err = out.err(); } +#ifdef GKFS_ENABLE_READ_ERRORS if(rand() % 2 == 0 and num_copies > 0) { throw std::exception(); } +#endif // GKFS_ENABLE_READ_ERRORS out_size += static_cast(out.io_size()); } catch(const std::exception& ex) { @@ -909,17 +889,17 @@ ... // We have a chunk to recover // We don't need to worry about offset etc...
just use the chunk // number - char* recovered_chunk = - (char*) malloc(gkfs::config::rpc::chunksize); - gkfs::rpc::gkfs_ecc_recover(path, recovered_chunk, chnk_id_file, - failed_server); + void* recovered_chunk = malloc(gkfs::config::rpc::chunksize); + auto recovered = gkfs::rpc::gkfs_ecc_recover( + path, recovered_chunk, chnk_id_file, failed_server); + LOG(DEBUG, "Recovered server: {} Result {}", failed_server, + recovered); // Move recovered_chunk to the buffer, first and last chunk // should subtract... auto recover_size = gkfs::config::rpc::chunksize; auto recover_offt = chnk_id_file * gkfs::config::rpc::chunksize; - auto recover_offt_chunk = (chnk_id_file - chnk_start) * - gkfs::config::rpc::chunksize; + auto recover_offt_chunk = 0; if(chnk_id_file == chnk_start) { // We may need to move the offset of both buffers and reduce // the recover size auto offset_fc = block_overrun(offset, gkfs::config::rpc::chunksize); recover_offt += offset_fc; recover_offt_chunk += offset_fc; recover_size -= offset_fc; } if(chnk_id_file == chnk_end) { // We may need to reduce the recover size. if(!is_aligned(offset + read_size, gkfs::config::rpc::chunksize)) { recover_size -= block_underrun(offset + read_size, gkfs::config::rpc::chunksize); } } LOG(DEBUG, "Recovered chunk : Start Offset {}/OffsetChunk {} - Size {}", recover_offt, recover_offt_chunk, recover_size); - std::cout << "Recovered " << recover_offt << " -- " << recover_offt_chunk << " --- size " << recover_size << std::endl; + + if(recovered) { + err = 0; + out_size += static_cast(recover_size); + } + memcpy((char*) buf + recover_offt, (char*) recovered_chunk + recover_offt_chunk, recover_size); + free(recovered_chunk); } @@ -956,7 +940,6 @@ ... idx++; } - /* * Typically file systems return the size even if only a part of it was * read. In our case, we do not keep track which daemon fully read its -- GitLab From 801361a31ae563db4d7b0507473fb5e5db2ad767 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 29 Jun 2023 10:35:24 +0200 Subject: [PATCH 13/17] Added ondemand calculation --- CHANGELOG.md | 1 + README.md | 4 ++++ include/client/env.hpp | 4 ++++ include/client/gkfs_functions.hpp | 6 +++++ include/client/preload_context.hpp | 7 ++++++ src/client/gkfs_functions.cpp | 37 ++++++++++++++++++++++++++---- src/client/preload_context.cpp | 14 +++++++++++ 7 files changed, 69 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61982e61d..e7b1302c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ replicas ([!166](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/141) - Modified write and reads to use a bitset instead of the traditional hash per chunk in the server. - Added reattemp support in get_fs_config to other servers, when the initial server fails. - Added support for Erasure codes ([!168]) using Jerasure lib and adding support for Read error injection. +- Added support for on-demand erasure code calculation with `gkfs_ec_ondemand(fd)` ### New - Additional tests to increase code coverage ([!141](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/141)). diff --git a/README.md b/README.md index e7555635f..2da692a2a 100644 --- a/README.md +++ b/README.md @@ -333,6 +333,10 @@ The total number of servers available for data is then `total servers - num_repl`. Additionally, the user can enable `-DGKFS_ENABLE_READ_ERRORS` to inject 50% read errors into the workflow. +We also include a function to trigger the EC calculation on demand for an open file: +`gkfs_ec_ondemand(int fd)`. Using it requires the environment variable `LIBGKFS_EC_ONDEMAND=1`. + +This function is useful to compute the erasure codes only once the file is complete, e.g., for checkpoints.
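A minimal usage sketch for this on-demand path (hypothetical application code, not part of the patch): it assumes the GekkoFS client library is preloaded, `LIBGKFS_EC_ONDEMAND=1` is set, and the `gkfs_ec_ondemand()` declaration from `include/client/gkfs_functions.hpp` is visible.

    #include <fcntl.h>
    #include <unistd.h>
    #include <cstdio>

    extern "C" int
    gkfs_ec_ondemand(unsigned int fd); // from client/gkfs_functions.hpp

    void
    write_checkpoint(const char* path, const char* buf, size_t len) {
        int fd = open(path, O_CREAT | O_WRONLY, 0644);
        if(fd < 0)
            return;
        // With LIBGKFS_EC_ONDEMAND=1, writes skip the per-write EC pass
        auto n = write(fd, buf, len);
        // Compute the erasure codes once, when the checkpoint is complete
        if(n == (ssize_t) len && gkfs_ec_ondemand(fd) != 0)
            std::fprintf(stderr, "EC computation failed for %s\n", path);
        close(fd);
    }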
## Acknowledgment This software was partially supported by the EC H2020 funded NEXTGenIO project (Project ID: 671951, www.nextgenio.eu). diff --git a/include/client/env.hpp b/include/client/env.hpp index 6cd928226..7f408e5f3 100644 --- a/include/client/env.hpp +++ b/include/client/env.hpp @@ -51,7 +51,11 @@ static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); #ifdef GKFS_ENABLE_FORWARDING static constexpr auto FORWARDING_MAP_FILE = ADD_PREFIX("FORWARDING_MAP_FILE"); #endif + static constexpr auto NUM_REPL = ADD_PREFIX("NUM_REPL"); +static constexpr auto EC_ONDEMAND = ADD_PREFIX("EC_ONDEMAND"); + + } // namespace gkfs::env #undef ADD_PREFIX diff --git a/include/client/gkfs_functions.hpp b/include/client/gkfs_functions.hpp index d005d2fb9..66e2c755b 100644 --- a/include/client/gkfs_functions.hpp +++ b/include/client/gkfs_functions.hpp @@ -159,4 +159,10 @@ gkfs_rename(const std::string& old_path, const std::string& new_path); extern "C" int gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, unsigned int count, int server); + +#ifdef GKFS_ENABLE_EC +extern "C" int +gkfs_ec_ondemand(const unsigned int fd); +#endif + #endif // GEKKOFS_GKFS_FUNCTIONS_HPP diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index c3f463dbb..9fc4dd1c9 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -105,6 +105,7 @@ private: std::bitset protected_fds_; std::string hostname; int replicas_; + bool ec_ondemand_; public: static PreloadContext* @@ -222,6 +223,12 @@ public: int get_replicas(); + + bool + get_ec_ondemand(); + + void + set_ec_ondemand(const bool ec_ondemand); }; } // namespace preload diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 2d9612f42..521c217ef 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -1021,11 +1021,14 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, write_size = ret_write.second; #ifdef GKFS_ENABLE_EC - auto res = gkfs_ecc_write(file, count, offset, updated_size); - if(res) { - LOG(ERROR, "erasure code writing failed"); + // Only compute Erasure codes if we do not have enabled the ondemand + // environment variable + if(CTX->get_ec_ondemand() == false) { + auto res = gkfs_ecc_write(file, count, offset, updated_size); + if(res) { + LOG(ERROR, "erasure code writing failed"); + } } - #else if(num_replicas > 0) { @@ -1656,3 +1659,29 @@ gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, } return written; } + +#ifdef GKFS_ENABLE_EC +/** + * This function defines an extension to calculate the erasure codes of a file + * Returns 0 on success, -1 on failure + */ +extern "C" int +gkfs_ec_ondemand(const unsigned int fd) { + if(CTX->file_map()->exist(fd)) { + auto path = CTX->file_map()->get(fd)->path(); + auto file = CTX->file_map()->get(fd); + auto md = gkfs::utils::get_metadata(path); + if(!md) { + return -1; + } + auto count = md->size(); + auto res = gkfs::syscall::gkfs_ecc_write(file, count, 0, count); + if(!res) { + return -1; + } + + return 0; + } + return -1; +} +#endif \ No newline at end of file diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index da534e6d0..d538cd5ec 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -67,6 +67,9 @@ PreloadContext::PreloadContext() hostname = host; PreloadContext::set_replicas( std::stoi(gkfs::env::get_var(gkfs::env::NUM_REPL, "0"))); + + PreloadContext::set_ec_ondemand( + 
std::stoi(gkfs::env::get_var(gkfs::env::EC_ONDEMAND, "0")) == 1); } void @@ -459,5 +462,16 @@ PreloadContext::get_replicas() { return replicas_; } +void +PreloadContext::set_ec_ondemand(const bool ec_ondemand) { + ec_ondemand_ = ec_ondemand; +} + +bool +PreloadContext::get_ec_ondemand() { + return ec_ondemand_; +} + + } // namespace preload } // namespace gkfs -- GitLab From a8fb6144372dc5975e16a2a41b8a39f3cef074c8 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 29 Jun 2023 13:55:35 +0200 Subject: [PATCH 14/17] Free array --- CMakeLists.txt | 4 ++++ src/client/gkfs_functions.cpp | 11 +++++++++-- src/client/rpc/forward_data.cpp | 10 ++++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2070e9173..d61d8a675 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -211,6 +211,10 @@ if(GKFS_ENABLE_EC) find_package(Jerasure REQUIRED) endif() +if(GKFS_ENABLE_READ_ERRORS) + add_compile_definitions(GKFS_ENABLE_READ_ERRORS) +endif() + ### Other stuff that can be found out using find_package: # determine the thread library of the system diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 521c217ef..a5579e3e7 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -937,13 +937,13 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, // We have all the data to process a EC - auto matrix = reed_sol_vandermonde_coding_matrix( + int* matrix = reed_sol_vandermonde_coding_matrix( data_servers, CTX->get_replicas(), 8); jerasure_matrix_encode(data_servers, CTX->get_replicas(), 8, matrix, data, coding, gkfs::config::rpc::chunksize); LOG(DEBUG, "EC computation finished"); - + free(matrix); // Write erasure std::string ecc_path = file->path() + "_ecc_" + to_string(i) + "_" + to_string(i + data_servers - 1); @@ -959,6 +959,13 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, } } } + for(unsigned int i = 0; i < data_servers; ++i) { + free(data[i]); + } + for(auto i = 0; i < CTX->get_replicas(); ++i) { + free(coding[i]); + } + free(coding); free(data); } else { diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index bb10cd0cd..25227a726 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -649,18 +649,24 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, int res = 0; // We have all the data to recover the buffer - auto matrix = reed_sol_vandermonde_coding_matrix(data_servers, + int* matrix = reed_sol_vandermonde_coding_matrix(data_servers, CTX->get_replicas(), 8); res = jerasure_matrix_decode(data_servers, CTX->get_replicas(), 8, matrix, 0, erased.data(), data, coding, gkfs::config::rpc::chunksize); + free(matrix); LOG(DEBUG, "EC recovered {}, with result {}", failed_server, res); memcpy(buffer_recover, data[failed_server], gkfs::config::rpc::chunksize); LOG(DEBUG, "EC computation finished"); - + for(unsigned int i = 0; i < data_servers; ++i) { + free(data[i]); + } + for(auto i = 0; i < CTX->get_replicas(); ++i) { + free(coding[i]); + } free(data); free(coding); return true; -- GitLab From 9aeb467f9b733af7979a659193d6663391e15893 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 29 Jun 2023 14:09:16 +0200 Subject: [PATCH 15/17] corrected offset --- src/client/rpc/forward_data.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 25227a726..e503826f0 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ 
-904,7 +904,8 @@ forward_read(const string& path, void* buf, const off64_t offset, // Move recovered_chunk to the buffer, first and last chunk // should subtract... auto recover_size = gkfs::config::rpc::chunksize; - auto recover_offt = chnk_id_file * gkfs::config::rpc::chunksize; + auto recover_offt = (chnk_id_file - chnk_start) * + gkfs::config::rpc::chunksize; auto recover_offt_chunk = 0; if(chnk_id_file == chnk_start) { -- GitLab From 672678f1ce7377da591dd04d852b107df45949f5 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Fri, 30 Jun 2023 13:09:33 +0200 Subject: [PATCH 16/17] Avoided possible recursion loop in failures --- src/client/gkfs_functions.cpp | 2 - src/client/rpc/forward_data.cpp | 123 +++++++++++++++++--------------- 2 files changed, 65 insertions(+), 60 deletions(-) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index a5579e3e7..5ddd79880 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -908,11 +908,9 @@ gkfs_ecc_write(std::shared_ptr file, size_t count, for(unsigned int i = 0; i < data_servers; ++i) { data[i] = (char*) malloc(gkfs::config::rpc::chunksize); - // memset(data[i], 0, gkfs::config::rpc::chunksize); } for(auto i = 0; i < CTX->get_replicas(); ++i) { coding[i] = (char*) malloc(gkfs::config::rpc::chunksize); - // memset(coding[i], 0, gkfs::config::rpc::chunksize); } LOG(DEBUG, diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index e503826f0..a400af25f 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -589,11 +589,9 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, for(unsigned int i = 0; i < data_servers; ++i) { data[i] = (char*) malloc(gkfs::config::rpc::chunksize); - memset(data[i], 0, gkfs::config::rpc::chunksize); } for(auto i = 0; i < CTX->get_replicas(); ++i) { coding[i] = (char*) malloc(gkfs::config::rpc::chunksize); - memset(coding[i], 0, gkfs::config::rpc::chunksize); } auto initial_row_chunk = (chunk_candidate / data_servers) * data_servers; @@ -611,7 +609,8 @@ gkfs_ecc_recover(const std::string& path, void* buffer_recover, for(uint64_t j = 0; j < data_servers; ++j) { std::set failed; LOG(DEBUG, "Reading Chunk {} -> {}, from server {}", i, i + j, j); - + // We set num_replicas to 0 to avoid recursion issues + // Here we only want to read, not recover; this could be done without the loop auto out = gkfs::rpc::forward_read( path, data[j], (j + i) * gkfs::config::rpc::chunksize, gkfs::config::rpc::chunksize, 0, failed); @@ -877,69 +876,77 @@ forward_read(const string& path, void* buf, const off64_t offset, // Decode the data // Fill the gaps, and then remove the failed server while // keeping the variables consistent. - auto failed_server = targets[idx]; + if(num_copies > 0) { + auto failed_server = targets[idx]; - // For all the chunks activated in the bitset, recover and fill the - // buffer.
+ for(auto chnk_id_file = chnk_start; chnk_id_file <= chnk_end; + chnk_id_file++) { + // Continue if chunk does not hash to this host + // We only check if we are not using replicas - if(!(gkfs::rpc::get_bitset(read_bitset_vect[failed_server], - chnk_id_file - chnk_start))) { + if(!(gkfs::rpc::get_bitset(read_bitset_vect[failed_server], + chnk_id_file - chnk_start))) { - continue; - } + continue; + } - // We have a chunk to recover - // We don't need to worry about offset etc... just use the chunk - // number - void* recovered_chunk = malloc(gkfs::config::rpc::chunksize); - auto recovered = gkfs::rpc::gkfs_ecc_recover( - path, recovered_chunk, chnk_id_file, failed_server); - LOG(DEBUG, "Recovered server: {} Result {}", failed_server, - recovered); - - // Move recovered_chunk to the buffer, first and last chunk - // should subtract... - auto recover_size = gkfs::config::rpc::chunksize; - auto recover_offt = (chnk_id_file - chnk_start) * - gkfs::config::rpc::chunksize; - auto recover_offt_chunk = 0; - - if(chnk_id_file == chnk_start) { - // We may need to move the offset of both buffers and reduce - // the recover size - auto offset_fc = - block_overrun(offset, gkfs::config::rpc::chunksize); - recover_offt += offset_fc; - recover_offt_chunk += offset_fc; - recover_size -= offset_fc; - } - if(chnk_id_file == chnk_end) { - // We may need to reduce the recover size. - if(!is_aligned(offset + read_size, - gkfs::config::rpc::chunksize)) { - recover_size -= - block_underrun(offset + read_size, - gkfs::config::rpc::chunksize); + // We have a chunk to recover + // We don't need to worry about offset etc... just use the + // chunk number + void* recovered_chunk = + malloc(gkfs::config::rpc::chunksize); + auto recovered = gkfs::rpc::gkfs_ecc_recover( + path, recovered_chunk, chnk_id_file, failed_server); + LOG(DEBUG, "Recovered server: {} Result {}", failed_server, + recovered); + + // Move recovered_chunk to the buffer, first and last chunk + // should subtract... + auto recover_size = gkfs::config::rpc::chunksize; + auto recover_offt = (chnk_id_file - chnk_start) * + gkfs::config::rpc::chunksize; + auto recover_offt_chunk = 0; + + if(chnk_id_file == chnk_start) { + // We may need to move the offset of both buffers and + // reduce the recover size + auto offset_fc = block_overrun( + offset, gkfs::config::rpc::chunksize); + recover_offt += offset_fc; + recover_offt_chunk += offset_fc; + recover_size -= offset_fc; + } + if(chnk_id_file == chnk_end) { + // We may need to reduce the recover size.
+ if(!is_aligned(offset + read_size, + gkfs::config::rpc::chunksize)) { + recover_size -= block_underrun( + offset + read_size, + gkfs::config::rpc::chunksize); + } + } + LOG(DEBUG, + "Recovered chunk : Start Offset {}/OffsetChunk {} - Size {}", + recover_offt, recover_offt_chunk, recover_size); + + if(recovered) { + err = 0; + out_size += static_cast(recover_size); + } else { + err = EIO; + out_size = -1; + LOG(ERROR, "Can't recover error with ec"); } - } - LOG(DEBUG, - "Recovered chunk : Start Offset {}/OffsetChunk {} - Size {}", - recover_offt, recover_offt_chunk, recover_size); - if(recovered) { - err = 0; - out_size += static_cast(recover_size); - } - memcpy((char*) buf + recover_offt, - (char*) recovered_chunk + recover_offt_chunk, - recover_size); + memcpy((char*) buf + recover_offt, + (char*) recovered_chunk + recover_offt_chunk, + recover_size); - free(recovered_chunk); + free(recovered_chunk); + } } #endif -- GitLab From c8e5ee5785cc104d71903ecf59f87cf243bbf768 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Tue, 3 Oct 2023 08:26:49 +0200 Subject: [PATCH 17/17] Updated size bug --- src/client/gkfs_functions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 5ddd79880..d5120ab13 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -1029,7 +1029,7 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, // Only compute Erasure codes if we do not have enabled the ondemand // environment variable if(CTX->get_ec_ondemand() == false) { - auto res = gkfs_ecc_write(file, count, offset, updated_size); + auto res = gkfs_ecc_write(file, count, offset, write_size); if(res) { LOG(ERROR, "erasure code writing failed"); } @@ -1039,9 +1039,9 @@ gkfs_pwrite(std::shared_ptr file, const char* buf, auto ret_write_repl = gkfs::rpc::forward_write(*path, buf, offset, count, num_replicas); - write_size = ret_write_repl.second; - } + write_size = ret_write_repl.second; } +} #endif if(err) { LOG(WARNING, "gkfs::rpc::forward_write() failed with err '{}'", err); -- GitLab
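Taken together, the encode and recover paths in this series reduce to a single Jerasure round trip. The following self-contained toy sketch shows it end to end (illustration only: k, m, and the block size are arbitrary here, whereas the client uses k = servers - num_repl, m = num_repl, and gkfs::config::rpc::chunksize; header locations can vary between Jerasure installs).

    #include <jerasure.h>
    #include <reed_sol.h>
    #include <cassert>
    #include <cstdlib>
    #include <cstring>

    int
    main() {
        const int k = 4, m = 2, w = 8; // 4 data blocks, 2 parity blocks, GF(2^8)
        const int size = 4096;         // block size, multiple of sizeof(long)

        char** data = (char**) malloc(k * sizeof(char*));
        char** coding = (char**) malloc(m * sizeof(char*));
        for(int i = 0; i < k; ++i) {
            data[i] = (char*) malloc(size);
            memset(data[i], 'a' + i, size); // recognizable payload per block
        }
        for(int i = 0; i < m; ++i)
            coding[i] = (char*) malloc(size);

        // Encode: the same calls gkfs_ecc_write() performs per chunk line
        int* matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
        jerasure_matrix_encode(k, m, w, matrix, data, coding, size);

        // Simulate losing data block 2, then recover it as gkfs_ecc_recover()
        // does; the erasure list is terminated by -1
        memset(data[2], 0, size);
        int erasures[] = {2, -1};
        int rc = jerasure_matrix_decode(k, m, w, matrix, 0, erasures, data,
                                        coding, size);
        assert(rc == 0 && data[2][0] == 'a' + 2);

        for(int i = 0; i < k; ++i)
            free(data[i]);
        for(int i = 0; i < m; ++i)
            free(coding[i]);
        free(data);
        free(coding);
        free(matrix);
        return 0;
    }

The same decode call also applies when a parity block is lost (erasure ids k..k+m-1), which is why the recovery path in this series can tolerate up to m simultaneous failures per chunk line.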