From 32b81b93dfaa8dad3f4a6b03098862664935bccf Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 26 Jan 2022 13:35:00 +0100 Subject: [PATCH] GekkoFS daemon can now be restarted without losing its data A subdirectory is no longer created by default. Therefore, a server restart uses same directory for both data and metadata. Note, that the rootdir can be cleaned with the -c argument. For multiple daemons on one machine, the new argument --rootdir-suffix can be used which creates a subdirectory within the rootdir with a user-defined name. This also allows restarting multiple daemons on one node without losing data. --- README.md | 24 +++++++++++++------- include/config.hpp | 6 +++++ include/daemon/classes/fs_data.hpp | 21 ++++++++++++------ src/client/preload_util.cpp | 9 +++++++- src/daemon/classes/fs_data.cpp | 10 +++++++++ src/daemon/daemon.cpp | 35 +++++++++++++++++++++++------- src/daemon/util.cpp | 12 +++++++--- 7 files changed, 90 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 89abed982..28779a71c 100644 --- a/README.md +++ b/README.md @@ -83,12 +83,13 @@ Further options are available: ```bash Allowed options -Usage: bin/gkfs_daemon [OPTIONS] +Usage: src/daemon/gkfs_daemon [OPTIONS] Options: -h,--help Print this help message and exit -m,--mountdir TEXT REQUIRED Virtual mounting directory where GekkoFS is available. -r,--rootdir TEXT REQUIRED Local data directory where GekkoFS data for this daemon is stored. + -s,--rootdir-suffix TEXT Creates an additional directory within the rootdir, allowing multiple daemons on one node. -i,--metadir TEXT Metadata directory where GekkoFS RocksDB data directory is located. If not set, rootdir is used. -l,--listen TEXT Address or interface to bind the daemon to. Default: local hostname. When used with ofi+verbs the FI_VERBS_IFACE environment variable is set accordingly which associates the verbs device with the network interface. In case FI_VERBS_IFACE is already defined, the argument is ignored. 
Default 'ib'. @@ -97,27 +98,34 @@ Options: Available: {ofi+sockets, ofi+verbs, ofi+psm2} for TCP, Infiniband, and Omni-Path, respectively. (Default ofi+sockets) Libfabric must have enabled support verbs or psm2. --auto-sm Enables intra-node communication (IPCs) via the `na+sm` (shared memory) protocol, instead of using the RPC protocol. (Default off) - --clean-rootdir Cleans Rootdir >before< launching the deamon + -c,--clean-rootdir Cleans Rootdir >before< launching the deamon --version Print version and exit. ``` +It is possible to run multiple independent GekkoFS instances on the same node. Note, that when these GekkoFS instances +are part of the same file system, use the same `rootdir` with different `rootdir-suffix`es. + Shut it down by gracefully killing the process (SIGTERM). ## Use the GekkoFS client library -tl;dr example: +tl;dr example: + ```bash export LIBGKFS_HOSTS_FILE= LD_PRELOAD=/lib64/libgkfs_intercept.so cp ~/some_input_data /some_input_data LD_PRELOAD=/lib64/libgkfs_intercept.so md5sum ~/some_input_data /some_input_data ``` -Clients read the hostsfile to determine which daemons are part of the GekkoFS instance. -Because the client is an interposition library that is loaded within the context of the application, this information is passed via the environment variable `LIBGKFS_HOSTS_FILE` pointing to the hostsfile path. 
The client library itself is loaded for each +application process via the `LD_PRELOAD` environment variable intercepting file system related calls. If they are +within (or hierarchically under) the GekkoFS mount directory they are processed in the library, otherwise they are +passed to the kernel. -Note, if `LD_PRELOAD` is not pointing to the library and, hence the client is not loaded, the mounting directory appear to be empty. +Note, if `LD_PRELOAD` is not pointing to the library and, hence the client is not loaded, the mounting directory appears +to be empty. For MPI application, the `LD_PRELOAD` variable can be passed with the `-x` argument for `mpirun/mpiexec`. diff --git a/include/config.hpp b/include/config.hpp index dd77117d6..56fcd0ede 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -76,6 +76,10 @@ constexpr auto implicit_data_removal = true; // level constexpr auto create_exist_check = true; } // namespace metadata +namespace data { +// directory name below rootdir where chunks are placed +constexpr auto chunk_dir = "chunks"; +} // namespace data namespace rpc { constexpr auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB) @@ -94,6 +98,8 @@ constexpr auto daemon_handler_xstreams = 4; namespace rocksdb { // Write-ahead logging of rocksdb constexpr auto use_write_ahead_log = false; +// directory name where the rocksdb instance is placed +constexpr auto data_dir = "rocksdb"; } // namespace rocksdb } // namespace gkfs::config diff --git a/include/daemon/classes/fs_data.hpp b/include/daemon/classes/fs_data.hpp index e638b5dda..2b0b7b9fe 100644 --- a/include/daemon/classes/fs_data.hpp +++ b/include/daemon/classes/fs_data.hpp @@ -50,20 +50,21 @@ namespace daemon { class FsData { private: - FsData() {} + FsData() = default; // logger std::shared_ptr spdlogger_; // paths - std::string rootdir_; - std::string mountdir_; - std::string metadir_; + std::string rootdir_{}; + std::string rootdir_suffix_{}; + std::string mountdir_{}; + std::string 
metadir_{}; // RPC management - std::string rpc_protocol_; - std::string bind_addr_; - std::string hosts_file_; + std::string rpc_protocol_{}; + std::string bind_addr_{}; + std::string hosts_file_{}; bool use_auto_sm_; // Database @@ -104,6 +105,12 @@ public: void rootdir(const std::string& rootdir_); + const std::string& + rootdir_suffix() const; + + void + rootdir_suffix(const std::string& rootdir_suffix_); + const std::string& mountdir() const; diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index b4e4206b2..595dcae68 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -174,6 +174,14 @@ load_hostfile(const std::string& path) { "Hosts file found but no suitable addresses could be extracted"); } extract_protocol(hosts[0].second); + // sort hosts so that data always hashes to the same place during restart + std::sort(hosts.begin(), hosts.end()); + // remove rootdir suffix from host after sorting as no longer required + for(auto& h : hosts) { + auto idx = h.first.rfind("#"); + if(idx != string::npos) + h.first.erase(idx, h.first.length()); + } return hosts; } @@ -362,7 +370,6 @@ read_hosts_file() { } LOG(INFO, "Hosts pool size: {}", hosts.size()); - sort(hosts.begin(), hosts.end()); // Sort hosts by alphanumerical value. 
return hosts; } diff --git a/src/daemon/classes/fs_data.cpp b/src/daemon/classes/fs_data.cpp index 78d364562..ca5ae88aa 100644 --- a/src/daemon/classes/fs_data.cpp +++ b/src/daemon/classes/fs_data.cpp @@ -80,6 +80,16 @@ FsData::rootdir(const std::string& rootdir) { FsData::rootdir_ = rootdir; } +const std::string& +FsData::rootdir_suffix() const { + return rootdir_suffix_; +} + +void +FsData::rootdir_suffix(const std::string& rootdir_suffix) { + FsData::rootdir_suffix_ = rootdir_suffix; +} + const std::string& FsData::mountdir() const { return mountdir_; diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 456bcac13..1a88b6833 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -73,6 +73,7 @@ static mutex mtx; // mutex to wait on shutdown conditional variable struct cli_options { string mountdir; string rootdir; + string rootdir_suffix; string metadir; string listen; string hosts_file; @@ -239,7 +240,8 @@ init_rpc_server() { void init_environment() { // Initialize metadata db - std::string metadata_path = GKFS_DATA->metadir() + "/rocksdb"s; + auto metadata_path = fmt::format("{}/{}", GKFS_DATA->metadir(), + gkfs::config::rocksdb::data_dir); GKFS_DATA->spdlogger()->debug("{}() Initializing metadata DB: '{}'", __func__, metadata_path); try { @@ -286,7 +288,8 @@ init_environment() { #endif // Initialize data backend - std::string chunk_storage_path = GKFS_DATA->rootdir() + "/data/chunks"s; + auto chunk_storage_path = fmt::format("{}/{}", GKFS_DATA->rootdir(), + gkfs::config::data::chunk_dir); GKFS_DATA->spdlogger()->debug("{}() Initializing storage backend: '{}'", __func__, chunk_storage_path); fs::create_directories(chunk_storage_path); @@ -333,6 +336,8 @@ init_environment() { gkfs::metadata::Metadata root_md{S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO}; try { gkfs::metadata::create("/", root_md); + } catch(const gkfs::metadata::ExistsException& e) { + // launched on existing directory which is no error } catch(const std::exception& e) { throw 
runtime_error("Failed to write root metadentry to KV store: "s + e.what()); @@ -527,7 +532,20 @@ parse_input(const cli_options& opts, const CLI::App& desc) { // In forwarding mode, the backend is shared auto rootdir_path = fs::path(rootdir); #else - auto rootdir_path = fs::path(rootdir) / fmt::format_int(getpid()).str(); + auto rootdir_path = fs::path(rootdir); + if(desc.count("--rootdir-suffix")) { + if(opts.rootdir_suffix == gkfs::config::data::chunk_dir || + opts.rootdir_suffix == gkfs::config::rocksdb::data_dir) + throw runtime_error(fmt::format( + "rootdir_suffix '{}' is reserved and not allowed.", + opts.rootdir_suffix)); + if(opts.rootdir_suffix.find('#') != string::npos) + throw runtime_error(fmt::format( + "The character '#' in the rootdir_suffix is reserved and not allowed.")); + // append path with a directory separator + rootdir_path /= opts.rootdir_suffix; + GKFS_DATA->rootdir_suffix(opts.rootdir_suffix); + } #endif if(desc.count("--clean-rootdir")) { @@ -592,13 +610,14 @@ main(int argc, const char* argv[]) { // clang-format off desc.add_option("--mountdir,-m", opts.mountdir, "Virtual mounting directory where GekkoFS is available.") - ->required() - ->expected(1); + ->required(); desc.add_option( "--rootdir,-r", opts.rootdir, "Local data directory where GekkoFS data for this daemon is stored.") - ->required() - ->expected(1); + ->required(); + desc.add_option( + "--rootdir-suffix,-s", opts.rootdir_suffix, + "Creates an additional directory within the rootdir, allowing multiple daemons on one node."); desc.add_option( "--metadir,-i", opts.metadir, "Metadata directory where GekkoFS' RocksDB data directory is located. If not set, rootdir is used."); @@ -622,7 +641,7 @@ main(int argc, const char* argv[]) { "Enables intra-node communication (IPCs) via the `na+sm` (shared memory) protocol, " "instead of using the RPC protocol. 
(Default off)"); desc.add_flag( - "--clean-rootdir", + "--clean-rootdir,-c", "Cleans Rootdir >before< launching the deamon"); desc.add_flag("--version", "Print version and exit."); // clang-format on diff --git a/src/daemon/util.cpp b/src/daemon/util.cpp index 0fb7ca327..c4042882d 100644 --- a/src/daemon/util.cpp +++ b/src/daemon/util.cpp @@ -39,7 +39,8 @@ namespace gkfs::utils { * @internal * Appends a single line to an existing shared hosts file with the RPC * connection information of this daemon. If it doesn't exist, it is created. - * The line includes the hostname and the RPC server's listening address. + * The line includes the hostname (and rootdir_suffix if applicable) and the RPC + * server's listening address. * * NOTE, the shared file system must support strong consistency semantics to * ensure each daemon can write its information to the file even if the write @@ -56,8 +57,13 @@ populate_hosts_file() { throw runtime_error(fmt::format("Failed to open hosts file '{}': {}", hosts_file, strerror(errno))); } - lfstream << fmt::format("{} {}", gkfs::rpc::get_my_hostname(true), - RPC_DATA->self_addr_str()) + // if rootdir_suffix is used, append it to hostname + auto hostname = + GKFS_DATA->rootdir_suffix().empty() + ? gkfs::rpc::get_my_hostname(true) + : fmt::format("{}#{}", gkfs::rpc::get_my_hostname(true), + GKFS_DATA->rootdir_suffix()); + lfstream << fmt::format("{} {}", hostname, RPC_DATA->self_addr_str()) << std::endl; if(!lfstream) { throw runtime_error( -- GitLab