From 0689244034d92a4a5ebfabe4a4b8e520996935ca Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 27 Aug 2021 14:40:08 +0200 Subject: [PATCH 1/2] Adding clion run environment to git ignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a6a9e0756..e07ffaac9 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,7 @@ # .idea *.idea/ +.run/ # OS generated files .DS_Store -- GitLab From bb560826f8e68ed39d96b82942f68148ba7e340d Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 27 Jan 2022 16:40:39 +0100 Subject: [PATCH 2/2] Adding documentation for daemon (1st part) --- include/daemon/backend/data/chunk_storage.hpp | 104 +++++++- include/daemon/backend/data/data_module.hpp | 26 +- include/daemon/backend/data/file_handle.hpp | 37 ++- include/daemon/backend/metadata/db.hpp | 114 ++++++++- include/daemon/daemon.hpp | 15 +- include/daemon/env.hpp | 4 + include/daemon/handler/rpc_defs.hpp | 4 + include/daemon/handler/rpc_util.hpp | 63 ++++- include/daemon/ops/data.hpp | 238 +++++++++++++----- include/daemon/util.hpp | 10 + src/daemon/backend/data/chunk_storage.cpp | 84 +++---- src/daemon/backend/data/data_module.cpp | 5 +- src/daemon/backend/metadata/db.cpp | 127 +++------- src/daemon/daemon.cpp | 96 ++++++- src/daemon/handler/srv_data.cpp | 107 ++++++-- src/daemon/handler/srv_management.cpp | 23 +- src/daemon/handler/srv_metadata.cpp | 164 +++++++++++- src/daemon/ops/data.cpp | 43 ++-- src/daemon/util.cpp | 21 +- 19 files changed, 967 insertions(+), 318 deletions(-) diff --git a/include/daemon/backend/data/chunk_storage.hpp b/include/daemon/backend/data/chunk_storage.hpp index 873302fff..b585469ed 100644 --- a/include/daemon/backend/data/chunk_storage.hpp +++ b/include/daemon/backend/data/chunk_storage.hpp @@ -25,6 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Chunk storage declarations handles all interactions with the + * node-local storage system. + */ #ifndef GEKKOFS_CHUNK_STORAGE_HPP #define GEKKOFS_CHUNK_STORAGE_HPP @@ -47,56 +51,140 @@ struct ChunkStat { unsigned long chunk_size; unsigned long chunk_total; unsigned long chunk_free; -}; +}; //!< Struct for attaining current usage of storage backend +/** + * @brief Generic exception for ChunkStorage + */ class ChunkStorageException : public std::system_error { public: ChunkStorageException(const int err_code, const std::string& s) : std::system_error(err_code, std::generic_category(), s){}; }; +/** + * @brief ChunkStorage class handles _all_ interaction with node-local storage + * system and is run as a single instance within the GekkoFS daemon. + */ class ChunkStorage { private: - std::shared_ptr log_; - - std::string root_path_; - size_t chunksize_; - - inline std::string + std::shared_ptr log_; //!< Class logger + + std::string root_path_; //!< Path to GekkoFS root directory + size_t chunksize_; //!< File system chunksize. TODO Why does that exist? + + /** + * @brief Converts an internal gkfs path under the root dir to the absolute + * path of the system. + * @param internal_path E.g., /foo/bar + * @return Absolute path, e.g., /tmp/rootdir//data/chunks/foo:bar + */ + [[nodiscard]] inline std::string absolute(const std::string& internal_path) const; + /** + * @brief Returns the chunk dir directory for a given path which is expected + * to be absolute. + * @param file_path + * @return Chunk dir path + */ static inline std::string get_chunks_dir(const std::string& file_path); + /** + * @brief Returns the backend chunk file path for a given internal path. 
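+ * @internal
+ * Illustrative mapping, assuming a rootdir placeholder:
+ * @code
+ * // "/foo/bar", chunk 0 -> "<rootdir>/data/chunks/foo:bar/0"
+ * auto chunk_path = get_chunk_path("/foo/bar", 0);
+ * @endcode
+ * @endinternal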
+ * @param file_path Internal file path, e.g., /foo/bar + * @param chunk_id Number of chunk id + * @return Chunk file path, e.g., /foo/bar + * /tmp/rootdir/>/data/chunks/foo:bar/0 + */ static inline std::string get_chunk_path(const std::string& file_path, gkfs::rpc::chnk_id_t chunk_id); + /** + * @brief Initializes the chunk space for a GekkoFS file, creating its + * directory on the local file system. + * @param file_path Chunk file path, e.g., /foo/bar + */ void init_chunk_space(const std::string& file_path) const; public: + /** + * @brief Initializes the ChunkStorage object on daemon launch. + * @param path Root directory where all data is placed on the local FS. + * @param chunksize Used chunksize in this GekkoFS instance. + * @throws ChunkStorageException on launch failure + */ ChunkStorage(std::string& path, size_t chunksize); + /** + * @brief Removes chunk directory with all its files which is a recursive + * remove operation on the chunk directory. + * @param file_path Chunk file path, e.g., /foo/bar + * @throws ChunkStorageException + */ void destroy_chunk_space(const std::string& file_path) const; + /** + * @brief Writes a single chunk file and is usually called by an Argobots + * tasklet. + * @param file_path Chunk file path, e.g., /foo/bar + * @param chunk_id Number of chunk id + * @param buf Buffer to write to chunk + * @param size Amount of bytes to write to the chunk file + * @param offset Offset where to write to the chunk file + * @return The amount of bytes written + * @throws ChunkStorageException with its error code + */ ssize_t write_chunk(const std::string& file_path, gkfs::rpc::chnk_id_t chunk_id, const char* buf, size_t size, off64_t offset) const; + /** + * @brief Reads a single chunk file and is usually called by an Argobots + * tasklet. + * @param file_path Chunk file path, e.g., /foo/bar + * @param chunk_id Number of chunk id + * @param buf Buffer to read to from chunk + * @param size Amount of bytes to read to the chunk file + * @param offset Offset where to read from the chunk file + * @return The amount of bytes read + * @throws ChunkStorageException with its error code + */ ssize_t read_chunk(const std::string& file_path, gkfs::rpc::chnk_id_t chunk_id, char* buf, size_t size, off64_t offset) const; + /** + * @brief Delete all chunks starting with chunk a chunk id. + * @param file_path Chunk file path, e.g., /foo/bar + * @param chunk_start Number of chunk id + * @throws ChunkStorageException with its error code + */ void trim_chunk_space(const std::string& file_path, gkfs::rpc::chnk_id_t chunk_start); + /** + * @brief Truncates a single chunk file to a given byte length. + * @param file_path Chunk file path, e.g., /foo/bar + * @param chunk_id Number of chunk id + * @param length Length of bytes to truncate the chunk to + * @throws ChunkStorageException + */ void truncate_chunk_file(const std::string& file_path, gkfs::rpc::chnk_id_t chunk_id, off_t length); - ChunkStat + /** + * @brief Calls statfs on the chunk directory to get statistic on its used + * storage space. 
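+ * @internal
+ * Illustrative use of the result (`storage` is an assumed instance):
+ * @code
+ * auto cs = storage.chunk_stat();
+ * auto bytes_free = cs.chunk_size * cs.chunk_free; // free space in bytes
+ * @endcode
+ * @endinternal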
+ * @return ChunkStat struct + * @throws ChunkStorageException + */ + [[nodiscard]] ChunkStat chunk_stat() const; }; diff --git a/include/daemon/backend/data/data_module.hpp b/include/daemon/backend/data/data_module.hpp index 73bbecd33..de3bda099 100644 --- a/include/daemon/backend/data/data_module.hpp +++ b/include/daemon/backend/data/data_module.hpp @@ -25,6 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Declaration for GekkoFS data module object that is run once per daemon + * instance. + */ #ifndef GEKKOFS_DAEMON_DATA_LOGGING_HPP #define GEKKOFS_DAEMON_DATA_LOGGING_HPP @@ -33,12 +37,16 @@ namespace gkfs::data { +/** + * @brief The data module class providing the data backend for the daemon as a + * singleton. + */ class DataModule { private: - DataModule() {} + DataModule() = default; - std::shared_ptr log_; + std::shared_ptr log_; ///< Logging instance for data backend public: static constexpr const char* LOGGER_NAME = "DataModule"; @@ -54,16 +62,26 @@ public: void operator=(DataModule const&) = delete; - const std::shared_ptr& + /** + * @brief Returns the data module log handle. + * @return Pointer to the spdlog instance + */ + [[nodiscard]] const std::shared_ptr& log() const; + /** + * @brief Attaches a logging instance to the data module. + * @param log spdlog shared pointer instance + */ void log(const std::shared_ptr& log); }; #define GKFS_DATA_MOD \ (static_cast( \ - gkfs::data::DataModule::getInstance())) + gkfs::data::DataModule::getInstance())) ///< macro to access the + ///< DataModule singleton + ///< across the daemon } // namespace gkfs::data diff --git a/include/daemon/backend/data/file_handle.hpp b/include/daemon/backend/data/file_handle.hpp index 2a5c85d4e..d130489d5 100644 --- a/include/daemon/backend/data/file_handle.hpp +++ b/include/daemon/backend/data/file_handle.hpp @@ -25,7 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ - +/** + * @brief Definitions for the file handle abstraction layer used for the backend + * file system. + */ #ifndef GEKKOFS_DAEMON_FILE_HANDLE_HPP #define GEKKOFS_DAEMON_FILE_HANDLE_HPP @@ -38,21 +41,21 @@ extern "C" { namespace gkfs::data { /** - * File handle to encapsulate a file descriptor, allowing RAII closing of the - * file descriptor + * @brief File handle class to encapsulate a file descriptor, allowing RAII + * closing of the file descriptor. */ class FileHandle { private: - constexpr static const int init_value{-1}; + constexpr static const int init_value{-1}; ///< initial file descriptor - int fd_{init_value}; - std::string path_{}; + int fd_{init_value}; ///< file descriptor + std::string path_{}; ///< chunk file path public: FileHandle() = default; - explicit FileHandle(int fd, std::string path) noexcept : fd_(fd) {} + explicit FileHandle(int fd, const std::string& path) noexcept : fd_(fd) {} FileHandle(FileHandle&& rhs) = default; @@ -73,19 +76,28 @@ public: return !valid(); } - bool + /** + * @brief Checks for valid file descriptor value. + * @return boolean if valid file descriptor + */ + [[nodiscard]] bool valid() const noexcept { return fd_ != init_value; } - int + /** + * @brief Retusn the file descriptor value used in this file handle + * operation. 
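+ * @internal
+ * Illustrative use with POSIX I/O (error handling omitted):
+ * @code
+ * FileHandle fh{open(path.c_str(), O_RDONLY), path};
+ * if(fh.valid())
+ *     pread(fh.native(), buf, count, offset);
+ * @endcode
+ * @endinternal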
+ * @return file descriptor value + */ + [[nodiscard]] int native() const noexcept { return fd_; } /** - * Closes file descriptor and resets it to initial value - * @return + * @brief Closes file descriptor and resets it to initial value + * @return boolean if file descriptor was successfully closed */ bool close() noexcept { @@ -101,6 +113,9 @@ public: return true; } + /** + * @brief Destructor implicitly closes the internal file descriptor. + */ ~FileHandle() { if(fd_ != init_value) close(); diff --git a/include/daemon/backend/metadata/db.hpp b/include/daemon/backend/metadata/db.hpp index 874c7e892..6175320db 100644 --- a/include/daemon/backend/metadata/db.hpp +++ b/include/daemon/backend/metadata/db.hpp @@ -25,7 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ - +/** + * @brief Class declaration for MetadataDB class which uses RocksDB and provides + * a single instance within the daemon. + */ #ifndef GEKKOFS_METADATA_DB_HPP #define GEKKOFS_METADATA_DB_HPP @@ -39,54 +42,145 @@ namespace rdb = rocksdb; namespace gkfs::metadata { +/** + * @brief MetadataDB class providing an abstraction layer to the KV store + * RocksDB. + */ class MetadataDB { private: - std::unique_ptr db; - rdb::Options options; - rdb::WriteOptions write_opts; - std::string path; - std::shared_ptr log_; - + std::unique_ptr db; ///< RocksDB instance + rdb::Options options; ///< RocksDB configuration + rdb::WriteOptions write_opts; ///< RocksDB write configuration + std::string path; ///< Path to where RocksDB persists its data + std::shared_ptr log_; ///< MetadataDB internal logger + + /** + * @brief Sets up specific settings to optimize RocksDB instance to + * environment on launch. + * @param options RocksDB configurations + */ static void optimize_rocksdb_options(rdb::Options& options); public: + /** + * @brief Exception wrapper on Status object. + * @param s RocksDB status + * @throws Throws NotFoundException if s.IsNotFound(), general DBException + * otherwise + */ static inline void throw_rdb_status_excpt(const rdb::Status& s); + /** + * @brief Constructor, called when daemon is started and connects to KV + * store. + * @param path Path to where RocksDB persists its data + */ explicit MetadataDB(const std::string& path); - std::string + /** + * @brief Gets the KV store value for a key. + * @param key KV store key + * @return KV store value + * @throws DBException on failure, NotFoundException if entry doesn't exist + */ + [[nodiscard]] std::string get(const std::string& key) const; + /** + * @brief Puts an entry into the KV store. + * @param key KV store key + * @param val KV store value + * @throws DBException + */ void put(const std::string& key, const std::string& val); + /** + * @brief Puts an entry into the KV store if it doesn't exist. + * @param key KV store key + * @param val KV store value + * @throws DBException on failure, ExistException if entry already exists + */ void put_no_exist(const std::string& key, const std::string& val); + /** + * @brief Removes an entry from the KV store. + * @param key KV store key + * @throws DBException on failure, NotFoundException if entry doesn't exist + */ void remove(const std::string& key); + /** + * @brief Checks for existence of an entry. + * @param key KV store key + * @return true if exists + * @throws DBException on failure, NotFoundException if entry doesn't exist + */ bool exists(const std::string& key); + /** + * Updates a metadata entry atomically and also allows to change keys. 
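+ * @internal
+ * Illustrative key rename (keys are assumed values):
+ * @code
+ * auto val = db.get("/foo");
+ * db.update("/foo", "/bar", val); // removes "/foo", writes "/bar"
+ * @endcode
+ * @endinternal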
+ * @param old_key KV store key to be replaced + * @param new_key new KV store key + * @param val KV store value + * @throws DBException on failure, NotFoundException if entry doesn't exist + */ void update(const std::string& old_key, const std::string& new_key, const std::string& val); + /** + * @brief Increases only the size part of the metadata entry via a RocksDB + * Operand. + * @param key KV store key + * @param size new size for entry + * @param append + * @throws DBException on failure, NotFoundException if entry doesn't exist + */ void increase_size(const std::string& key, size_t size, bool append); + /** + * @brief Decreases only the size part of the metadata entry via a RocksDB + * Operand/ + * @param key KV store key + * @param size new size for entry + * @throws DBException on failure, NotFoundException if entry doesn't exist + */ void decrease_size(const std::string& key, size_t size); - std::vector> + /** + * @brief Return all file names and modes for the first-level entries of the + * given directory. + * @param dir directory prefix string + * @return vector of pair , + * where name is the name of the entries and is_dir + * is true in the case the entry is a directory. + */ + [[nodiscard]] std::vector> get_dirents(const std::string& dir) const; - std::vector> + /** + * @brief Return all file names and modes for the first-level entries of the + * given directory including their sizes and creation time. + * @param dir directory prefix string + * @return vector of pair , + * where name is the name of the entries and is_dir + * is true in the case the entry is a directory. + */ + [[nodiscard]] std::vector> get_dirents_extended(const std::string& dir) const; + /** + * @brief Iterate over complete database, note ONLY used for debugging and + * is therefore unused. + */ void iterate_all(); }; diff --git a/include/daemon/daemon.hpp b/include/daemon/daemon.hpp index bfeea7976..994df7daa 100644 --- a/include/daemon/daemon.hpp +++ b/include/daemon/daemon.hpp @@ -25,7 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ - +/** + * @brief The main header file defining singletons and including needed headers + * in the daemon. + */ #ifndef GKFS_DAEMON_DAEMON_HPP #define GKFS_DAEMON_DAEMON_HPP @@ -47,8 +50,14 @@ extern "C" { #include #define GKFS_DATA \ - (static_cast(gkfs::daemon::FsData::getInstance())) + (static_cast( \ + gkfs::daemon::FsData::getInstance())) ///< GKFS_DATA macro to access + ///< the FsData singleton + ///< across the daemon #define RPC_DATA \ - (static_cast(gkfs::daemon::RPCData::getInstance())) + (static_cast( \ + gkfs::daemon::RPCData::getInstance())) ///< RPC_DATA macro to access + ///< the RPCData singleton + ///< across the daemon #endif // GKFS_DAEMON_DAEMON_HPP diff --git a/include/daemon/env.hpp b/include/daemon/env.hpp index 0ff8ff03b..d35fd0886 100644 --- a/include/daemon/env.hpp +++ b/include/daemon/env.hpp @@ -25,6 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief This file defines all prefixes for environment variables that can be + * set by the user. + */ #ifndef GKFS_DAEMON_ENV #define GKFS_DAEMON_ENV diff --git a/include/daemon/handler/rpc_defs.hpp b/include/daemon/handler/rpc_defs.hpp index 31317769b..b1b5262eb 100644 --- a/include/daemon/handler/rpc_defs.hpp +++ b/include/daemon/handler/rpc_defs.hpp @@ -25,6 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Declare all Margo RPC handler functions by name that + * were registered in the daemon's main source file. 
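+ *
+ * Handlers are declared with Margo's helper macro, e.g. (illustrative):
+ * @code
+ * DECLARE_MARGO_RPC_HANDLER(rpc_srv_write)
+ * @endcode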
+ */ #ifndef GKFS_DAEMON_RPC_DEFS_HPP #define GKFS_DAEMON_RPC_DEFS_HPP diff --git a/include/daemon/handler/rpc_util.hpp b/include/daemon/handler/rpc_util.hpp index 3837bb2e5..5bc62a1e9 100644 --- a/include/daemon/handler/rpc_util.hpp +++ b/include/daemon/handler/rpc_util.hpp @@ -25,6 +25,13 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Provide helper functions for Margo's RPC interfaces reduce code + * verbosity of the RPC handler functions. + * @internal + * Note, this is a temporary solution and is planned to be refactored. + * @endinternal + */ #ifndef GEKKOFS_DAEMON_RPC_UTIL_HPP #define GEKKOFS_DAEMON_RPC_UTIL_HPP @@ -39,6 +46,16 @@ extern "C" { namespace gkfs::rpc { +/** + * @brief Frees all given RPC resources allocated by Margo. + * @tparam InputType Any RPC input struct from client requests + * @tparam OutputType Any RPC output struct for client response + * @param handle Pointer to Mercury RPC handle + * @param input Pointer to input struct + * @param output Pointer to output struct + * @param bulk_handle Pointer to Mercury bulk handle + * @return Mercury error code. HG_SUCCESS on success. + */ template inline hg_return_t cleanup(hg_handle_t* handle, InputType* input, OutputType* output, @@ -67,6 +84,20 @@ cleanup(hg_handle_t* handle, InputType* input, OutputType* output, return ret; } +/** + * @brief Responds to a client request. + * @internal + * Note, Mercury frees the output struct itself after it responded to the + * client. Attempting to explicitly free the output struct can cause segfaults + * because the response is non-blocking and we could free the resources before + * Mercury has responded. + * @endinternal + * + * @tparam OutputType Any RPC output struct for client response + * @param handle Pointer to Mercury RPC handle + * @param output Pointer to output struct + * @return Mercury error code. HG_SUCCESS on success. + */ template inline hg_return_t respond(hg_handle_t* handle, OutputType* output) { @@ -78,7 +109,17 @@ respond(hg_handle_t* handle, OutputType* output) { } return ret; } - +/** + * @brief Combines responding to the client and cleaning up all RPC resources + * after. + * @tparam InputType Any RPC input struct from client requests + * @tparam OutputType Any RPC output struct for client response + * @param handle Pointer to Mercury RPC handle + * @param input Pointer to input struct + * @param output Pointer to output struct + * @param bulk_handle Pointer to Mercury bulk handle + * @return Mercury error code. HG_SUCCESS on success. + */ template inline hg_return_t cleanup_respond(hg_handle_t* handle, InputType* input, OutputType* output, @@ -89,13 +130,29 @@ cleanup_respond(hg_handle_t* handle, InputType* input, OutputType* output, return cleanup(handle, input, static_cast(nullptr), bulk_handle); } - +/** + * @brief Combines responding to the client and cleaning up all RPC resources + * after. + * @tparam InputType Any RPC input struct from client requests + * @tparam OutputType Any RPC output struct for client response + * @param handle Pointer to Mercury RPC handle + * @param input Pointer to input struct + * @param output Pointer to output struct + * @return Mercury error code. HG_SUCCESS on success. + */ template inline hg_return_t cleanup_respond(hg_handle_t* handle, InputType* input, OutputType* output) { return cleanup_respond(handle, input, output, nullptr); } - +/** + * @brief Combines responding to the client and cleaning up all RPC resources + * after. 
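+ *
+ * Typical use at the end of a handler (illustrative, `out` assumed):
+ * @code
+ * out.err = 0;
+ * return gkfs::rpc::cleanup_respond(&handle, &out);
+ * @endcode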
+ * @tparam OutputType Any RPC output struct for client response + * @param handle Pointer to Mercury RPC handle + * @param output Pointer to output struct + * @return Mercury error code. HG_SUCCESS on success. + */ template inline hg_return_t cleanup_respond(hg_handle_t* handle, OutputType* output) { diff --git a/include/daemon/ops/data.hpp b/include/daemon/ops/data.hpp index 4754fa0ce..75dbd7b20 100644 --- a/include/daemon/ops/data.hpp +++ b/include/daemon/ops/data.hpp @@ -25,6 +25,18 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Classes to encapsulate asynchronous chunk operations. + * All operations on chunk files must go through the Argobots' task queues. + * Otherwise operations may overtake operations in the I/O queues. + * This applies to write, read, and truncate which may modify the middle of a + * chunk, essentially a write operation. + * + * In the future, this class may be used to provide failure tolerance for IO + * tasks + * + * Base class using the CRTP idiom + */ #ifndef GEKKOFS_DAEMON_DATA_HPP #define GEKKOFS_DAEMON_DATA_HPP @@ -42,67 +54,106 @@ extern "C" { namespace gkfs::data { +/** + * @brief Internal Exception for all general chunk operations. + */ class ChunkOpException : public std::runtime_error { public: explicit ChunkOpException(const std::string& s) : std::runtime_error(s){}; }; - +/** + * @brief Internal Exception for all chunk write operations. + */ class ChunkWriteOpException : public ChunkOpException { public: explicit ChunkWriteOpException(const std::string& s) : ChunkOpException(s){}; }; - +/** + * @brief Internal Exception for all chunk read operations. + */ class ChunkReadOpException : public ChunkOpException { public: explicit ChunkReadOpException(const std::string& s) : ChunkOpException(s){}; }; - +/** + * @brief Internal Exception for all chunk metadata operations. + */ class ChunkMetaOpException : public ChunkOpException { public: explicit ChunkMetaOpException(const std::string& s) : ChunkOpException(s){}; }; - /** - * Classes to encapsulate asynchronous chunk operations. - * All operations on chunk files must go through the Argobots' task queues. - * Otherwise operations may overtake operations in the queues. - * This applies to write, read, and truncate which may modify the middle of a - * chunk, essentially a write operation. + * @brief Base class (using CRTP idiom) for all chunk operations. * - * Note: This class is not thread-safe. + * This class is not thread-safe. + * @internal + * Each I/O operation, i.e., an write or read RPC request, operating on one or + * multiple chunks is represented by a corresponding ChunkOperation object. To + * keep conflicting operations of other I/O requests on the same chunk in order, + * Argobots tasklets are used. Tasklets are lightweight threads compared to + * User-Level Threads (ULTs). When ULTs run in an ES, their execution may be + * interleaved inside an ES because they can yield control to the scheduler or + * another ULT. If this happens during a write, for example, and data is written + * after one another while sitting in the queue, data might get written in the + * wrong order. Tasklets are an efficient way to prevent this. * - * In the future, this class may be used to provide failure tolerance for IO - * tasks + * Each ChunkOperation includes the path to the directory where all chunks are + * located, a number of tasks (one for each chunk), and their corresponding + * eventuals (one for each task). 
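+ *
+ * A minimal tasklet/eventual round trip (illustrative sketch, not the
+ * exact member code):
+ * @code
+ * ABT_eventual ev;
+ * ABT_eventual_create(sizeof(int), &ev);
+ * ABT_task_create(pool, task_fn, &arg, nullptr); // task_fn signals ev
+ * int* err;
+ * ABT_eventual_wait(ev, (void**) &err);          // blocks until set
+ * ABT_eventual_free(&ev);
+ * @endcode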
ABT_eventuals offer a similar concept as + * std::future to provide call-back functionality. * - * Base class using the CRTP idiom + * Truncate requests also create a ChunkOperation since it requires removing a + * number of chunks and must honor the same order of operations to chunks. + * + * In the future, additional optimizations can be made since atomicity of the + * tasklets might be too long if they depend on the results of a, e.g., pread(). + * Therefore, a queue per chunk could be beneficial (this has not been tested + * yet). + * + * Note, at this time, CRTP is only required for `cancel_all_tasks()`. + * + * @endinternal + * @tparam OperationType for write, read, and truncate. */ template class ChunkOperation { protected: - const std::string path_; + const std::string path_; //!< Path to the chunk directory of the file - std::vector abt_tasks_; - std::vector task_eventuals_; + std::vector abt_tasks_; //!< Tasklets operating on the file + std::vector + task_eventuals_; //!< Eventuals for tasklet callbacks public: + /** + * @brief Constructor for a single chunk operation. + * @param path Path to chunk directory + */ explicit ChunkOperation(const std::string& path) : ChunkOperation(path, 1){}; + /** + * @brief Constructor to initialize tasklet and eventual lists. + * @param path Path to chunk directory + * @param n Number of chunk operations by I/O request + */ ChunkOperation(std::string path, size_t n) : path_(std::move(path)) { // Knowing n beforehand is important and cannot be dynamic. Otherwise // eventuals cause seg faults abt_tasks_.resize(n); task_eventuals_.resize(n); }; - + /** + * Destructor calls cancel_all_tasks to clean up all used resources. + */ ~ChunkOperation() { cancel_all_tasks(); } /** - * Cleans up and cancels all tasks in flight + * @brief Cancels all tasks in-flight and free resources. */ void cancel_all_tasks() { @@ -125,22 +176,33 @@ public: } }; - +/** + * @brief Chunk operation class for truncate operations. + * + * Note, a truncate operation is a special case and forced to only use a single + * task. + */ class ChunkTruncateOperation : public ChunkOperation { friend class ChunkOperation; private: struct chunk_truncate_args { - const std::string* path; - size_t size; - ABT_eventual eventual; - }; - - struct chunk_truncate_args task_arg_ {}; + const std::string* path; //!< Path to affected chunk directory + size_t size; //!< GekkoFS file offset (_NOT_ chunk file) to truncate to + ABT_eventual eventual; //!< Attached eventual + }; //!< Struct for a truncate operation + struct chunk_truncate_args task_arg_ {}; //!< tasklet input struct + /** + * @brief Exclusively used by the Argobots tasklet. + * @param _arg Pointer to input struct of type . Error + * code is placed into eventual to signal its failure or success. + */ static void truncate_abt(void* _arg); - + /** + * @brief Resets the task_arg_ struct. + */ void clear_task_args(); @@ -149,31 +211,51 @@ public: ~ChunkTruncateOperation() = default; + /** + * @brief Truncate request called by RPC handler function and launches a + * non-blocking tasklet. + * @param size GekkoFS file offset (_NOT_ chunk file) to truncate to + * @throws ChunkMetaOpException + */ void truncate(size_t size); - + /** + * @brief Wait for the truncate tasklet to finish. + * @return Error code for success (0) or failure + */ int wait_for_task(); }; +/** + * @brief Chunk operation class for write operations with one object per write + * RPC request. May involve multiple I/O task depending on the number of chunks + * involved. 
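+ *
+ * Illustrative call sequence from a write RPC handler (ids, sizes, and
+ * offsets are assumed values):
+ * @code
+ * ChunkWriteOperation op{path, 2};           // write spans two chunks
+ * op.write_nonblock(0, id, buf, size0, off);
+ * op.write_nonblock(1, id + 1, buf + size0, size1, 0);
+ * auto [err, written] = op.wait_for_tasks(); // err == 0 on success
+ * @endcode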
+ */ class ChunkWriteOperation : public ChunkOperation { friend class ChunkOperation; private: struct chunk_write_args { - const std::string* path; - const char* buf; - gkfs::rpc::chnk_id_t chnk_id; - size_t size; - off64_t off; - ABT_eventual eventual; - }; - - std::vector task_args_; - + const std::string* path; //!< Path to affected chunk directory + const char* buf; //!< Buffer for chunk + gkfs::rpc::chnk_id_t chnk_id; //!< chunk id that is affected + size_t size; //!< size to write for chunk + off64_t off; //!< offset for individual chunk + ABT_eventual eventual; //!< Attached eventual + }; //!< Struct for an chunk write operation + + std::vector task_args_; //!< tasklet input structs + /** + * @brief Exclusively used by the Argobots tasklet. + * @param _arg Pointer to input struct of type . Error + * code is placed into eventual to signal its failure or success. + */ static void write_file_abt(void* _arg); - + /** + * @brief Resets the task_arg_ struct. + */ void clear_task_args(); @@ -182,55 +264,95 @@ public: ~ChunkWriteOperation() = default; + /** + * @brief Write request called by RPC handler function and launches a + * non-blocking tasklet. + * @param idx Number of non-blocking write for write RPC request + * @param chunk_id The affected chunk id + * @param bulk_buf_ptr The buffer to write for the chunk + * @param size Size to write for chunk + * @param offset Offset for individual chunk + * @throws ChunkWriteOpException + */ void write_nonblock(size_t idx, uint64_t chunk_id, const char* bulk_buf_ptr, size_t size, off64_t offset); + /** + * @brief Wait for all write tasklets to finish. + * @return Pair for error code for success (0) or failure and written size + */ std::pair wait_for_tasks(); }; - +/** + * @brief Chunk operation class for read operations with one object per read + * RPC request. May involve multiple I/O task depending on the number of chunks + * involved. + */ class ChunkReadOperation : public ChunkOperation { friend class ChunkOperation; private: struct chunk_read_args { - const std::string* path; - char* buf; - gkfs::rpc::chnk_id_t chnk_id; - size_t size; - off64_t off; - ABT_eventual eventual; - }; - - std::vector task_args_; - + const std::string* path; //!< Path to affected chunk directory + char* buf; //!< Buffer for chunk + gkfs::rpc::chnk_id_t chnk_id; //!< chunk id that is affected + size_t size; //!< size to read from chunk + off64_t off; //!< offset for individual chunk + ABT_eventual eventual; //!< Attached eventual + }; //!< Struct for an chunk read operation + + std::vector task_args_; //!< tasklet input structs + /** + * @brief Exclusively used by the Argobots tasklet. + * @param _arg Pointer to input struct of type . Error + * code is placed into eventual to signal its failure or success. + */ static void read_file_abt(void* _arg); - + /** + * @brief Resets the task_arg_ struct. 
+ */ void clear_task_args(); public: struct bulk_args { - margo_instance_id mid; - hg_addr_t origin_addr; - hg_bulk_t origin_bulk_handle; - std::vector* origin_offsets; - hg_bulk_t local_bulk_handle; - std::vector* local_offsets; - std::vector* chunk_ids; - }; + margo_instance_id mid; //!< Margo instance ID of server + hg_addr_t origin_addr; //!< abstract address of client + hg_bulk_t origin_bulk_handle; //!< bulk handle from client + std::vector* origin_offsets; //!< offsets in origin buffer + hg_bulk_t local_bulk_handle; //!< local bulk handle for PUSH + std::vector* local_offsets; //!< offsets in local buffer + std::vector* chunk_ids; //!< all chunk ids in this read + }; //!< Struct to push read data to the client ChunkReadOperation(const std::string& path, size_t n); ~ChunkReadOperation() = default; + /** + * @brief Read request called by RPC handler function and launches a + * non-blocking tasklet. + * @param idx Number of non-blocking write for write RPC request + * @param chunk_id The affected chunk id + * @param bulk_buf_ptr The buffer for reading chunk + * @param size Size to read from chunk + * @param offset Offset for individual chunk + * @throws ChunkReadOpException + */ void read_nonblock(size_t idx, uint64_t chunk_id, char* bulk_buf_ptr, size_t size, off64_t offset); + /** + * @brief Waits for all local I/O operations to finish and push buffers back + * to the daemon. + * @param args Bulk_args for push transfer + * @return Pair for error code for success (0) or failure and read size + */ std::pair wait_for_tasks_and_push_back(const bulk_args& args); }; diff --git a/include/daemon/util.hpp b/include/daemon/util.hpp index 9d10a5f5c..a0f2d7f63 100644 --- a/include/daemon/util.hpp +++ b/include/daemon/util.hpp @@ -25,14 +25,24 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Utility functions for the daemon context. + */ #ifndef GEKKOFS_DAEMON_UTIL_HPP #define GEKKOFS_DAEMON_UTIL_HPP namespace gkfs::utils { +/** + * @brief Registers the daemon's RPC address to the shared hosts file. + * @throws std::runtime_error when file stream fails + */ void populate_hosts_file(); +/** + * @brief Attempts to remove the entire hosts file. + */ void destroy_hosts_file(); } // namespace gkfs::utils diff --git a/src/daemon/backend/data/chunk_storage.cpp b/src/daemon/backend/data/chunk_storage.cpp index ace96f1b6..c06c76856 100644 --- a/src/daemon/backend/data/chunk_storage.cpp +++ b/src/daemon/backend/data/chunk_storage.cpp @@ -25,6 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Chunk storage definitions handles all interactions with the node-local + * storage system. + */ #include #include @@ -55,6 +59,17 @@ ChunkStorage::absolute(const string& internal_path) const { return fmt::format("{}/{}", root_path_, internal_path); } +/** + * @internal + * All GekkoFS files are placed within the rootdir directory, each GekkoFS file + * is represented by a directory on the local file system. + * We do not mirror a directory hierarchy on the local file system. + * Therefore the path /mnt/gekkofs_mount/foo/bar has the internal path + * /tmp/rootdir//data/chunks/foo:bar. Each chunk is then its own file, + * numbered by its index, e.g., /tmp/rootdir//data/chunks/foo:bar/0 for the + * first chunk file. 
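+ *
+ * Illustrative layout (rootdir value assumed):
+ * @code
+ * /mnt/gekkofs_mount/foo/bar        // user-visible GekkoFS path
+ * <rootdir>/data/chunks/foo:bar/0   // chunk 0 on the local FS
+ * <rootdir>/data/chunks/foo:bar/1   // chunk 1 on the local FS
+ * @endcode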
+ * @endinternal + */ string ChunkStorage::get_chunks_dir(const string& file_path) { assert(gkfs::path::is_absolute(file_path)); @@ -69,12 +84,6 @@ ChunkStorage::get_chunk_path(const string& file_path, return fmt::format("{}/{}", get_chunks_dir(file_path), chunk_id); } -/** - * Creates a chunk directory that all chunk files are placed in. - * The path to the real file will be used as the directory name - * @param file_path - * @throws ChunkStorageException on error - */ void ChunkStorage::init_chunk_space(const string& file_path) const { auto chunk_dir = absolute(get_chunks_dir(file_path)); @@ -89,12 +98,6 @@ ChunkStorage::init_chunk_space(const string& file_path) const { // public functions -/** - * - * @param path - * @param chunksize - * @throws ChunkStorageException - */ ChunkStorage::ChunkStorage(string& path, const size_t chunksize) : root_path_(path), chunksize_(chunksize) { /* Get logger instance and set it for data module and chunk storage */ @@ -114,11 +117,6 @@ ChunkStorage::ChunkStorage(string& path, const size_t chunksize) root_path_); } -/** - * Removes chunk directory with all its files - * @param file_path - * @throws ChunkStorageException - */ void ChunkStorage::destroy_chunk_space(const string& file_path) const { auto chunk_dir = absolute(get_chunks_dir(file_path)); @@ -136,20 +134,11 @@ ChunkStorage::destroy_chunk_space(const string& file_path) const { } /** - * Writes a chunk file. - * On failure throws ChunkStorageException with encapsulated error code - * + * @internal * Refer to * https://www.gnu.org/software/libc/manual/html_node/I_002fO-Primitives.html - * for pwrite behavior - * - * @param file_path - * @param chunk_id - * @param buf - * @param size - * @param offset - * @param eventual - * @throws ChunkStorageException (caller will handle eventual signalling) + * for pwrite behavior. + * @endinternal */ ssize_t ChunkStorage::write_chunk(const string& file_path, @@ -196,19 +185,11 @@ ChunkStorage::write_chunk(const string& file_path, } /** - * Read from a chunk file. - * On failure throws ChunkStorageException with encapsulated error code - * + * @internal * Refer to * https://www.gnu.org/software/libc/manual/html_node/I_002fO-Primitives.html * for pread behavior - * @param file_path - * @param chunk_id - * @param buf - * @param size - * @param offset - * @param eventual - * @throws ChunkStorageException (caller will handle eventual signalling) + * @endinternal */ ssize_t ChunkStorage::read_chunk(const string& file_path, gkfs::rpc::chnk_id_t chunk_id, @@ -266,18 +247,16 @@ ChunkStorage::read_chunk(const string& file_path, gkfs::rpc::chnk_id_t chunk_id, } /** - * Delete all chunks starting with chunk a chunk id. + * @internal * Note eventual consistency here: While chunks are removed, there is no lock * that prevents other processes from modifying anything in that directory. It * is the application's responsibility to stop modifying the file while truncate - * is executed + * is executed. * * If an error is encountered when removing a chunk file, the function will * still remove all files and report the error afterwards with * ChunkStorageException. 
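+ *
+ * Conceptually (illustrative sketch; the member code iterates the chunk
+ * directory entries):
+ * @code
+ * for(const auto& entry : fs::directory_iterator(chunk_dir))
+ *     if(std::stoul(entry.path().filename().string()) >= chunk_start)
+ *         fs::remove(entry); // on error: continue, report afterwards
+ * @endcode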
- * @param file_path - * @param chunk_start - * @throws ChunkStorageException + * @endinternal */ void ChunkStorage::trim_chunk_space(const string& file_path, @@ -308,13 +287,6 @@ ChunkStorage::trim_chunk_space(const string& file_path, __func__, file_path)); } -/** - * Truncates a single chunk file to a given length - * @param file_path - * @param chunk_id - * @param length - * @throws ChunkStorageException - */ void ChunkStorage::truncate_chunk_file(const string& file_path, gkfs::rpc::chnk_id_t chunk_id, off_t length) { @@ -331,10 +303,12 @@ ChunkStorage::truncate_chunk_file(const string& file_path, } /** - * Calls statfs on the chunk directory to get statistic on its used and free - * size left - * @return ChunkStat - * @throws ChunkStorageException + * @internal + * Return ChunkStat with following fields: + * unsigned long chunk_size; + unsigned long chunk_total; + unsigned long chunk_free; + * @endinternal */ ChunkStat ChunkStorage::chunk_stat() const { diff --git a/src/daemon/backend/data/data_module.cpp b/src/daemon/backend/data/data_module.cpp index 7993b8312..414d57598 100644 --- a/src/daemon/backend/data/data_module.cpp +++ b/src/daemon/backend/data/data_module.cpp @@ -25,7 +25,10 @@ SPDX-License-Identifier: GPL-3.0-or-later */ - +/** + * @brief Definition for GekkoFS data module object that is run once per daemon + * instance. + */ #include namespace gkfs::data { diff --git a/src/daemon/backend/metadata/db.cpp b/src/daemon/backend/metadata/db.cpp index 53a094533..5d140f856 100644 --- a/src/daemon/backend/metadata/db.cpp +++ b/src/daemon/backend/metadata/db.cpp @@ -40,11 +40,33 @@ extern "C" { namespace gkfs::metadata { +// private functions /** - * Called when the daemon is started: Connects to the KV store - * @param path where KV store data is stored + * @internal + * Called when RocksDB connection is established. + * Used for setting KV store settings + * see here: https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide + * @endinternal */ +void +MetadataDB::optimize_rocksdb_options(rdb::Options& options) { + options.max_successive_merges = 128; +} + +// public functions + +void +MetadataDB::throw_rdb_status_excpt(const rdb::Status& s) { + assert(!s.ok()); + + if(s.IsNotFound()) { + throw NotFoundException(s.ToString()); + } else { + throw DBException(s.ToString()); + } +} + MetadataDB::MetadataDB(const std::string& path) : path(path) { /* Get logger instance and set it for data module and chunk storage */ @@ -69,29 +91,6 @@ MetadataDB::MetadataDB(const std::string& path) : path(path) { this->db.reset(rdb_ptr); } -/** - * Exception wrapper on Status object. 
Throws NotFoundException if - * s.IsNotFound(), general DBException otherwise - * @param RocksDB status - * @throws DBException - */ -void -MetadataDB::throw_rdb_status_excpt(const rdb::Status& s) { - assert(!s.ok()); - - if(s.IsNotFound()) { - throw NotFoundException(s.ToString()); - } else { - throw DBException(s.ToString()); - } -} - -/** - * Gets a KV store value for a key - * @param key - * @return value - * @throws DBException on failure, NotFoundException if entry doesn't exist - */ std::string MetadataDB::get(const std::string& key) const { std::string val; @@ -102,12 +101,6 @@ MetadataDB::get(const std::string& key) const { return val; } -/** - * Puts an entry into the KV store - * @param key - * @param val - * @throws DBException on failure - */ void MetadataDB::put(const std::string& key, const std::string& val) { assert(gkfs::path::is_absolute(key)); @@ -121,11 +114,9 @@ MetadataDB::put(const std::string& key, const std::string& val) { } /** - * Puts an entry into the KV store if it doesn't exist. This function does not - * use a mutex. - * @param key - * @param val - * @throws DBException on failure, ExistException if entry already exists + * @internal + * This function does not use a mutex. + * @endinternal */ void MetadataDB::put_no_exist(const std::string& key, const std::string& val) { @@ -134,11 +125,6 @@ MetadataDB::put_no_exist(const std::string& key, const std::string& val) { put(key, val); } -/** - * Removes an entry from the KV store - * @param key - * @throws DBException on failure, NotFoundException if entry doesn't exist - */ void MetadataDB::remove(const std::string& key) { auto s = db->Delete(write_opts, key); @@ -147,12 +133,6 @@ MetadataDB::remove(const std::string& key) { } } -/** - * checks for existence of an entry - * @param key - * @return true if exists - * @throws DBException on failure - */ bool MetadataDB::exists(const std::string& key) { std::string val; @@ -167,13 +147,6 @@ MetadataDB::exists(const std::string& key) { return true; } -/** - * Updates a metadentry atomically and also allows to change keys - * @param old_key - * @param new_key - * @param val - * @throws DBException on failure, NotFoundException if entry doesn't exist - */ void MetadataDB::update(const std::string& old_key, const std::string& new_key, const std::string& val) { @@ -188,12 +161,9 @@ MetadataDB::update(const std::string& old_key, const std::string& new_key, } /** - * Increases only the size part of the metadentry via a RocksDB Operand - * Operation. E.g., called before a write() call - * @param key - * @param size - * @param append - * @throws DBException on failure + * @internal + * E.g., called before a write() call + * @endinternal */ void MetadataDB::increase_size(const std::string& key, size_t size, bool append) { @@ -205,11 +175,9 @@ MetadataDB::increase_size(const std::string& key, size_t size, bool append) { } /** - * Decreases only the size part of the metadentry via a RocksDB Operand - * Operation E.g., called before a truncate() call - * @param key - * @param size - * @throws DBException on failure + * @internal + * E.g., called before a truncate() call + * @endinternal */ void MetadataDB::decrease_size(const std::string& key, size_t size) { @@ -220,13 +188,6 @@ MetadataDB::decrease_size(const std::string& key, size_t size) { } } -/** - * Return all the first-level entries of the directory @dir - * - * @return vector of pair , - * where name is the name of the entries and is_dir - * is true in the case the entry is a directory. 
- */ std::vector> MetadataDB::get_dirents(const std::string& dir) const { auto root_path = dir; @@ -271,13 +232,6 @@ MetadataDB::get_dirents(const std::string& dir) const { return entries; } -/** - * Return all the first-level entries of the directory @dir - * - * @return vector of pair , - * where name is the name of the entries and is_dir - * is true in the case the entry is a directory. - */ std::vector> MetadataDB::get_dirents_extended(const std::string& dir) const { auto root_path = dir; @@ -325,8 +279,10 @@ MetadataDB::get_dirents_extended(const std::string& dir) const { /** + * @internal * Code example for iterating all entries in KV store. This is for debug only as - * it is too expensive + * it is too expensive. + * @endinternal */ void MetadataDB::iterate_all() { @@ -340,15 +296,4 @@ MetadataDB::iterate_all() { } } -/** - * Called when RocksDB connection is established. - * Used for setting KV store settings - * see here: https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide - * @param options - */ -void -MetadataDB::optimize_rocksdb_options(rdb::Options& options) { - options.max_successive_merges = 128; -} - } // namespace gkfs::metadata diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 93bf08629..456bcac13 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -25,6 +25,13 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief The main source file to launch the daemon. + * @internal + * This file includes the daemon's main() function and starts all daemon + * subroutines. It deals with user input and waits on a signal to shut it down. + * @endinternal + */ #include #include @@ -60,8 +67,8 @@ extern "C" { using namespace std; namespace fs = std::filesystem; -static condition_variable shutdown_please; -static mutex mtx; +static condition_variable shutdown_please; // handler for shutdown signaling +static mutex mtx; // mutex to wait on shutdown conditional variable struct cli_options { string mountdir; @@ -72,6 +79,15 @@ struct cli_options { string rpc_protocol; }; +/** + * @brief Initializes the Argobots execution streams for non-blocking I/O + * @internal + * The corresponding execution streams are defined in + * gkfs::config::rpc::daemon_io_xstreams. A FIFO thread pool accomodates these + * execution streams. Argobots tasklets are created from these pools during I/O + * operations. + * @endinternal + */ void init_io_tasklet_pool() { static_assert(gkfs::config::rpc::daemon_io_xstreams >= 0, @@ -103,8 +119,12 @@ init_io_tasklet_pool() { } /** - * Registers RPC handlers to Margo instance - * @param hg_class + * @brief Registers RPC handlers to a given Margo instance. + * @internal + * Registering is done by associating a Margo instance id (mid) with the RPC + * name and its handler function including defined input/out structs + * @endinternal + * @param margo_instance_id */ void register_server_rpcs(margo_instance_id mid) { @@ -148,6 +168,10 @@ register_server_rpcs(margo_instance_id mid) { rpc_chunk_stat_out_t, rpc_srv_get_chunk_stat); } +/** + * @brief Initializes the daemon RPC server. + * @throws std::runtime_error on failure + */ void init_rpc_server() { hg_addr_t addr_self = nullptr; @@ -201,6 +225,17 @@ init_rpc_server() { register_server_rpcs(mid); } +/** + * @brief Initializes the daemon environment and setting up its subroutines. + * @internal + * This includes connecting to the KV store, starting the Argobots I/O execution + * streams, initializing the metadata and data backends, and starting the RPC + * server. 
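+ *
+ * Rough launch order (illustrative; see the function body for details):
+ * @code
+ * // 1. open RocksDB       -> gkfs::metadata::MetadataDB
+ * // 2. start I/O streams  -> init_io_tasklet_pool()
+ * // 3. init data backend  -> gkfs::data::ChunkStorage
+ * // 4. start RPC server   -> init_rpc_server()
+ * @endcode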
+ * + * Finally, the root metadata entry is created. + * @endinternal + * @throws std::runtime_error if any step fails + */ void init_environment() { // Initialize metadata db @@ -311,7 +346,7 @@ init_environment() { #ifdef GKFS_ENABLE_AGIOS /** - * Initialize the AGIOS scheduling library + * @brief Initialize the AGIOS scheduling library */ void agios_initialize() { @@ -330,7 +365,12 @@ agios_initialize() { #endif /** - * Destroys the margo, argobots, and mercury environments + * @brief Destroys the daemon environment and gracefully shuts down all + * subroutines. + * @internal + * Shutting down includes freeing Argobots execution streams, cleaning + * hostsfile, and shutting down the Mercury RPC server. + * @endinternal */ void destroy_enviroment() { @@ -364,6 +404,13 @@ destroy_enviroment() { GKFS_DATA->close_mdb(); } +/** + * @brief Handler for daemon shutdown signal handling. + * @internal + * Notifies the waiting thread in main() to wake up. + * @endinternal + * @param dummy unused but required by signal() called in main() + */ void shutdown_handler(int dummy) { GKFS_DATA->spdlogger()->info("{}() Received signal: '{}'", __func__, @@ -371,6 +418,14 @@ shutdown_handler(int dummy) { shutdown_please.notify_all(); } +/** + * @brief Initializes the daemon logging environment. + * @internal + * Reads user input via environment variables regarding the + * log path and log level. + * @endinternal + * Initializes three loggers: main, metadata module, and data module + */ void initialize_loggers() { std::string path = gkfs::config::log::daemon_log_path; @@ -402,9 +457,10 @@ initialize_loggers() { } /** - * Parses input of user - * @param vm - * @throws runtime_error + * @brief Parses command line arguments from user + * + * @param vm boost::program_options variable_map + * @throws std::runtime_error */ void parse_input(const cli_options& opts, const CLI::App& desc) { @@ -514,6 +570,20 @@ parse_input(const cli_options& opts, const CLI::App& desc) { } } +/** + * @brief The initial function called when launching the daemon. + * @internal + * Launches all subroutines and waits on a conditional variable to shut it down. + * Daemon will react to the following signals: + * + * SIGINT - Interrupt from keyboard (ctrl-c) + * SIGTERM - Termination signal (kill + * SIGKILL - Kill signal (kill -9 + * @endinternal + * @param argc number of command line arguments + * @param argv list of the command line arguments + * @return exit status: EXIT_SUCCESS (0) or EXIT_FAILURE (1) + */ int main(int argc, const char* argv[]) { // Define arg parsing @@ -575,7 +645,7 @@ main(int argc, const char* argv[]) { #endif cout << "Chunk size: " << gkfs::config::rpc::chunksize << " bytes" << endl; - return 0; + return EXIT_SUCCESS; } // intitialize logging framework initialize_loggers(); @@ -587,7 +657,7 @@ main(int argc, const char* argv[]) { } catch(const std::exception& e) { cerr << fmt::format("Parsing arguments failed: '{}'. Exiting.", e.what()); - exit(EXIT_FAILURE); + return EXIT_FAILURE; } /* @@ -603,7 +673,7 @@ main(int argc, const char* argv[]) { GKFS_DATA->spdlogger()->error(emsg); cerr << emsg << endl; destroy_enviroment(); - exit(EXIT_FAILURE); + return EXIT_FAILURE; } signal(SIGINT, shutdown_handler); @@ -616,5 +686,5 @@ main(int argc, const char* argv[]) { GKFS_DATA->spdlogger()->info("{}() Shutting down...", __func__); destroy_enviroment(); GKFS_DATA->spdlogger()->info("{}() Complete. 
Exiting...", __func__); - return 0; + return EXIT_SUCCESS; } diff --git a/src/daemon/handler/srv_data.cpp b/src/daemon/handler/srv_data.cpp index 59abace2e..058f7be46 100644 --- a/src/daemon/handler/srv_data.cpp +++ b/src/daemon/handler/srv_data.cpp @@ -25,7 +25,14 @@ SPDX-License-Identifier: GPL-3.0-or-later */ - +/** + * @brief Provides all Margo RPC handler definitions called by Mercury on client + * request for all file system data operations. + * @internal + * The end of the file defines the associates the Margo RPC handler functions + * and associates them with their corresponding GekkoFS handler functions. + * @endinternal + */ #include #include #include @@ -45,17 +52,41 @@ #endif using namespace std; -/* - * This file contains all Margo RPC handlers that are concerning management - * operations - */ namespace { /** - * RPC handler for an incoming write RPC - * @param handle - * @return + * @brief Serves a write request transferring the chunks associated with this + * daemon and store them on the node-local FS. + * @internal + * The write operation has multiple steps: + * 1. Setting up all RPC related information + * 2. Allocating space for bulk transfer buffers + * 3. By processing the RPC input, the chunk IDs that are hashing to this daemon + * are computed based on a client-defined interval (start and endchunk id for + * this write operation). The client does _not_ provide the daemons with a list + * of chunk IDs because it is dynamic data that cannot be part of an RPC input + * struct. Therefore, this information would need to be pulled with a bulk + * transfer as well, adding unnecessary latency to the overall write operation. + * + * For each relevant chunk, a PULL bulk transfer is issued. Once finished, a + * non-blocking Argobots tasklet is launched to write the data chunk to the + * backend storage. Therefore, bulk transfer and the backend I/O operation are + * overlapping for efficiency. + * 4. Wait for all tasklets to complete adding up all the complete written data + * size as reported by each task. + * 5. Respond to client (when all backend write operations are finished) and + * cleanup RPC resources. Any error is reported in the RPC output struct. Note, + * that backend write operations are not canceled while in-flight when a task + * encounters an error. + * + * Note, refer to the data backend documentation w.r.t. how Argobots tasklets + * work and why they are used. + * + * All exceptions must be caught here and dealt with accordingly. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury */ hg_return_t rpc_srv_write(hg_handle_t handle) { @@ -315,9 +346,37 @@ rpc_srv_write(hg_handle_t handle) { } /** - * RPC handler for an incoming read RPC - * @param handle - * @return + * @brief Serves a read request reading the chunks associated with this + * daemon from the node-local FS and transferring them back to the client. + * @internal + * The read operation has multiple steps: + * 1. Setting up all RPC related information + * 2. Allocating space for bulk transfer buffers + * 3. By processing the RPC input, the chunk IDs that are hashing to this daemon + * are computed based on a client-defined interval (start and endchunk id for + * this read operation). The client does _not_ provide the daemons with a list + * of chunk IDs because it is dynamic data that cannot be part of an RPC input + * struct. 
Therefore, this information would need to be pulled with a bulk + * transfer as well, adding unnecessary latency to the overall write operation. + * + * For each relevant chunk, a non-blocking Arbobots tasklet is launched to read + * the data chunk from the backend storage to the allocated buffers. + * 4. Wait for all tasklets to finish the read operation while PUSH bulk + * transferring each chunk back to the client when a tasklet finishes. + * Therefore, bulk transfer and the backend I/O operation are overlapping for + * efficiency. The read size is added up for all tasklets. + * 5. Respond to client (when all bulk transfers are finished) and cleanup RPC + * resources. Any error is reported in the RPC output struct. Note, that backend + * read operations are not canceled while in-flight when a task encounters an + * error. + * + * Note, refer to the data backend documentation w.r.t. how Argobots tasklets + * work and why they are used. + * + * All exceptions must be caught here and dealt with accordingly. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury */ hg_return_t rpc_srv_read(hg_handle_t handle) { @@ -387,7 +446,7 @@ rpc_srv_read(hg_handle_t handle) { #endif /* - * 2. Set up buffers for pull bulk transfers + * 2. Set up buffers for push bulk transfers */ void* bulk_buf; // buffer for bulk transfer vector bulk_buf_ptrs(in.chunk_n); // buffer-chunk offsets @@ -547,9 +606,17 @@ rpc_srv_read(hg_handle_t handle) { /** - * RPC handler for an incoming truncate RPC - * @param handle - * @return + * @brief Serves a file truncate request and remove all corresponding chunk + * files on this daemon. + * @internal + * A truncate operation includes decreasing the file size of the metadata entry + * (if hashing to this daemon) and removing all corresponding chunks exceeding + * the new file size. + * + * All exceptions must be caught here and dealt with accordingly. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury */ hg_return_t rpc_srv_truncate(hg_handle_t handle) { @@ -588,9 +655,13 @@ rpc_srv_truncate(hg_handle_t handle) { /** - * RPC handler for an incoming chunk stat RPC - * @param handle - * @return + * @brief Serves a chunk stat request, responding with space information of the + * node local file system. + * @internal + * All exceptions must be caught here and dealt with accordingly. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury */ hg_return_t rpc_srv_get_chunk_stat(hg_handle_t handle) { diff --git a/src/daemon/handler/srv_management.cpp b/src/daemon/handler/srv_management.cpp index 2a60318da..cb0b13217 100644 --- a/src/daemon/handler/srv_management.cpp +++ b/src/daemon/handler/srv_management.cpp @@ -25,7 +25,14 @@ SPDX-License-Identifier: GPL-3.0-or-later */ - +/** + * @brief Provides all Margo RPC handler definitions called by Mercury on client + * request for all file system management operations. + * @internal + * The end of the file defines the associates the Margo RPC handler functions + * and associates them with their corresponding GekkoFS handler functions. + * @endinternal + */ #include #include @@ -37,12 +44,18 @@ extern "C" { using namespace std; -/* - * This file contains all Margo RPC handlers that are concerning data operations - */ - namespace { +/** + * @brief Responds with general file system meta information requested on client + * startup. 
+ * @internal + * Most notably this is where the client gets the information on which path + * GekkoFS is accessible. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury + */ hg_return_t rpc_srv_get_fs_config(hg_handle_t handle) { rpc_config_out_t out{}; diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index 9eda0cd54..9b13a10c4 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -25,6 +25,14 @@ SPDX-License-Identifier: GPL-3.0-or-later */ +/** + * @brief Provides all Margo RPC handler definitions called by Mercury on client + * request for all file system metadata operations. + * @internal + * The end of the file defines the associates the Margo RPC handler functions + * and associates them with their corresponding GekkoFS handler functions. + * @endinternal + */ #include #include @@ -36,13 +44,22 @@ using namespace std; -/* - * This file contains all Margo RPC handlers that are concerning metadata - * operations - */ - namespace { +/** + * @brief Serves a file/directory create request or returns an error to the + * client if the object already exists. + * @internal + * The create request creates or updates a corresponding entry in the KV store. + * If the object already exists, the RPC output struct includes an EEXIST error + * code. This is not a hard error. Other unexpected errors are placed in the + * output struct as well. + * + * All exceptions must be caught here and dealt with accordingly. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury + */ hg_return_t rpc_srv_create(hg_handle_t handle) { rpc_mk_node_in_t in; @@ -80,7 +97,19 @@ rpc_srv_create(hg_handle_t handle) { return HG_SUCCESS; } - +/** + * @brief Serves a stat request or returns an error to the + * client if the object does not exist. + * @internal + * The stat request reads the corresponding entry in the KV store. The value + * string is directly passed to the client. It sets an error code if the object + * does not exist or in other unexpected errors. + * + * All exceptions must be caught here and dealt with accordingly. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury + */ hg_return_t rpc_srv_stat(hg_handle_t handle) { rpc_path_only_in_t in{}; @@ -122,7 +151,16 @@ rpc_srv_stat(hg_handle_t handle) { return HG_SUCCESS; } - +/** + * @brief Serves a request to decrease the file size in the object's KV store + * entry. + * @internal + * All exceptions must be caught here and dealt with accordingly. Any errors are + * placed in the response. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury + */ hg_return_t rpc_srv_decr_size(hg_handle_t handle) { rpc_trunc_in_t in{}; @@ -160,7 +198,25 @@ rpc_srv_decr_size(hg_handle_t handle) { return HG_SUCCESS; } - +/** + * @brief Serves a request to remove a file/directory metadata. + * @internal + * The handler triggers the removal of the KV store entry but still returns the + * file mode and size information to the client. This is because the size is + * needed to remove all data chunks. The metadata is removed first to ensure + * data isn't removed while the metadata is still available. This could cause + * issues because a stat request would say that the file still exists. + * + * gkfs::config::metadata::implicit_data_removal offers an optimization to + * implicitly remove the data chunks on the metadata node. 
+/**
+ * @brief Serves a request to remove a file's or directory's metadata.
+ * @internal
+ * The handler triggers the removal of the KV store entry but still returns the
+ * file mode and size information to the client. This is because the size is
+ * needed to remove all data chunks. The metadata is removed first so that data
+ * is never removed while the metadata is still available; otherwise, a stat
+ * request could report that the file still exists.
+ *
+ * gkfs::config::metadata::implicit_data_removal offers an optimization to
+ * implicitly remove the data chunks on the metadata node. This can increase
+ * remove performance for small files.
+ *
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_remove_metadata(hg_handle_t handle) {
     rpc_rm_node_in_t in{};
@@ -212,6 +268,18 @@ rpc_srv_remove_metadata(hg_handle_t handle) {
     return HG_SUCCESS;
 }
 
+/**
+ * @brief Serves a request to remove all file data chunks on this daemon.
+ * @internal
+ * The handler simply issues the removal of all chunk files on the local file
+ * system.
+ *
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_remove_data(hg_handle_t handle) {
     rpc_rm_node_in_t in{};
@@ -252,7 +320,15 @@ rpc_srv_remove_data(hg_handle_t handle) {
 
     return HG_SUCCESS;
 }
-
+/**
+ * @brief Serves a request to update the metadata. This function is UNUSED.
+ * @internal
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_update_metadentry(hg_handle_t handle) {
     // Note: Currently this handler is not called by the client.
@@ -304,7 +380,16 @@ rpc_srv_update_metadentry(hg_handle_t handle) {
 
     return HG_SUCCESS;
 }
-
+/**
+ * @brief Serves a request to update the file size to a given value in the KV
+ * store.
+ * @internal
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_update_metadentry_size(hg_handle_t handle) {
     rpc_update_metadentry_size_in_t in{};
@@ -352,7 +437,15 @@ rpc_srv_update_metadentry_size(hg_handle_t handle) {
 
     return HG_SUCCESS;
 }
-
+/**
+ * @brief Serves a request to return the current file size.
+ * @internal
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_get_metadentry_size(hg_handle_t handle) {
     rpc_path_only_in_t in{};
@@ -396,7 +489,23 @@ rpc_srv_get_metadentry_size(hg_handle_t handle) {
 
     return HG_SUCCESS;
 }
-
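The removal order described above for rpc_srv_remove_metadata (metadata entry first, data chunks second) can be illustrated with stand-ins; here a std::map plays the KV store and std::filesystem the chunk directory, so this is a sketch of the ordering rather than GekkoFS code.

#include <filesystem>
#include <map>
#include <string>

std::map<std::string, std::string> kv_store; // stand-in metadata KV store

void
remove_file(const std::string& gkfs_path,
            const std::filesystem::path& chunk_dir) {
    // 1. remove the metadata first: a concurrent stat now fails instead of
    //    reporting that the file still exists while its chunks disappear
    kv_store.erase(gkfs_path);
    // 2. then recursively remove all chunk files belonging to this file
    std::filesystem::remove_all(chunk_dir);
}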
+/**
+ * @brief Serves a request to return all file system objects in a directory.
+ * @internal
+ * This handler triggers a KV store scan starting at the given path prefix that
+ * represents a directory. All KV store entries are returned via a bulk transfer
+ * as it can involve an arbitrary number of entries.
+ *
+ * Note that the bulk buffer size is decided statically by the client although
+ * it doesn't know if the space is sufficient to accommodate all entries. This
+ * is planned to be fixed in the future.
+ *
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_get_dirents(hg_handle_t handle) {
     rpc_get_dirents_in_t in{};
@@ -533,6 +642,25 @@ rpc_srv_get_dirents(hg_handle_t handle) {
  * Used to accelerate find
  * It mimics get_dirents, but uses a tuple
  */
+
+/**
+ * @brief Serves a request to return all file system objects in a directory
+ * including their size and create timestamp.
+ * @internal
+ * This is an extension to the above rpc_srv_get_dirents. However, this handler
+ * is an optimization which needs to be refactored and merged with
+ * rpc_srv_get_dirents due to redundant code (TODO).
+ *
+ * Note that the bulk buffer size is decided statically by the client although
+ * it doesn't know if the space is sufficient to accommodate all entries. This
+ * is planned to be fixed in the future (TODO).
+ *
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_get_dirents_extended(hg_handle_t handle) {
     rpc_get_dirents_in_t in{};
@@ -681,7 +809,17 @@ rpc_srv_get_dirents_extended(hg_handle_t handle) {
 }
 
 #ifdef HAS_SYMLINKS
-
+/**
+ * @brief Serves a request to create a symbolic link. This function is UNUSED.
+ * @internal
+ * The state of this function is unclear and requires a complete refactor.
+ *
+ * All exceptions must be caught here and dealt with accordingly. Any errors are
+ * placed in the response.
+ * @endinternal
+ * @param handle Mercury RPC handle
+ * @return Mercury error code to Mercury
+ */
 hg_return_t
 rpc_srv_mk_symlink(hg_handle_t handle) {
     rpc_mk_symlink_in_t in{};
diff --git a/src/daemon/ops/data.cpp b/src/daemon/ops/data.cpp
index c8c8c4dea..0f580d89a 100644
--- a/src/daemon/ops/data.cpp
+++ b/src/daemon/ops/data.cpp
@@ -25,6 +25,9 @@
 SPDX-License-Identifier: GPL-3.0-or-later
 */
 
+/**
+ * @brief Member definitions for ChunkOperation classes.
+ */
 #include 
 #include 
 
@@ -45,13 +48,15 @@
 
 namespace gkfs::data {
 
 /**
- * Used by an argobots tasklet. Argument args has the following fields:
+ * @internal
+ * Exclusively used by the Argobots tasklet. Argument args has the following
 fields:
  * const string* path;
       size_t size;
       ABT_eventual* eventual;
- * This function is driven by the IO pool. so there is a maximum allowed number
 of concurrent operations per daemon.
- * @return error is put into eventual to signal that it finished
+ * This function is driven by the IO pool. So, there is a maximum allowed number
 of concurrent operations per daemon.
+ * @endinternal
  */
 void
 ChunkTruncateOperation::truncate_abt(void* _arg) {
@@ -97,8 +102,10 @@ ChunkTruncateOperation::ChunkTruncateOperation(const string& path)
         : ChunkOperation{path, 1} {}
 
 /**
+ * @internal
  * Starts a tasklet for requested truncate. In essence all chunk files after the
 * given offset is removed Only one truncate call is allowed at a time
+ * @endinternal
  */
 void
 ChunkTruncateOperation::truncate(size_t size) {
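The tasklet/eventual mechanism referenced throughout these comments has a small core: the tasklet deposits its error code in an ABT_eventual and the caller blocks on it. A minimal sketch, assuming an already-created Argobots I/O pool; truncate_abt_sketch and run_truncate are illustrative names, not the GekkoFS functions.

#include <abt.h>
#include <cstddef>

struct truncate_arg {
    size_t size;           // new file size
    ABT_eventual eventual; // carries the tasklet's error code
};

// Tasklet body: do the work, then signal the waiting caller via the eventual.
static void
truncate_abt_sketch(void* _arg) {
    auto* arg = static_cast<truncate_arg*>(_arg);
    int err = 0; // stand-in for removing/truncating chunk files
    ABT_eventual_set(arg->eventual, &err, sizeof(err));
}

// Caller: launch one tasklet in the I/O pool and wait for its result. Running
// all backend I/O through one pool caps the concurrent operations per daemon.
int
run_truncate(ABT_pool io_pool, size_t size) {
    truncate_arg arg{size, ABT_EVENTUAL_NULL};
    ABT_eventual_create(sizeof(int), &arg.eventual);
    ABT_task_create(io_pool, truncate_abt_sketch, &arg, nullptr);
    int* err = nullptr;
    ABT_eventual_wait(arg.eventual, reinterpret_cast<void**>(&err));
    int ret = *err;
    ABT_eventual_free(&arg.eventual);
    return ret;
}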
@@ -160,6 +167,7 @@ ChunkTruncateOperation::wait_for_task() {
 * ------------------------------------------------------------------------*/
 
 /**
+ * @internal
  * Used by an argobots tasklet. Argument args has the following fields:
  * const string* path;
       const char* buf;
@@ -167,11 +175,11 @@ ChunkTruncateOperation::wait_for_task() {
       size_t size;
       off64_t off;
       ABT_eventual* eventual;
- * This function is driven by the IO pool. so there is a maximum allowed number
+ * This function is driven by the IO pool. So, there is a maximum allowed number
 of concurrent IO operations per daemon.
- * This function is called by tasklets, as this function cannot be allowed to
+ * This function is called by tasklets as this function cannot be allowed to
 block.
- * @return written_size is put into eventual to signal that it finished
+ * @endinternal
  */
 void
 ChunkWriteOperation::write_file_abt(void* _arg) {
@@ -206,14 +214,11 @@ ChunkWriteOperation::ChunkWriteOperation(const string& path, size_t n)
 }
 
 /**
+ * @internal
  * Write buffer from a single chunk referenced by its ID. Put task into IO
  * queue. On failure the write operations is aborted, throwing an error, and
  * cleaned up. The caller may repeat a failed call.
- * @param chunk_id
- * @param bulk_buf_ptr
- * @param size
- * @param offset
- * @throws ChunkWriteOpException
+ * @endinternal
  */
 void
 ChunkWriteOperation::write_nonblock(size_t idx, const uint64_t chunk_id,
@@ -252,11 +257,6 @@ ChunkWriteOperation::write_nonblock(size_t idx, const uint64_t chunk_id,
     }
 }
 
-/**
- * Waits for all Argobots tasklets to finish and report back the write error
- * code and the size written.
- * @return
- */
 pair
 ChunkWriteOperation::wait_for_tasks() {
     GKFS_DATA->spdlogger()->trace("ChunkWriteOperation::{}() enter: path '{}'",
@@ -302,6 +302,7 @@ ChunkWriteOperation::wait_for_tasks() {
 * ------------------------------------------------------------------------*/
 
 /**
+ * @internal
  * Used by an argobots tasklet. Argument args has the following fields:
  * const string* path;
       char* buf;
@@ -313,7 +314,7 @@ ChunkWriteOperation::wait_for_tasks() {
 of concurrent IO operations per daemon.
  * This function is called by tasklets, as this function cannot be allowed to
 block.
- * @return read_size is put into eventual to signal that it finished
+ * @endinternal
  */
 void
 ChunkReadOperation::read_file_abt(void* _arg) {
@@ -350,13 +351,11 @@ ChunkReadOperation::ChunkReadOperation(const string& path, size_t n)
 }
 
 /**
+ * @internal
  * Read buffer to a single chunk referenced by its ID. Put task into IO queue.
  * On failure the read operations is aborted, throwing an error, and cleaned up.
  * The caller may repeat a failed call.
- * @param chunk_id
- * @param bulk_buf_ptr
- * @param size
- * @param offset
+ * @endinternal
  */
 void
 ChunkReadOperation::read_nonblock(size_t idx, const uint64_t chunk_id,
diff --git a/src/daemon/util.cpp b/src/daemon/util.cpp
index 5dfe97cf9..0fb7ca327 100644
--- a/src/daemon/util.cpp
+++ b/src/daemon/util.cpp
@@ -31,13 +31,21 @@
 
 #include 
 
-#include 
-#include 
-
 using namespace std;
 
 namespace gkfs::utils {
 
+/**
+ * @internal
+ * Appends a single line to an existing shared hosts file with the RPC
+ * connection information of this daemon. If the file doesn't exist, it is
+ * created. The line includes the hostname and the RPC server's listening
+ * address.
+ *
+ * NOTE: the shared file system must support strong consistency semantics to
+ * ensure each daemon can write its information to the file even when write
+ * accesses are simultaneous.
+ * @endinternal
+ */
 void
 populate_hosts_file() {
     const auto& hosts_file = GKFS_DATA->hosts_file();
@@ -59,6 +67,13 @@ populate_hosts_file() {
     lfstream.close();
 }
 
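A minimal sketch of the append operation described above; the helper name and the exact space-separated line layout are illustrative assumptions, not the precise GekkoFS hosts file format.

#include <fstream>
#include <stdexcept>
#include <string>

// Sketch: append this daemon's connection information to the shared hosts
// file, creating the file if it does not exist yet.
void
append_host_line(const std::string& hosts_file, const std::string& hostname,
                 const std::string& rpc_addr) {
    std::ofstream ofs(hosts_file, std::ios::out | std::ios::app);
    if(!ofs)
        throw std::runtime_error("Unable to open hosts file: " + hosts_file);
    // one line per daemon, e.g., "node01 ofi+sockets://10.0.0.1:4433"
    ofs << hostname << " " << rpc_addr << "\n";
}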
+/**
+ * @internal
+ * This function removes the entire hosts file even if just one daemon is
+ * shut down. This makes sense because the data distribution calculation would
+ * be misaligned if only the entry of the current daemon were removed.
+ * @endinternal
+ */
 void
 destroy_hosts_file() {
     std::remove(GKFS_DATA->hosts_file().c_str());
-- 
GitLab
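To see why removing only this daemon's line would misalign the data distribution: chunk placement is derived from the complete host list, so a shorter list remaps chunks to different daemons. The hash-based placement below is an illustrative stand-in for the actual GekkoFS distribution function.

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Sketch: a chunk's target daemon is a function of the host count. If one
// entry disappears from the hosts file, hosts.size() changes and most chunks
// resolve to different daemons, so the whole file is removed on shutdown.
size_t
target_daemon(const std::string& path, std::uint64_t chunk_id,
              const std::vector<std::string>& hosts) {
    auto h = std::hash<std::string>{}(path + std::to_string(chunk_id));
    return h % hosts.size();
}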