From 591abeab1f9c9da5e29f68e8ac87991ebfbfc83c Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Tue, 22 Feb 2022 16:53:07 +0100 Subject: [PATCH 1/6] Stats for GekkoFS --- include/common/statistics/stats.hpp | 175 ++++++++++++++++++++++++++++ include/daemon/classes/fs_data.hpp | 15 +++ src/common/statistics/stats.cpp | 164 ++++++++++++++++++++++++++ src/daemon/CMakeLists.txt | 1 + src/daemon/classes/fs_data.cpp | 9 ++ src/daemon/daemon.cpp | 3 + 6 files changed, 367 insertions(+) create mode 100644 include/common/statistics/stats.hpp create mode 100644 src/common/statistics/stats.cpp diff --git a/include/common/statistics/stats.hpp b/include/common/statistics/stats.hpp new file mode 100644 index 000000000..c6f869708 --- /dev/null +++ b/include/common/statistics/stats.hpp @@ -0,0 +1,175 @@ +/* + Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + This file is part of GekkoFS. + + GekkoFS is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + GekkoFS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GekkoFS. If not, see . + + SPDX-License-Identifier: GPL-3.0-or-later +*/ + +#ifndef GKFS_COMMON_STATS_HPP +#define GKFS_COMMON_STATS_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +/** + * Provides storage capabilities to provide stats about GekkoFS + * The information is per server. + * We do not provide accurate stats for 1-5-10 minute stats + * + */ +namespace gkfs::utils { + +/* + Number of operations (Create, write/ read, remove, mkdir...) + Size of database (metadata keys, should be not needed, any) + Size of data (+write - delete) + Server Bandwidth (write / read operations) + + mean, (lifetime of the server) + 1 minute mean + 5 minute mean + 10 minute mean + + To provide the stats that we need, + we need to store the info and the timestamp to calculate it + A vector should work, with a maximum of elements, + The stats will only be calculated when requested + a cached value will be send (with a deadline) + */ +class Stats{ + enum class IOPS_OP { + IOPS_CREATE, + IOPS_WRITE, + IOPS_READ, + IOPS_MKDIR, + IOPS_RMDIR, + IOPS_REMOVE, + }; + + constexpr static const std::initializer_list all_IOPS_OP = {IOPS_OP::IOPS_CREATE, IOPS_OP::IOPS_WRITE, IOPS_OP::IOPS_READ, IOPS_OP::IOPS_MKDIR,IOPS_OP::IOPS_RMDIR, IOPS_OP::IOPS_REMOVE}; + + enum class SIZE_OP { + METADATA_SIZE, + WRITE_SIZE, + READ_SIZE, + DATA_SIZE + }; + + constexpr static const std::initializer_list all_SIZE_OP = {SIZE_OP::METADATA_SIZE, SIZE_OP::DATA_SIZE, SIZE_OP::WRITE_SIZE, SIZE_OP::READ_SIZE}; + + std::chrono::time_point last_cached; + /* Measures when we started the server */ + std::chrono::time_point start; + // How many stats will be stored + const unsigned int MAX_STATS = 1000000; + + // Stores total value for global mean + std::map IOPS; + std::map SIZE; + + + // Stores timestamp when an operation comes + // removes if first operation if > 10 minutes + // Different means will be stored and cached 1 minuted + std::map > > TIME_IOPS; + // We will store 1, 5, and 10 minute mean; + std::map > CACHED_IOPS; + + // For size operations we need to store the timestamp and + // the size + std::map , unsigned long long > > + > TIME_SIZE; + // We will store 1, 5, and 10 minute mean; + std::map < enum SIZE_OP, std::vector > CACHED_SIZE; + +/** + * @brief Starts the Stats module and initializes structures + * + */ +public: + Stats(); + + +/** + * Add a new value for a IOPS, that does not involve any size + * No value needed as they are simple (1 create, 1 read...) + * Size operations internally call this operation (read,write) + * + * @param IOPS_OP Which operation to add + */ + +void add_value_iops (enum IOPS_OP); + +/** + * @brief Store a new stat point, with a size value. + * If it involves a IO operations it will call the corresponding + * operation + * + * @param SIZE_OP Which operation we refer + * @param value to store (SIZE_OP) + */ +void add_value_size (enum SIZE_OP, unsigned long long value); + +/** + * @brief Get the total mean value of the asked stat + * This can be provided inmediately without cost + * @return mean value + */ +double get_mean (enum IOPS_OP); + + +/** + * @brief Get the total mean value of the asked stat + * This can be provided inmediately without cost + * @return mean value + */ +double get_mean (enum SIZE_OP); + +/** + * @brief Get all the means (total, 1,5 and 10 minutes) for a SIZE_OP + * Returns precalculated values if we just calculated them 1 minute ago + * + * @return std::vector< double > with 4 means + */ +std::vector< double > get_four_means (enum SIZE_OP); + +/** + * @brief Get all the means (total, 1,5 and 10 minutes) for a IOPS_OP + * Returns precalculated values if we just calculated them 1 minute ago + * + * @return std::vector< double > with 4 means + */ +std::vector< double > get_four_means (enum IOPS_OP); +}; + +} // namespace gkfs::utils + +#endif // GKFS_COMMON_STATS_HPP \ No newline at end of file diff --git a/include/daemon/classes/fs_data.hpp b/include/daemon/classes/fs_data.hpp index 06e71468c..80b412c7a 100644 --- a/include/daemon/classes/fs_data.hpp +++ b/include/daemon/classes/fs_data.hpp @@ -46,6 +46,11 @@ namespace data { class ChunkStorage; } +/* Forward declarations */ +namespace utils { +class Stats; +} + namespace daemon { class FsData { @@ -85,6 +90,9 @@ private: bool link_cnt_state_; bool blocks_state_; + // Statistics + std::shared_ptr stats_; + public: static FsData* getInstance() { @@ -209,6 +217,13 @@ public: void parallax_size_md(unsigned int size_md); + + const std::shared_ptr& + stats() const; + + void + stats(const std::shared_ptr& stats); + }; } // namespace daemon diff --git a/src/common/statistics/stats.cpp b/src/common/statistics/stats.cpp new file mode 100644 index 000000000..03a6f87b0 --- /dev/null +++ b/src/common/statistics/stats.cpp @@ -0,0 +1,164 @@ +/* + Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + This file is part of GekkoFS. + + GekkoFS is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + GekkoFS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GekkoFS. If not, see . + + SPDX-License-Identifier: GPL-3.0-or-later +*/ + + +#include "/home/rnou/gekkofs/include/common/statistics/stats.hpp" + +using namespace std; + +namespace gkfs::utils{ + + Stats::Stats(){ + + // Init clocks + start = std::chrono::steady_clock::now(); + last_cached = std::chrono::steady_clock::now(); + // Init cached (4 mean values) + + for (auto e : all_IOPS_OP) + for (int i = 0; i < 4; i++) CACHED_IOPS[e].push_back(0.0); + + for (auto e : all_SIZE_OP) + for (int i = 0; i < 4; i++) CACHED_SIZE[e].push_back(0.0); + + + // To simplify the control we add an element into the different maps + // Statistaclly will be negligible... and we get a faster flow + + for (auto e : all_IOPS_OP) { + IOPS[e] = 0; + TIME_IOPS[e].push_back(std::chrono::steady_clock::now()); + } + + for (auto e : all_SIZE_OP) { + SIZE[e] = 0; + TIME_SIZE[e].push_back(pair(std::chrono::steady_clock::now(),0.0)); + } + } + + void Stats::add_value_iops (enum IOPS_OP iop){ + IOPS[iop]++; + auto now = std::chrono::steady_clock::now(); + + + if ( (now - TIME_IOPS[iop].front()) > std::chrono::duration(10s) ) { + TIME_IOPS[iop].pop_front(); + } + else if (TIME_IOPS[iop].size() >= MAX_STATS) TIME_IOPS[iop].pop_front(); + + TIME_IOPS[iop].push_back(std::chrono::steady_clock::now()); + } + + void Stats::add_value_size (enum SIZE_OP iop, unsigned long long value){ + auto now = std::chrono::steady_clock::now(); + SIZE[iop] += value; + if ( (now - TIME_SIZE[iop].front().first) > std::chrono::duration(10s) ) { + TIME_SIZE[iop].pop_front(); + } + else if (TIME_SIZE[iop].size() >= MAX_STATS) TIME_SIZE[iop].pop_front(); + + TIME_SIZE[iop].push_back(pair( std::chrono::steady_clock::now(), value ) ); + + if (iop == SIZE_OP::READ_SIZE) IOPS[IOPS_OP::IOPS_READ]++; + else if (iop == SIZE_OP::WRITE_SIZE) IOPS[IOPS_OP::IOPS_WRITE]++; + } + + /** + * @brief Get the total mean value of the asked stat + * This can be provided inmediately without cost + * @return mean value + */ + double Stats::get_mean (enum SIZE_OP sop){ + auto now = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(now - start); + double value = (double)SIZE[sop] / (double)duration.count(); + return value; + + } + + double Stats::get_mean (enum IOPS_OP iop){ + auto now = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(now - start); + double value = (double)IOPS[iop] / (double)duration.count(); + return value; + } + + +/** + * @brief Get all the means (total, 1,5 and 10 minutes) for a SIZE_OP + * Returns precalculated values if we just calculated them 1 minute ago + * // TODO: cache + * @return std::vector< double > with 4 means + */ + std::vector< double > Stats::get_four_means (enum SIZE_OP sop){ + std::vector < double > results = {0,0,0,0}; + auto now = std::chrono::steady_clock::now(); + for (auto e : TIME_SIZE[sop]) { + auto duration = std::chrono::duration_cast(now - e.first).count(); + if (duration > 10) break; + + results[3] += e.second; + if (duration > 5) continue; + results[2] += e.second; + if (duration > 1) continue; + results[1] += e.second; + } + + results[0] = get_mean(sop); + results[3] /= 10*60; + results[2] /= 5*60; + results[1] /= 60; + + return results; + } + + + std::vector< double > Stats::get_four_means (enum IOPS_OP iop){ + std::vector < double > results = {0,0,0,0}; + auto now = std::chrono::steady_clock::now(); + for (auto e : TIME_IOPS[iop]) { + auto duration = std::chrono::duration_cast(now - e).count(); + if (duration > 10) break; + + results[3] ++; + if (duration > 5) continue; + results[2] ++; + if (duration > 1) continue; + results[1] ++; + } + + results[0] = get_mean(iop); + results[3] /= 10*60; + results[2] /= 5*60; + results[1] /= 60; + + return results; + } + + +} // namespace gkfs::utils::stats diff --git a/src/daemon/CMakeLists.txt b/src/daemon/CMakeLists.txt index e3703d73b..f91415eee 100644 --- a/src/daemon/CMakeLists.txt +++ b/src/daemon/CMakeLists.txt @@ -63,6 +63,7 @@ set(DAEMON_LINK_LIBRARIES metadata_db storage distributor + statistics log_util env_util spdlog diff --git a/src/daemon/classes/fs_data.cpp b/src/daemon/classes/fs_data.cpp index 1611e9e8a..0eb2776af 100644 --- a/src/daemon/classes/fs_data.cpp +++ b/src/daemon/classes/fs_data.cpp @@ -219,6 +219,15 @@ void FsData::parallax_size_md(unsigned int size_md) { FsData::parallax_size_md_ = static_cast( size_md * 1024ull * 1024ull * 1024ull); + +const std::shared_ptr& +FsData::stats() const { + return stats_; +} + +void +FsData::stats(const std::shared_ptr& stats) { + stats_ = stats; } } // namespace gkfs::daemon diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 1ed24e9a2..49a2fd748 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -291,6 +291,9 @@ init_environment() { } #endif + // Initialize Stats + GKFS_DATA->stats(std::make_shared()); + // Initialize data backend auto chunk_storage_path = fmt::format("{}/{}", GKFS_DATA->rootdir(), gkfs::config::data::chunk_dir); -- GitLab From fad060167697b12ee108771e2e1ea6f04a361c3d Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Tue, 22 Feb 2022 17:07:38 +0100 Subject: [PATCH 2/6] updated CMake Sampling output Thread First stats test Added Stats argument to enable output --- CHANGELOG.md | 1 + README.md | 1 + include/common/statistics/stats.hpp | 210 ++++++++++++++--------- include/daemon/classes/fs_data.hpp | 10 ++ src/common/CMakeLists.txt | 9 + src/common/statistics/stats.cpp | 257 +++++++++++++++++----------- src/daemon/classes/fs_data.cpp | 17 ++ src/daemon/daemon.cpp | 14 +- src/daemon/handler/srv_data.cpp | 8 + src/daemon/handler/srv_metadata.cpp | 4 + 10 files changed, 347 insertions(+), 184 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05f997a69..79745b332 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Added Stats gathering in servers ### New - Added new experimental metadata backend: diff --git a/README.md b/README.md index 24857eba0..f76f5e272 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ Options: RocksDB is default if not set. Parallax support is experimental. Note, parallaxdb creates a file called rocksdbx with 8GB created in metadir. --parallaxsize TEXT parallaxdb - metadata file size in GB (default 8GB), used only with new files + --output-stats Enables the output of the stats on the stdout (each 10s) for debug --version Print version and exit. ``` diff --git a/include/common/statistics/stats.hpp b/include/common/statistics/stats.hpp index c6f869708..57ccf9b5e 100644 --- a/include/common/statistics/stats.hpp +++ b/include/common/statistics/stats.hpp @@ -37,11 +37,13 @@ #include #include #include +#include +#include /** * Provides storage capabilities to provide stats about GekkoFS - * The information is per server. + * The information is per server. * We do not provide accurate stats for 1-5-10 minute stats - * + * */ namespace gkfs::utils { @@ -56,13 +58,14 @@ namespace gkfs::utils { 5 minute mean 10 minute mean - To provide the stats that we need, + To provide the stats that we need, we need to store the info and the timestamp to calculate it - A vector should work, with a maximum of elements, + A vector should work, with a maximum of elements, The stats will only be calculated when requested a cached value will be send (with a deadline) */ -class Stats{ +class Stats { +public: enum class IOPS_OP { IOPS_CREATE, IOPS_WRITE, @@ -72,102 +75,139 @@ class Stats{ IOPS_REMOVE, }; - constexpr static const std::initializer_list all_IOPS_OP = {IOPS_OP::IOPS_CREATE, IOPS_OP::IOPS_WRITE, IOPS_OP::IOPS_READ, IOPS_OP::IOPS_MKDIR,IOPS_OP::IOPS_RMDIR, IOPS_OP::IOPS_REMOVE}; - - enum class SIZE_OP { - METADATA_SIZE, - WRITE_SIZE, - READ_SIZE, - DATA_SIZE - }; + enum class SIZE_OP { METADATA_SIZE, WRITE_SIZE, READ_SIZE, DATA_SIZE }; - constexpr static const std::initializer_list all_SIZE_OP = {SIZE_OP::METADATA_SIZE, SIZE_OP::DATA_SIZE, SIZE_OP::WRITE_SIZE, SIZE_OP::READ_SIZE}; +private: + constexpr static const std::initializer_list all_IOPS_OP = { + IOPS_OP::IOPS_CREATE, IOPS_OP::IOPS_WRITE, IOPS_OP::IOPS_READ, + IOPS_OP::IOPS_MKDIR, IOPS_OP::IOPS_RMDIR, IOPS_OP::IOPS_REMOVE}; + constexpr static const std::initializer_list all_SIZE_OP = { + SIZE_OP::METADATA_SIZE, SIZE_OP::DATA_SIZE, SIZE_OP::WRITE_SIZE, + SIZE_OP::READ_SIZE}; + + const std::vector IOPS_OP_S = {"IOPS_CREATE", "IOPS_WRITE", + "IOPS_READ", "IOPS_MKDIR", + "IOPS_RMDIR", "IOPS_REMOVE"}; + const std::vector SIZE_OP_S = {"METADATA_SIZE", "WRITE_SIZE", + "READ_SIZE", "DATA_SIZE"}; std::chrono::time_point last_cached; /* Measures when we started the server */ std::chrono::time_point start; - // How many stats will be stored - const unsigned int MAX_STATS = 1000000; + // How many stats will be stored + const unsigned int MAX_STATS = 1000000; // Stores total value for global mean - std::map IOPS; - std::map SIZE; + std::map IOPS; + std::map SIZE; // Stores timestamp when an operation comes - // removes if first operation if > 10 minutes + // removes if first operation if > 10 minutes // Different means will be stored and cached 1 minuted - std::map > > TIME_IOPS; + std::map>> + TIME_IOPS; // We will store 1, 5, and 10 minute mean; - std::map > CACHED_IOPS; + std::map> CACHED_IOPS; // For size operations we need to store the timestamp and // the size - std::map , unsigned long long > > - > TIME_SIZE; + std::map, + unsigned long long>>> + TIME_SIZE; // We will store 1, 5, and 10 minute mean; - std::map < enum SIZE_OP, std::vector > CACHED_SIZE; - -/** - * @brief Starts the Stats module and initializes structures - * - */ -public: - Stats(); - - -/** - * Add a new value for a IOPS, that does not involve any size - * No value needed as they are simple (1 create, 1 read...) - * Size operations internally call this operation (read,write) - * - * @param IOPS_OP Which operation to add - */ - -void add_value_iops (enum IOPS_OP); - -/** - * @brief Store a new stat point, with a size value. - * If it involves a IO operations it will call the corresponding - * operation - * - * @param SIZE_OP Which operation we refer - * @param value to store (SIZE_OP) - */ -void add_value_size (enum SIZE_OP, unsigned long long value); - -/** - * @brief Get the total mean value of the asked stat - * This can be provided inmediately without cost - * @return mean value - */ -double get_mean (enum IOPS_OP); - - -/** - * @brief Get the total mean value of the asked stat - * This can be provided inmediately without cost - * @return mean value - */ -double get_mean (enum SIZE_OP); - -/** - * @brief Get all the means (total, 1,5 and 10 minutes) for a SIZE_OP - * Returns precalculated values if we just calculated them 1 minute ago - * - * @return std::vector< double > with 4 means - */ -std::vector< double > get_four_means (enum SIZE_OP); - -/** - * @brief Get all the means (total, 1,5 and 10 minutes) for a IOPS_OP - * Returns precalculated values if we just calculated them 1 minute ago - * - * @return std::vector< double > with 4 means - */ -std::vector< double > get_four_means (enum IOPS_OP); + std::map> CACHED_SIZE; + + // Thread that outputs stats info + std::thread t_output; + bool output_thread_; + + // Controls the destruction of the class/stops the thread + bool running = true; + /** + * @brief Sends all the stats to the screen + * Debug Function + * + * @param d is the time between output + */ + void + output(std::chrono::seconds d); + +public: + /** + * @brief Starts the Stats module and initializes structures + * + */ + Stats(bool output_thread); + + /** + * @brief Destroys the class, and any associated thread + * + */ + ~Stats(); + + /** + * Add a new value for a IOPS, that does not involve any size + * No value needed as they are simple (1 create, 1 read...) + * Size operations internally call this operation (read,write) + * + * @param IOPS_OP Which operation to add + */ + + void add_value_iops(enum IOPS_OP); + + /** + * @brief Store a new stat point, with a size value. + * If it involves a IO operations it will call the corresponding + * operation + * + * @param SIZE_OP Which operation we refer + * @param value to store (SIZE_OP) + */ + void + add_value_size(enum SIZE_OP, unsigned long long value); + + /** + * @brief Get the total mean value of the asked stat + * This can be provided inmediately without cost + * @return mean value + */ + double get_mean(enum IOPS_OP); + + + /** + * @brief Get the total mean value of the asked stat + * This can be provided inmediately without cost + * @return mean value + */ + double get_mean(enum SIZE_OP); + + /** + * @brief Get all the means (total, 1,5 and 10 minutes) for a SIZE_OP + * Returns precalculated values if we just calculated them 1 minute ago + * + * @return std::vector< double > with 4 means + */ + std::vector get_four_means(enum SIZE_OP); + + /** + * @brief Get all the means (total, 1,5 and 10 minutes) for a IOPS_OP + * Returns precalculated values if we just calculated them 1 minute ago + * + * @return std::vector< double > with 4 means + */ + std::vector get_four_means(enum IOPS_OP); + + + /** + * @brief Dumps all the means from the stats + * + */ + void + dump(); }; } // namespace gkfs::utils diff --git a/include/daemon/classes/fs_data.hpp b/include/daemon/classes/fs_data.hpp index 80b412c7a..240a9d192 100644 --- a/include/daemon/classes/fs_data.hpp +++ b/include/daemon/classes/fs_data.hpp @@ -92,6 +92,7 @@ private: // Statistics std::shared_ptr stats_; + bool output_stats_ = false; public: static FsData* @@ -224,8 +225,17 @@ public: void stats(const std::shared_ptr& stats); + void + close_stats(); + + bool + output_stats() const; + + void + output_stats(bool output_stats); }; + } // namespace daemon } // namespace gkfs diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 74b80e58c..f0149fcc9 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -39,6 +39,15 @@ target_sources(distributor ${CMAKE_CURRENT_LIST_DIR}/rpc/distributor.cpp ) +add_library(statistics STATIC) +set_property(TARGET statistics PROPERTY POSITION_INDEPENDENT_CODE ON) +target_sources(statistics + PUBLIC + ${INCLUDE_DIR}/common/statistics/stats.hpp + PRIVATE + ${CMAKE_CURRENT_LIST_DIR}/statistics/stats.cpp + ) + if(GKFS_ENABLE_CODE_COVERAGE) target_code_coverage(distributor AUTO) endif() diff --git a/src/common/statistics/stats.cpp b/src/common/statistics/stats.cpp index 03a6f87b0..7e9e16a01 100644 --- a/src/common/statistics/stats.cpp +++ b/src/common/statistics/stats.cpp @@ -27,86 +27,107 @@ */ -#include "/home/rnou/gekkofs/include/common/statistics/stats.hpp" +#include using namespace std; -namespace gkfs::utils{ +namespace gkfs::utils { - Stats::Stats(){ +Stats::Stats(bool output_thread) { - // Init clocks - start = std::chrono::steady_clock::now(); - last_cached = std::chrono::steady_clock::now(); - // Init cached (4 mean values) - - for (auto e : all_IOPS_OP) - for (int i = 0; i < 4; i++) CACHED_IOPS[e].push_back(0.0); + // Init clocks + start = std::chrono::steady_clock::now(); + last_cached = std::chrono::steady_clock::now(); + // Init cached (4 mean values) - for (auto e : all_SIZE_OP) - for (int i = 0; i < 4; i++) CACHED_SIZE[e].push_back(0.0); + for(auto e : all_IOPS_OP) + for(int i = 0; i < 4; i++) CACHED_IOPS[e].push_back(0.0); + for(auto e : all_SIZE_OP) + for(int i = 0; i < 4; i++) CACHED_SIZE[e].push_back(0.0); - // To simplify the control we add an element into the different maps - // Statistaclly will be negligible... and we get a faster flow - for (auto e : all_IOPS_OP) { - IOPS[e] = 0; - TIME_IOPS[e].push_back(std::chrono::steady_clock::now()); - } + // To simplify the control we add an element into the different maps + // Statistaclly will be negligible... and we get a faster flow - for (auto e : all_SIZE_OP) { - SIZE[e] = 0; - TIME_SIZE[e].push_back(pair(std::chrono::steady_clock::now(),0.0)); - } + for(auto e : all_IOPS_OP) { + IOPS[e] = 0; + TIME_IOPS[e].push_back(std::chrono::steady_clock::now()); } - void Stats::add_value_iops (enum IOPS_OP iop){ - IOPS[iop]++; - auto now = std::chrono::steady_clock::now(); + for(auto e : all_SIZE_OP) { + SIZE[e] = 0; + TIME_SIZE[e].push_back(pair(std::chrono::steady_clock::now(), 0.0)); + } - - if ( (now - TIME_IOPS[iop].front()) > std::chrono::duration(10s) ) { - TIME_IOPS[iop].pop_front(); - } - else if (TIME_IOPS[iop].size() >= MAX_STATS) TIME_IOPS[iop].pop_front(); + output_thread_ = output_thread; - TIME_IOPS[iop].push_back(std::chrono::steady_clock::now()); + if (output_thread_) { + t_output = std::thread([this] { output(std::chrono::duration(10s)); }); } +} - void Stats::add_value_size (enum SIZE_OP iop, unsigned long long value){ - auto now = std::chrono::steady_clock::now(); - SIZE[iop] += value; - if ( (now - TIME_SIZE[iop].front().first) > std::chrono::duration(10s) ) { - TIME_SIZE[iop].pop_front(); - } - else if (TIME_SIZE[iop].size() >= MAX_STATS) TIME_SIZE[iop].pop_front(); - - TIME_SIZE[iop].push_back(pair( std::chrono::steady_clock::now(), value ) ); - - if (iop == SIZE_OP::READ_SIZE) IOPS[IOPS_OP::IOPS_READ]++; - else if (iop == SIZE_OP::WRITE_SIZE) IOPS[IOPS_OP::IOPS_WRITE]++; +Stats::~Stats() { + // We do not need a mutex for that + if (output_thread_) { + running = false; + t_output.join(); } +} - /** - * @brief Get the total mean value of the asked stat - * This can be provided inmediately without cost - * @return mean value - */ - double Stats::get_mean (enum SIZE_OP sop){ - auto now = std::chrono::steady_clock::now(); - auto duration = std::chrono::duration_cast(now - start); - double value = (double)SIZE[sop] / (double)duration.count(); - return value; +void +Stats::add_value_iops(enum IOPS_OP iop) { + IOPS[iop]++; + auto now = std::chrono::steady_clock::now(); - } - double Stats::get_mean (enum IOPS_OP iop){ - auto now = std::chrono::steady_clock::now(); - auto duration = std::chrono::duration_cast(now - start); - double value = (double)IOPS[iop] / (double)duration.count(); - return value; - } + if((now - TIME_IOPS[iop].front()) > std::chrono::duration(10s)) { + TIME_IOPS[iop].pop_front(); + } else if(TIME_IOPS[iop].size() >= MAX_STATS) + TIME_IOPS[iop].pop_front(); + + TIME_IOPS[iop].push_back(std::chrono::steady_clock::now()); +} + +void +Stats::add_value_size(enum SIZE_OP iop, unsigned long long value) { + auto now = std::chrono::steady_clock::now(); + SIZE[iop] += value; + if((now - TIME_SIZE[iop].front().first) > std::chrono::duration(10s)) { + TIME_SIZE[iop].pop_front(); + } else if(TIME_SIZE[iop].size() >= MAX_STATS) + TIME_SIZE[iop].pop_front(); + + TIME_SIZE[iop].push_back(pair(std::chrono::steady_clock::now(), value)); + + if(iop == SIZE_OP::READ_SIZE) + IOPS[IOPS_OP::IOPS_READ]++; + else if(iop == SIZE_OP::WRITE_SIZE) + IOPS[IOPS_OP::IOPS_WRITE]++; +} + +/** + * @brief Get the total mean value of the asked stat + * This can be provided inmediately without cost + * @return mean value + */ +double +Stats::get_mean(enum SIZE_OP sop) { + auto now = std::chrono::steady_clock::now(); + auto duration = + std::chrono::duration_cast(now - start); + double value = (double) SIZE[sop] / (double) duration.count(); + return value; +} + +double +Stats::get_mean(enum IOPS_OP iop) { + auto now = std::chrono::steady_clock::now(); + auto duration = + std::chrono::duration_cast(now - start); + double value = (double) IOPS[iop] / (double) duration.count(); + return value; +} /** @@ -115,50 +136,92 @@ namespace gkfs::utils{ * // TODO: cache * @return std::vector< double > with 4 means */ - std::vector< double > Stats::get_four_means (enum SIZE_OP sop){ - std::vector < double > results = {0,0,0,0}; - auto now = std::chrono::steady_clock::now(); - for (auto e : TIME_SIZE[sop]) { - auto duration = std::chrono::duration_cast(now - e.first).count(); - if (duration > 10) break; - - results[3] += e.second; - if (duration > 5) continue; - results[2] += e.second; - if (duration > 1) continue; - results[1] += e.second; - } - - results[0] = get_mean(sop); - results[3] /= 10*60; - results[2] /= 5*60; - results[1] /= 60; +std::vector +Stats::get_four_means(enum SIZE_OP sop) { + std::vector results = {0, 0, 0, 0}; + auto now = std::chrono::steady_clock::now(); + for(auto e : TIME_SIZE[sop]) { + auto duration = + std::chrono::duration_cast(now - e.first) + .count(); + if(duration > 10) + break; + + results[3] += e.second; + if(duration > 5) + continue; + results[2] += e.second; + if(duration > 1) + continue; + results[1] += e.second; + } - return results; + results[0] = get_mean(sop); + results[3] /= 10 * 60; + results[2] /= 5 * 60; + results[1] /= 60; + + return results; +} + + +std::vector +Stats::get_four_means(enum IOPS_OP iop) { + std::vector results = {0, 0, 0, 0}; + auto now = std::chrono::steady_clock::now(); + for(auto e : TIME_IOPS[iop]) { + auto duration = + std::chrono::duration_cast(now - e) + .count(); + if(duration > 10) + break; + + results[3]++; + if(duration > 5) + continue; + results[2]++; + if(duration > 1) + continue; + results[1]++; } + results[0] = get_mean(iop); + results[3] /= 10 * 60; + results[2] /= 5 * 60; + results[1] /= 60; - std::vector< double > Stats::get_four_means (enum IOPS_OP iop){ - std::vector < double > results = {0,0,0,0}; - auto now = std::chrono::steady_clock::now(); - for (auto e : TIME_IOPS[iop]) { - auto duration = std::chrono::duration_cast(now - e).count(); - if (duration > 10) break; + return results; +} - results[3] ++; - if (duration > 5) continue; - results[2] ++; - if (duration > 1) continue; - results[1] ++; - } +void +Stats::dump() { + for(auto e : all_IOPS_OP) { + auto tmp = get_four_means(e); - results[0] = get_mean(iop); - results[3] /= 10*60; - results[2] /= 5*60; - results[1] /= 60; + std::cout << "Stats " << IOPS_OP_S[static_cast(e)] << " "; + for(auto mean : tmp) { + std::cout << mean << " - "; + } + std::cout << std::endl; + } + for(auto e : all_SIZE_OP) { + auto tmp = get_four_means(e); - return results; + std::cout << "Stats " << SIZE_OP_S[static_cast(e)] << " "; + for(auto mean : tmp) { + std::cout << mean << " - "; + } + std::cout << std::endl; } +} +void +Stats::output(std::chrono::seconds d) { + while(running) { + dump(); + + std::this_thread::sleep_for(d); + } +} -} // namespace gkfs::utils::stats +} // namespace gkfs::utils diff --git a/src/daemon/classes/fs_data.cpp b/src/daemon/classes/fs_data.cpp index 0eb2776af..e5fa97e88 100644 --- a/src/daemon/classes/fs_data.cpp +++ b/src/daemon/classes/fs_data.cpp @@ -230,4 +230,21 @@ FsData::stats(const std::shared_ptr& stats) { stats_ = stats; } +void +FsData::close_stats() { + stats_.reset(); +} + + +bool +FsData::output_stats() const { + return output_stats_; +} + +void +FsData::output_stats(bool output_stats) { + FsData::output_stats_ = output_stats; +} + + } // namespace gkfs::daemon diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 49a2fd748..45b6e410c 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -52,6 +52,7 @@ #include #endif +#include #include #include @@ -292,7 +293,7 @@ init_environment() { #endif // Initialize Stats - GKFS_DATA->stats(std::make_shared()); + GKFS_DATA->stats(std::make_shared(GKFS_DATA->output_stats())); // Initialize data backend auto chunk_storage_path = fmt::format("{}/{}", GKFS_DATA->rootdir(), @@ -423,6 +424,7 @@ destroy_enviroment() { fs::remove_all(GKFS_DATA->metadir(), ecode); fs::remove_all(GKFS_DATA->rootdir(), ecode); } + GKFS_DATA->close_stats(); } /** @@ -582,9 +584,14 @@ parse_input(const cli_options& opts, const CLI::App& desc) { fs::create_directories(rootdir_path); GKFS_DATA->rootdir(rootdir_path.native()); + if (desc.count("--output-stats")) { + GKFS_DATA->output_stats(true); + } + + if(desc.count("--metadir")) { auto metadir = opts.metadir; - + #ifdef GKFS_ENABLE_FORWARDING auto metadir_path = fs::path(metadir) / fmt::format_int(getpid()).str(); #else @@ -714,6 +721,9 @@ main(int argc, const char* argv[]) { desc.add_option("--parallaxsize", opts.parallax_size, "parallaxdb - metadata file size in GB (default 8GB), " "used only with new files"); + desc.add_flag("--output-stats", + "Creates a thread that outputs the server stats each 10s"); + desc.add_flag("--version", "Print version and exit."); // clang-format on try { diff --git a/src/daemon/handler/srv_data.cpp b/src/daemon/handler/srv_data.cpp index d7b9a6ace..ee0e2e39c 100644 --- a/src/daemon/handler/srv_data.cpp +++ b/src/daemon/handler/srv_data.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #ifdef GKFS_ENABLE_AGIOS #include @@ -113,6 +114,9 @@ rpc_srv_write(hg_handle_t handle) { "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'", __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n, in.total_chunk_size, bulk_size, in.offset); + GKFS_DATA->stats()->add_value_size(gkfs::utils::Stats::SIZE_OP::WRITE_SIZE, + bulk_size); + #ifdef GKFS_ENABLE_AGIOS int* data; ABT_eventual eventual = ABT_EVENTUAL_NULL; @@ -404,6 +408,10 @@ rpc_srv_read(hg_handle_t handle) { "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'", __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n, in.total_chunk_size, bulk_size, in.offset); + + GKFS_DATA->stats()->add_value_size(gkfs::utils::Stats::SIZE_OP::READ_SIZE, + bulk_size); + #ifdef GKFS_ENABLE_AGIOS int* data; ABT_eventual eventual = ABT_EVENTUAL_NULL; diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index 3cea7e3ea..d31d34003 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -41,6 +41,7 @@ #include #include +#include using namespace std; @@ -89,6 +90,9 @@ rpc_srv_create(hg_handle_t handle) { auto hret = margo_respond(handle, &out); if(hret != HG_SUCCESS) { GKFS_DATA->spdlogger()->error("{}() Failed to respond", __func__); + } else { + GKFS_DATA->stats()->add_value_iops( + gkfs::utils::Stats::IOPS_OP::IOPS_CREATE); } // Destroy handle when finished -- GitLab From d17a2cb32f2c2418591b5b71bc5973add0122a32 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 2 Mar 2022 11:50:27 +0100 Subject: [PATCH 3/6] Added DIRENTS and STATS stats, remover mkdir, rmdir, metadata and data Added Ifdef feature, chunk distribution statistics Added output to file --- CMakeLists.txt | 6 ++ include/common/statistics/stats.hpp | 86 ++++++++++++++------- include/daemon/classes/fs_data.hpp | 7 ++ src/common/statistics/stats.cpp | 112 +++++++++++++++++++++------- src/daemon/classes/fs_data.cpp | 11 ++- src/daemon/daemon.cpp | 20 +++-- src/daemon/handler/srv_data.cpp | 5 ++ src/daemon/handler/srv_metadata.cpp | 12 ++- 8 files changed, 191 insertions(+), 68 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7589df519..84410f80e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,6 +195,12 @@ if(GKFS_USE_GUIDED_DISTRIBUTION) message(STATUS "[gekkofs] Guided data distributor input file path: ${GKFS_USE_GUIDED_DISTRIBUTION_PATH}") endif() +option(GKFS_CHUNK_STATS "Gather Chunk Stats " OFF) +if (GKFS_CHUNK_STATS) + add_definitions(-DGKFS_CHUNK_STATS) +endif () +message(STATUS "[gekkofs] Gather Chunk Stats: ${GKFS_CHUNK_STATS}") + configure_file(include/common/cmake_configure.hpp.in include/common/cmake_configure.hpp) diff --git a/include/common/statistics/stats.hpp b/include/common/statistics/stats.hpp index 57ccf9b5e..0334ae4cf 100644 --- a/include/common/statistics/stats.hpp +++ b/include/common/statistics/stats.hpp @@ -33,12 +33,15 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include /** * Provides storage capabilities to provide stats about GekkoFS * The information is per server. @@ -70,27 +73,25 @@ public: IOPS_CREATE, IOPS_WRITE, IOPS_READ, - IOPS_MKDIR, - IOPS_RMDIR, + IOPS_STATS, + IOPS_DIRENTS, IOPS_REMOVE, }; - enum class SIZE_OP { METADATA_SIZE, WRITE_SIZE, READ_SIZE, DATA_SIZE }; + enum class SIZE_OP { WRITE_SIZE, READ_SIZE }; private: constexpr static const std::initializer_list all_IOPS_OP = { - IOPS_OP::IOPS_CREATE, IOPS_OP::IOPS_WRITE, IOPS_OP::IOPS_READ, - IOPS_OP::IOPS_MKDIR, IOPS_OP::IOPS_RMDIR, IOPS_OP::IOPS_REMOVE}; + IOPS_OP::IOPS_CREATE, IOPS_OP::IOPS_WRITE, IOPS_OP::IOPS_READ, + IOPS_OP::IOPS_STATS, IOPS_OP::IOPS_DIRENTS, IOPS_OP::IOPS_REMOVE}; constexpr static const std::initializer_list all_SIZE_OP = { - SIZE_OP::METADATA_SIZE, SIZE_OP::DATA_SIZE, SIZE_OP::WRITE_SIZE, - SIZE_OP::READ_SIZE}; - - const std::vector IOPS_OP_S = {"IOPS_CREATE", "IOPS_WRITE", - "IOPS_READ", "IOPS_MKDIR", - "IOPS_RMDIR", "IOPS_REMOVE"}; - const std::vector SIZE_OP_S = {"METADATA_SIZE", "WRITE_SIZE", - "READ_SIZE", "DATA_SIZE"}; + SIZE_OP::WRITE_SIZE, SIZE_OP::READ_SIZE}; + + const std::vector IOPS_OP_S = {"IOPS_CREATE", "IOPS_WRITE", + "IOPS_READ", "IOPS_STATS", + "IOPS_DIRENTS", "IOPS_REMOVE"}; + const std::vector SIZE_OP_S = {"WRITE_SIZE", "READ_SIZE"}; std::chrono::time_point last_cached; /* Measures when we started the server */ std::chrono::time_point start; @@ -108,8 +109,6 @@ private: std::map>> TIME_IOPS; - // We will store 1, 5, and 10 minute mean; - std::map> CACHED_IOPS; // For size operations we need to store the timestamp and // the size @@ -118,8 +117,6 @@ private: std::chrono::time_point, unsigned long long>>> TIME_SIZE; - // We will store 1, 5, and 10 minute mean; - std::map> CACHED_SIZE; // Thread that outputs stats info std::thread t_output; @@ -132,16 +129,39 @@ private: * Debug Function * * @param d is the time between output + * @param file_output is the output file + */ + void + output(std::chrono::seconds d, std::string file_output); + + std::map, unsigned int> + CHUNK_READ; + std::map, unsigned int> + CHUNK_WRITE; + + /** + * @brief Called by output to generate CHUNK map + * + * @param output is the output stream + */ + void + output_map(std::ofstream& output); + + + /** + * @brief Dumps all the means from the stats + * @param of Output stream */ void - output(std::chrono::seconds d); + dump(std::ofstream& of); public: /** * @brief Starts the Stats module and initializes structures - * + * @param output_thread creates an aditional thread that outputs the stats + * @param filename file where to write the output */ - Stats(bool output_thread); + Stats(bool output_thread, std::string filename); /** * @brief Destroys the class, and any associated thread @@ -149,6 +169,24 @@ public: */ ~Stats(); + /** + * @brief Adds a new read access to the chunk/path specified + * + * @param path + * @param chunk + */ + void + add_read(std::string path, unsigned long long chunk); + /** + * @brief Adds a new write access to the chunk/path specified + * + * @param path + * @param chunk + */ + void + add_write(std::string path, unsigned long long chunk); + + /** * Add a new value for a IOPS, that does not involve any size * No value needed as they are simple (1 create, 1 read...) @@ -200,14 +238,6 @@ public: * @return std::vector< double > with 4 means */ std::vector get_four_means(enum IOPS_OP); - - - /** - * @brief Dumps all the means from the stats - * - */ - void - dump(); }; } // namespace gkfs::utils diff --git a/include/daemon/classes/fs_data.hpp b/include/daemon/classes/fs_data.hpp index 240a9d192..8e1b1a38a 100644 --- a/include/daemon/classes/fs_data.hpp +++ b/include/daemon/classes/fs_data.hpp @@ -93,6 +93,7 @@ private: // Statistics std::shared_ptr stats_; bool output_stats_ = false; + std::string stats_file_; public: static FsData* @@ -233,6 +234,12 @@ public: void output_stats(bool output_stats); + + std::string + stats_file() const; + + void + stats_file(std::string stats_file); }; diff --git a/src/common/statistics/stats.cpp b/src/common/statistics/stats.cpp index 7e9e16a01..d3b522e76 100644 --- a/src/common/statistics/stats.cpp +++ b/src/common/statistics/stats.cpp @@ -33,19 +33,10 @@ using namespace std; namespace gkfs::utils { -Stats::Stats(bool output_thread) { +Stats::Stats(bool output_thread, std::string stats_file) { // Init clocks start = std::chrono::steady_clock::now(); - last_cached = std::chrono::steady_clock::now(); - // Init cached (4 mean values) - - for(auto e : all_IOPS_OP) - for(int i = 0; i < 4; i++) CACHED_IOPS[e].push_back(0.0); - - for(auto e : all_SIZE_OP) - for(int i = 0; i < 4; i++) CACHED_SIZE[e].push_back(0.0); - // To simplify the control we add an element into the different maps // Statistaclly will be negligible... and we get a faster flow @@ -62,19 +53,67 @@ Stats::Stats(bool output_thread) { output_thread_ = output_thread; - if (output_thread_) { - t_output = std::thread([this] { output(std::chrono::duration(10s)); }); + if(output_thread_) { + t_output = std::thread([this, stats_file] { + output(std::chrono::duration(10s), stats_file); + }); } } Stats::~Stats() { // We do not need a mutex for that - if (output_thread_) { + if(output_thread_) { running = false; t_output.join(); } } +void +Stats::add_read(std::string path, unsigned long long chunk) { + CHUNK_READ[pair(path, chunk)]++; +} + +void +Stats::add_write(std::string path, unsigned long long chunk) { + CHUNK_WRITE[pair(path, chunk)]++; +} + + +void +Stats::output_map(std::ofstream& output) { + // Ordering + map>> + ORDER_WRITE; + + map>> + ORDER_READ; + + for(auto i : CHUNK_READ) { + ORDER_READ[i.second].insert(i.first); + } + + for(auto i : CHUNK_WRITE) { + ORDER_WRITE[i.second].insert(i.first); + } + + auto CHUNK_MAP = + [](std::string caption, + map>>& ORDER, + std::ofstream& output) { + output << caption << std::endl; + for(auto k : ORDER) { + output << k.first << " -- "; + for(auto v : k.second) { + output << v.first << " // " << v.second << endl; + } + } + }; + + CHUNK_MAP("READ CHUNK MAP", ORDER_READ, output); + CHUNK_MAP("WRITE CHUNK MAP", ORDER_WRITE, output); +} + void Stats::add_value_iops(enum IOPS_OP iop) { IOPS[iop]++; @@ -155,11 +194,11 @@ Stats::get_four_means(enum SIZE_OP sop) { continue; results[1] += e.second; } - - results[0] = get_mean(sop); - results[3] /= 10 * 60; - results[2] /= 5 * 60; - results[1] /= 60; + // Mean in MB/s + results[0] = get_mean(sop) / (1024.0 * 1024.0); + results[3] /= 10 * 60 * (1024.0 * 1024.0); + results[2] /= 5 * 60 * (1024.0 * 1024.0); + results[1] /= 60 * (1024.0 * 1024.0); return results; } @@ -194,33 +233,48 @@ Stats::get_four_means(enum IOPS_OP iop) { } void -Stats::dump() { +Stats::dump(std::ofstream& of) { for(auto e : all_IOPS_OP) { auto tmp = get_four_means(e); - std::cout << "Stats " << IOPS_OP_S[static_cast(e)] << " "; + of << "Stats " << IOPS_OP_S[static_cast(e)] + << " IOPS/s (avg, 1 min, 5 min, 10 min) \t\t"; for(auto mean : tmp) { - std::cout << mean << " - "; + of << std::setprecision(4) << std::setw(9) << mean << " - "; } - std::cout << std::endl; + of << std::endl; } for(auto e : all_SIZE_OP) { auto tmp = get_four_means(e); - std::cout << "Stats " << SIZE_OP_S[static_cast(e)] << " "; + of << "Stats " << SIZE_OP_S[static_cast(e)] + << " MB/s (avg, 1 min, 5 min, 10 min) \t\t"; for(auto mean : tmp) { - std::cout << mean << " - "; + of << std::setprecision(4) << std::setw(9) << mean << " - "; } - std::cout << std::endl; + of << std::endl; } + of << std::endl; } void -Stats::output(std::chrono::seconds d) { +Stats::output(std::chrono::seconds d, std::string file_output) { + int times = 0; + std::ofstream of(file_output, std::ios_base::openmode::_S_trunc); while(running) { - dump(); - - std::this_thread::sleep_for(d); + dump(of); + std::chrono::seconds a = 0s; + + times++; +#ifdef GKFS_CHUNK_STATS + if(times % 4 == 0) + output_map(of); +#endif + + while(running and a < d) { + a += 1s; + std::this_thread::sleep_for(1s); + } } } diff --git a/src/daemon/classes/fs_data.cpp b/src/daemon/classes/fs_data.cpp index e5fa97e88..46b7e5472 100644 --- a/src/daemon/classes/fs_data.cpp +++ b/src/daemon/classes/fs_data.cpp @@ -227,7 +227,7 @@ FsData::stats() const { void FsData::stats(const std::shared_ptr& stats) { - stats_ = stats; + FsData::stats_ = stats; } void @@ -246,5 +246,14 @@ FsData::output_stats(bool output_stats) { FsData::output_stats_ = output_stats; } +std::string +FsData::stats_file() const { + return stats_file_; +} + +void +FsData::stats_file(std::string stats_file) { + FsData::stats_file_ = stats_file; +} } // namespace gkfs::daemon diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 45b6e410c..cb6715bcb 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -82,6 +82,7 @@ struct cli_options { string rpc_protocol; string dbbackend; string parallax_size; + string stats_file; }; /** @@ -293,7 +294,8 @@ init_environment() { #endif // Initialize Stats - GKFS_DATA->stats(std::make_shared(GKFS_DATA->output_stats())); + GKFS_DATA->stats(std::make_shared( + GKFS_DATA->output_stats(), GKFS_DATA->stats_file())); // Initialize data backend auto chunk_storage_path = fmt::format("{}/{}", GKFS_DATA->rootdir(), @@ -584,14 +586,14 @@ parse_input(const cli_options& opts, const CLI::App& desc) { fs::create_directories(rootdir_path); GKFS_DATA->rootdir(rootdir_path.native()); - if (desc.count("--output-stats")) { - GKFS_DATA->output_stats(true); + if(desc.count("--output-stats")) { + GKFS_DATA->output_stats(true); } - + if(desc.count("--metadir")) { auto metadir = opts.metadir; - + #ifdef GKFS_ENABLE_FORWARDING auto metadir_path = fs::path(metadir) / fmt::format_int(getpid()).str(); #else @@ -653,6 +655,9 @@ parse_input(const cli_options& opts, const CLI::App& desc) { if(desc.count("--parallaxsize")) { // Size in GB GKFS_DATA->parallax_size_md(stoi(opts.parallax_size)); + if(desc.count("--output-stats")) { + auto stats_file = opts.stats_file; + GKFS_DATA->stats_file(stats_file); } } @@ -721,8 +726,9 @@ main(int argc, const char* argv[]) { desc.add_option("--parallaxsize", opts.parallax_size, "parallaxdb - metadata file size in GB (default 8GB), " "used only with new files"); - desc.add_flag("--output-stats", - "Creates a thread that outputs the server stats each 10s"); + desc.add_option( + "--output-stats", opts.stats_file, + "Creates a thread that outputs the server stats each 10s, to the file specified"); desc.add_flag("--version", "Print version and exit."); // clang-format on diff --git a/src/daemon/handler/srv_data.cpp b/src/daemon/handler/srv_data.cpp index ee0e2e39c..59b62859a 100644 --- a/src/daemon/handler/srv_data.cpp +++ b/src/daemon/handler/srv_data.cpp @@ -114,6 +114,7 @@ rpc_srv_write(hg_handle_t handle) { "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'", __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n, in.total_chunk_size, bulk_size, in.offset); + GKFS_DATA->stats()->add_value_size(gkfs::utils::Stats::SIZE_OP::WRITE_SIZE, bulk_size); @@ -237,6 +238,9 @@ rpc_srv_write(hg_handle_t handle) { __func__, chnk_id_file, host_id, chnk_id_curr); continue; } +#ifdef GKFS_CHUNK_STATS + GKFS_DATA->stats()->add_write(in.path, chnk_id_file); +#endif #endif chnk_ids_host[chnk_id_curr] = @@ -521,6 +525,7 @@ rpc_srv_read(hg_handle_t handle) { __func__, chnk_id_file, host_id, chnk_id_curr); continue; } + GKFS_DATA->stats()->add_read(in.path, chnk_id_file); #endif chnk_ids_host[chnk_id_curr] = diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index d31d34003..38f9035aa 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -78,6 +78,8 @@ rpc_srv_create(hg_handle_t handle) { // create metadentry gkfs::metadata::create(in.path, md); out.err = 0; + GKFS_DATA->stats()->add_value_iops( + gkfs::utils::Stats::IOPS_OP::IOPS_CREATE); } catch(const gkfs::metadata::ExistsException& e) { out.err = EEXIST; } catch(const std::exception& e) { @@ -85,14 +87,12 @@ rpc_srv_create(hg_handle_t handle) { __func__, e.what()); out.err = -1; } + GKFS_DATA->spdlogger()->debug("{}() Sending output err '{}'", __func__, out.err); auto hret = margo_respond(handle, &out); if(hret != HG_SUCCESS) { GKFS_DATA->spdlogger()->error("{}() Failed to respond", __func__); - } else { - GKFS_DATA->stats()->add_value_iops( - gkfs::utils::Stats::IOPS_OP::IOPS_CREATE); } // Destroy handle when finished @@ -126,6 +126,8 @@ rpc_srv_stat(hg_handle_t handle) { GKFS_DATA->spdlogger()->debug("{}() path: '{}'", __func__, in.path); std::string val; + GKFS_DATA->stats()->add_value_iops(gkfs::utils::Stats::IOPS_OP::IOPS_STATS); + try { // get the metadata val = gkfs::metadata::get_str(in.path); @@ -245,6 +247,8 @@ rpc_srv_remove_metadata(hg_handle_t handle) { if(S_ISREG(md.mode()) && (md.size() != 0)) GKFS_DATA->storage()->destroy_chunk_space(in.path); } + GKFS_DATA->stats()->add_value_iops( + gkfs::utils::Stats::IOPS_OP::IOPS_REMOVE); } catch(const gkfs::metadata::DBException& e) { GKFS_DATA->spdlogger()->error("{}(): path '{}' message '{}'", __func__, in.path, e.what()); @@ -539,6 +543,8 @@ rpc_srv_get_dirents(hg_handle_t handle) { vector> entries{}; try { entries = gkfs::metadata::get_dirents(in.path); + GKFS_DATA->stats()->add_value_iops( + gkfs::utils::Stats::IOPS_OP::IOPS_DIRENTS); } catch(const ::exception& e) { GKFS_DATA->spdlogger()->error("{}() Error during get_dirents(): '{}'", __func__, e.what()); -- GitLab From 909439b40cd95efe99dbc8e5c443eadeaf3232ed Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Mon, 7 Mar 2022 14:50:42 +0100 Subject: [PATCH 4/6] Added documentation --- CHANGELOG.md | 4 +- README.md | 2 +- docs/sphinx/users/running.md | 1 + include/common/statistics/stats.hpp | 75 +++++++++++++++-------------- 4 files changed, 45 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79745b332..8a5cc4bdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] -- Added Stats gathering in servers +- Added Stats ([!128](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/128)) gathering in servers +- GKFS_CHUNK_STATS enables chunk usage output +- Stats output can be enabled with --output-stats ### New - Added new experimental metadata backend: diff --git a/README.md b/README.md index f76f5e272..93bcb4b62 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ Options: RocksDB is default if not set. Parallax support is experimental. Note, parallaxdb creates a file called rocksdbx with 8GB created in metadir. --parallaxsize TEXT parallaxdb - metadata file size in GB (default 8GB), used only with new files - --output-stats Enables the output of the stats on the stdout (each 10s) for debug + --output-stats TEXT Enables the output of the stats on the FILE (each 10s) for debug --version Print version and exit. ``` diff --git a/docs/sphinx/users/running.md b/docs/sphinx/users/running.md index 4750c151c..2e169d0a0 100644 --- a/docs/sphinx/users/running.md +++ b/docs/sphinx/users/running.md @@ -79,6 +79,7 @@ Options: RocksDB is default if not set. Parallax support is experimental. Note, parallaxdb creates a file called rocksdbx with 8GB created in metadir. --parallaxsize TEXT parallaxdb - metadata file size in GB (default 8GB), used only with new files + --output-stats TEXT Outputs the stats to the file each 10s. --version Print version and exit. ```` diff --git a/include/common/statistics/stats.hpp b/include/common/statistics/stats.hpp index 0334ae4cf..ba18799e4 100644 --- a/include/common/statistics/stats.hpp +++ b/include/common/statistics/stats.hpp @@ -76,54 +76,59 @@ public: IOPS_STATS, IOPS_DIRENTS, IOPS_REMOVE, - }; + }; ///< enum storing IOPS Stats - enum class SIZE_OP { WRITE_SIZE, READ_SIZE }; + enum class SIZE_OP { WRITE_SIZE, READ_SIZE }; ///< enum storing Size Stats private: constexpr static const std::initializer_list all_IOPS_OP = { - IOPS_OP::IOPS_CREATE, IOPS_OP::IOPS_WRITE, IOPS_OP::IOPS_READ, - IOPS_OP::IOPS_STATS, IOPS_OP::IOPS_DIRENTS, IOPS_OP::IOPS_REMOVE}; + IOPS_OP::IOPS_CREATE, + IOPS_OP::IOPS_WRITE, + IOPS_OP::IOPS_READ, + IOPS_OP::IOPS_STATS, + IOPS_OP::IOPS_DIRENTS, + IOPS_OP::IOPS_REMOVE}; ///< Enum IOPS iterator constexpr static const std::initializer_list all_SIZE_OP = { - SIZE_OP::WRITE_SIZE, SIZE_OP::READ_SIZE}; - - const std::vector IOPS_OP_S = {"IOPS_CREATE", "IOPS_WRITE", - "IOPS_READ", "IOPS_STATS", - "IOPS_DIRENTS", "IOPS_REMOVE"}; - const std::vector SIZE_OP_S = {"WRITE_SIZE", "READ_SIZE"}; - std::chrono::time_point last_cached; - /* Measures when we started the server */ - std::chrono::time_point start; - // How many stats will be stored - const unsigned int MAX_STATS = 1000000; - - // Stores total value for global mean - std::map IOPS; - std::map SIZE; - - - // Stores timestamp when an operation comes - // removes if first operation if > 10 minutes - // Different means will be stored and cached 1 minuted + SIZE_OP::WRITE_SIZE, SIZE_OP::READ_SIZE}; ///< Enum SIZE iterator + + const std::vector IOPS_OP_S = { + "IOPS_CREATE", "IOPS_WRITE", "IOPS_READ", + "IOPS_STATS", "IOPS_DIRENTS", "IOPS_REMOVE"}; ///< Stats Labels + const std::vector SIZE_OP_S = {"WRITE_SIZE", + "READ_SIZE"}; ///< Stats Labels + + std::chrono::time_point + start; ///< When we started the server + + const unsigned int MAX_STATS = 1000000; ///< How many stats will be stored + + + std::map + IOPS; ///< Stores total value for global mean + std::map + SIZE; ///< Stores total value for global mean + std::map>> - TIME_IOPS; + TIME_IOPS; ///< Stores timestamp when an operation comes removes if + ///< first operation if > 10 minutes Different means will + ///< be stored and cached 1 minuted + - // For size operations we need to store the timestamp and - // the size std::map, unsigned long long>>> - TIME_SIZE; + TIME_SIZE; ///< For size operations we need to store the timestamp + ///< and the size + - // Thread that outputs stats info - std::thread t_output; - bool output_thread_; + std::thread t_output; ///< Thread that outputs stats info + bool output_thread_; ///< Enables or disables the output thread - // Controls the destruction of the class/stops the thread - bool running = true; + bool running = + true; ///< Controls the destruction of the class/stops the thread /** * @brief Sends all the stats to the screen * Debug Function @@ -135,9 +140,9 @@ private: output(std::chrono::seconds d, std::string file_output); std::map, unsigned int> - CHUNK_READ; + CHUNK_READ; ///< Stores the number of times a chunk/file is read std::map, unsigned int> - CHUNK_WRITE; + CHUNK_WRITE; ///< Stores the number of times a chunk/file is write /** * @brief Called by output to generate CHUNK map -- GitLab From e3d3f8b37d3efad9a91041717ecfed9163ddba63 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Tue, 8 Mar 2022 14:27:12 +0100 Subject: [PATCH 5/6] Testing stats --- .gitlab-ci.yml | 1 + include/daemon/classes/fs_data.hpp | 2 +- src/common/CMakeLists.txt | 4 ++++ src/daemon/classes/fs_data.cpp | 3 ++- src/daemon/daemon.cpp | 9 ++++----- tests/integration/harness/gkfs.py | 3 ++- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b5a077a34..14099db4f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -61,6 +61,7 @@ gkfs: -DGKFS_USE_GUIDED_DISTRIBUTION:BOOL=ON -DGKFS_ENABLE_PARALLAX:BOOL=ON -DGKFS_ENABLE_ROCKSDB:BOOL=ON + -DGKFS_CHUNK_STATS:BOOL=ON ${CI_PROJECT_DIR} - make -j$(nproc) install # reduce artifacts size diff --git a/include/daemon/classes/fs_data.hpp b/include/daemon/classes/fs_data.hpp index 8e1b1a38a..2427e5c4e 100644 --- a/include/daemon/classes/fs_data.hpp +++ b/include/daemon/classes/fs_data.hpp @@ -219,7 +219,7 @@ public: void parallax_size_md(unsigned int size_md); - + const std::shared_ptr& stats() const; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index f0149fcc9..911e6f3fd 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -52,6 +52,10 @@ if(GKFS_ENABLE_CODE_COVERAGE) target_code_coverage(distributor AUTO) endif() +if(GKFS_ENABLE_CODE_COVERAGE) + target_code_coverage(statistics AUTO) +endif() + # get spdlog set(FETCHCONTENT_QUIET ON) diff --git a/src/daemon/classes/fs_data.cpp b/src/daemon/classes/fs_data.cpp index 46b7e5472..cea2a2204 100644 --- a/src/daemon/classes/fs_data.cpp +++ b/src/daemon/classes/fs_data.cpp @@ -219,7 +219,8 @@ void FsData::parallax_size_md(unsigned int size_md) { FsData::parallax_size_md_ = static_cast( size_md * 1024ull * 1024ull * 1024ull); - +} + const std::shared_ptr& FsData::stats() const { return stats_; diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index cb6715bcb..f6bb69b2a 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -586,11 +586,6 @@ parse_input(const cli_options& opts, const CLI::App& desc) { fs::create_directories(rootdir_path); GKFS_DATA->rootdir(rootdir_path.native()); - if(desc.count("--output-stats")) { - GKFS_DATA->output_stats(true); - } - - if(desc.count("--metadir")) { auto metadir = opts.metadir; @@ -655,9 +650,13 @@ parse_input(const cli_options& opts, const CLI::App& desc) { if(desc.count("--parallaxsize")) { // Size in GB GKFS_DATA->parallax_size_md(stoi(opts.parallax_size)); + } if(desc.count("--output-stats")) { auto stats_file = opts.stats_file; GKFS_DATA->stats_file(stats_file); + GKFS_DATA->output_stats(true); + GKFS_DATA->spdlogger()->debug("{}() Stats Enabled: '{}'", __func__, + stats_file); } } diff --git a/tests/integration/harness/gkfs.py b/tests/integration/harness/gkfs.py index 6f5624dae..2b770ae84 100644 --- a/tests/integration/harness/gkfs.py +++ b/tests/integration/harness/gkfs.py @@ -251,7 +251,8 @@ class Daemon: '--rootdir', self.rootdir, '-l', self._address, '--metadir', self._metadir, - '--dbbackend', self._database] + '--dbbackend', self._database, + '--output-stats', self.logdir / 'stats.log' ] if self._database == "parallaxdb" : args.append('--clean-rootdir-finish') -- GitLab From 224e3de3fd1639311b8e490915c9b2ce4167ab8a Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 23 Mar 2022 10:43:00 +0100 Subject: [PATCH 6/6] Updated CHANGELOG --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a5cc4bdf..1082251eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,15 +7,15 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] -- Added Stats ([!128](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/128)) gathering in servers -- GKFS_CHUNK_STATS enables chunk usage output -- Stats output can be enabled with --output-stats ### New - Added new experimental metadata backend: Parallax ([!110](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/110)). - Added support to use multiple metadata backends. - Added `--clean-rootdir-finish` argument to remove rootdir/metadir at the end when the daemon finishes. +- Added Stats ([!128](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/128)) gathering in servers + - GKFS_CHUNK_STATS enables chunk usage output + - Stats output can be enabled with --output-stats ### Changed -- GitLab