Commit f077366c authored by Ramon Nou's avatar Ramon Nou

Defines to Options

parent 07bb4d4a
Pipeline #2393 passed with stages
in 35 minutes and 49 seconds
......@@ -11,15 +11,17 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
### New
- Added Stats ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132)) gathering in servers
- GKFS_CHUNK_STATS enables chunk usage output
- Stats output can be enabled with --output-stats <filename>
- --enable-collection collects normal stats
- --enable-chunkstats collects extended chunk stats
- Added new experimental metadata backend:
Parallax ([!110](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/110)).
- Added support to use multiple metadata backends.
- Added `--clean-rootdir-finish` argument to remove rootdir/metadir at the end when the daemon finishes.
- Added Prometheus Ouput ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132))
- Added Prometheus Output ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132))
New option --prometheus-gateway <gateway:port> to define the push gateway
- Prometheus output is optional with "GKFS_ENABLE_PROMETHEUS"
- --enable-prometheus creates a thread to push the metrics.
### Changed
......
......@@ -195,12 +195,6 @@ if(GKFS_USE_GUIDED_DISTRIBUTION)
message(STATUS "[gekkofs] Guided data distributor input file path: ${GKFS_USE_GUIDED_DISTRIBUTION_PATH}")
endif()
option(GKFS_CHUNK_STATS "Gather Chunk Stats " OFF)
if (GKFS_CHUNK_STATS)
add_definitions(-DGKFS_CHUNK_STATS)
endif ()
message(STATUS "[gekkofs] Gather Chunk Stats: ${GKFS_CHUNK_STATS}")
option(GKFS_ENABLE_PROMETHEUS "Enable Prometheus Push " OFF)
if(GKFS_ENABLE_PROMETHEUS)
add_definitions(-DGKFS_ENABLE_PROMETHEUS)
......
......@@ -240,10 +240,8 @@ To enable it use the `-DGKFS_ENABLE_PARALLAX:BOOL=ON` option, you can also disab
Once it is enabled, `--dbbackend` option will be functional.
### Stats
Pushing stats to Prometheus is enabled with the `-DGKFS_ENABLE_PROMETHEUS` and the setup of the `--output-stats <FILE>`.
Without the last one, the push to the gateway is disabled.
Stats for each chunk (read-write access) can be enabled with `-DGKFS_CHUNK_STATS`. The server will store file/chunk number stats.
Stats from each server are written to the file specified with `--output-stats <FILE>`. Collection is controlled by two separate flags: `--enable-collection` for normal stats and `--enable-chunkstats` for extended chunk stats. The extended chunk stats record each chunk access.
Pushing stats to Prometheus is enabled with the `-DGKFS_ENABLE_PROMETHEUS` compilation option and the `--enable-prometheus` flag. A push model is used.
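Internally, the daemon forwards these run-time flags to the statistics module when its environment is initialized. The following is a minimal sketch of that wiring, assuming hypothetical include paths; the getters and the `Stats` constructor signature are the ones introduced by this change:

```cpp
// Sketch only: how the CLI flags reach the Stats module.
#include <memory>

#include <daemon/classes/fs_data.hpp>  // assumed path for FsData / GKFS_DATA
#include <common/statistics/stats.hpp> // assumed path for gkfs::utils::Stats

void init_stats() {
    // --enable-chunkstats, --enable-prometheus and --output-stats drive these getters
    GKFS_DATA->stats(std::make_shared<gkfs::utils::Stats>(
            GKFS_DATA->enable_chunkstats(), GKFS_DATA->enable_prometheus(),
            GKFS_DATA->stats_file(), GKFS_DATA->prometheus_gateway()));
}
```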
### Acknowledgment
......
......@@ -37,6 +37,7 @@
#include <vector>
#include <deque>
#include <chrono>
#include <optional>
#include <initializer_list>
#include <thread>
#include <iostream>
......@@ -133,8 +134,11 @@ private:
///< and the size
std::thread t_output; ///< Thread that outputs stats info
bool output_thread_; ///< Enables or disables the output thread
std::thread t_output; ///< Thread that outputs stats info
bool output_thread_; ///< Enables or disables the output thread
bool enable_prometheus_; ///< Enables or disables the prometheus output
bool enable_chunkstats_; ///< Enables or disables the chunk stats output
bool running =
true; ///< Controls the destruction of the class/stops the thread
......@@ -185,12 +189,13 @@ private:
public:
/**
* @brief Starts the Stats module and initializes structures
* @param output_thread creates an additional thread that outputs the stats
* @param enable_chunkstats Enables or disables the chunk stats
* @param enable_prometheus Enables or disables the prometheus output
* @param filename file where to write the output
* @param prometheus_gateway ip:port to expose the metrics
*/
Stats(bool output_thread, const std::string& filename,
const std::string& prometheus_gateway);
Stats(bool enable_chunkstats, bool enable_prometheus,
const std::string& filename, const std::string& prometheus_gateway);
/**
* @brief Destroys the class, and any associated thread
......
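As a usage note for the constructor change above, here is a hedged, self-contained sketch of exercising the new `Stats` interface; the include path, the example path and the chunk id are assumptions, while the constructor arguments, enums and recording methods are taken from this diff:

```cpp
#include <string>

#include <common/statistics/stats.hpp> // assumed include path for the header shown above

int main() {
    using gkfs::utils::Stats;
    // Chunk stats enabled, Prometheus disabled, output written to a plain file.
    // The gateway argument is not used while Prometheus support is disabled.
    Stats stats(true, false, "/tmp/gkfs_stats.log", "127.0.0.1:9091");

    stats.add_value_iops(Stats::IopsOp::iops_create);         // count one create operation
    stats.add_value_size(Stats::SizeOp::write_size, 4096ULL); // record a 4 KiB write
    stats.add_write("/foo/bar", 0);                           // per-chunk accounting (hypothetical path and chunk id)
    return 0;
}
```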
......@@ -92,7 +92,9 @@ private:
// Statistics
std::shared_ptr<gkfs::utils::Stats> stats_;
bool output_stats_ = false;
bool enable_stats_ = false;
bool enable_chunkstats_ = false;
bool enable_prometheus_ = false;
std::string stats_file_;
// Prometheus
......@@ -233,10 +235,22 @@ public:
close_stats();
bool
output_stats() const;
enable_stats() const;
void
output_stats(bool output_stats);
enable_stats(bool enable_stats);
bool
enable_chunkstats() const;
void
enable_chunkstats(bool enable_chunkstats);
bool
enable_prometheus() const;
void
enable_prometheus(bool enable_prometheus);
const std::string&
stats_file() const;
......
......@@ -80,7 +80,8 @@ Stats::setup_Prometheus(const std::string& gateway_ip,
#endif /// GKFS_ENABLE_PROMETHEUS
}
Stats::Stats(bool output_thread, const std::string& stats_file,
Stats::Stats(bool enable_chunkstats, bool enable_prometheus,
const std::string& stats_file,
const std::string& prometheus_gateway) {
// Init clocks
......@@ -104,10 +105,11 @@ Stats::Stats(bool output_thread, const std::string& stats_file,
setup_Prometheus(prometheus_gateway.substr(0, pos_separator),
prometheus_gateway.substr(pos_separator + 1));
#endif
enable_chunkstats_ = enable_chunkstats;
enable_prometheus_ = enable_prometheus;
output_thread_ = output_thread;
if(output_thread_) {
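// The output thread is only spawned when a stats output file was given or the Prometheus push is enabled.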
if(!stats_file.empty() || enable_prometheus_) {
output_thread_ = true;
t_output = std::thread([this, stats_file] {
output(std::chrono::duration(10s), stats_file);
});
......@@ -115,7 +117,6 @@ Stats::Stats(bool output_thread, const std::string& stats_file,
}
Stats::~Stats() {
// We do not need a mutex for that
if(output_thread_) {
running = false;
t_output.join();
......@@ -181,7 +182,9 @@ Stats::add_value_iops(enum IopsOp iop) {
TimeIops[iop].push_back(std::chrono::steady_clock::now());
#ifdef GKFS_ENABLE_PROMETHEUS
iops_Prometheus[iop]->Increment();
if(enable_prometheus_) {
iops_Prometheus[iop]->Increment();
}
#endif
}
......@@ -196,7 +199,9 @@ Stats::add_value_size(enum SizeOp iop, unsigned long long value) {
TimeSize[iop].push_back(pair(std::chrono::steady_clock::now(), value));
#ifdef GKFS_ENABLE_PROMETHEUS
size_Prometheus[iop]->Observe(value);
if(enable_prometheus_) {
size_Prometheus[iop]->Observe(value);
}
#endif
if(iop == SizeOp::read_size)
add_value_iops(IopsOp::iops_read);
......@@ -312,20 +317,25 @@ Stats::dump(std::ofstream& of) {
void
Stats::output(std::chrono::seconds d, std::string file_output) {
int times = 0;
std::ofstream of(file_output, std::ios_base::openmode::_S_trunc);
std::optional<std::ofstream> of;
if(!file_output.empty())
of = std::ofstream(file_output, std::ios_base::trunc);
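// Each period: dump the aggregate stats to the file (if one was given), write the
// per-chunk map every 4th iteration when chunk stats are enabled, and push to
// Prometheus when enabled.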
while(running) {
dump(of);
if(of)
dump(of.value());
std::chrono::seconds a = 0s;
times++;
#ifdef GKFS_CHUNK_STATS
if(times % 4 == 0)
output_map(of);
#endif
if(enable_chunkstats_ and of) {
if(times % 4 == 0)
output_map(of.value());
}
#ifdef GKFS_ENABLE_PROMETHEUS
// Prometheus Output
gateway->Push();
if(enable_prometheus_) {
gateway->Push();
}
#endif
while(running and a < d) {
a += 1s;
......
......@@ -236,15 +236,34 @@ FsData::close_stats() {
stats_.reset();
}
bool
FsData::enable_stats() const {
return enable_stats_;
}
void
FsData::enable_stats(bool enable_stats) {
FsData::enable_stats_ = enable_stats;
}
bool
FsData::enable_chunkstats() const {
return enable_chunkstats_;
}
void
FsData::enable_chunkstats(bool enable_chunkstats) {
FsData::enable_chunkstats_ = enable_chunkstats;
}
bool
FsData::output_stats() const {
return output_stats_;
FsData::enable_prometheus() const {
return enable_prometheus_;
}
void
FsData::output_stats(bool output_stats) {
FsData::output_stats_ = output_stats;
FsData::enable_prometheus(bool enable_prometheus) {
FsData::enable_prometheus_ = enable_prometheus;
}
const std::string&
......
......@@ -296,8 +296,8 @@ init_environment() {
// Initialize Stats
GKFS_DATA->stats(std::make_shared<gkfs::utils::Stats>(
GKFS_DATA->output_stats(), GKFS_DATA->stats_file(),
GKFS_DATA->prometheus_gateway()));
GKFS_DATA->enable_chunkstats(), GKFS_DATA->enable_prometheus(),
GKFS_DATA->stats_file(), GKFS_DATA->prometheus_gateway()));
// Initialize data backend
auto chunk_storage_path = fmt::format("{}/{}", GKFS_DATA->rootdir(),
......@@ -653,15 +653,34 @@ parse_input(const cli_options& opts, const CLI::App& desc) {
if(desc.count("--parallaxsize")) { // Size in GB
GKFS_DATA->parallax_size_md(stoi(opts.parallax_size));
}
if(desc.count("--output-stats")) {
auto stats_file = opts.stats_file;
GKFS_DATA->stats_file(stats_file);
GKFS_DATA->output_stats(true);
GKFS_DATA->spdlogger()->debug("{}() Stats Enabled: '{}'", __func__,
stats_file);
} else {
GKFS_DATA->stats_file("");
GKFS_DATA->spdlogger()->debug("{}() Stats Output Disabled", __func__);
}
if(desc.count("--enable-collection")) {
GKFS_DATA->enable_stats(true);
GKFS_DATA->spdlogger()->debug("{}() Collection Enabled", __func__);
}
if(desc.count("--enable-chunkstats")) {
GKFS_DATA->enable_chunkstats(true);
GKFS_DATA->spdlogger()->debug("{}() ChunkStats Enabled", __func__);
}
#ifdef GKFS_ENABLE_PROMETHEUS
if(desc.count("--enable-prometheus")) {
GKFS_DATA->enable_prometheus(true);
GKFS_DATA->spdlogger()->debug("{}() Prometheus Enabled", __func__);
}
if(desc.count("--prometheus-gateway")) {
auto gateway = opts.prometheus_gateway;
GKFS_DATA->prometheus_gateway(gateway);
......@@ -739,10 +758,22 @@ main(int argc, const char* argv[]) {
desc.add_option(
"--output-stats", opts.stats_file,
"Creates a thread that outputs the server stats each 10s, to the file specified");
desc.add_flag(
"--enable-collection",
"Enables collection of normal stats, independent of the output-stats option");
desc.add_flag(
"--enable-chunkstats",
"Enables collection of chunkstats stats, independent of the output-stats option")
;
#ifdef GKFS_ENABLE_PROMETHEUS
desc.add_flag(
"--enable-prometheus",
"Enables prometheus output, enables thread");
desc.add_option(
"--prometheus-gateway", opts.prometheus_gateway,
"Defines the prometheus gateway, default is 127.0.0.1:9091, experimental enable at compilation");
"Defines the prometheus gateway, default is 127.0.0.1:9091");
#endif
desc.add_flag("--version", "Print version and exit.");
// clang-format on
......
......@@ -114,9 +114,10 @@ rpc_srv_write(hg_handle_t handle) {
"{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'",
__func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n,
in.total_chunk_size, bulk_size, in.offset);
GKFS_DATA->stats()->add_value_size(gkfs::utils::Stats::SizeOp::write_size,
bulk_size);
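// Account the size of the incoming write only when run-time stats collection is enabled.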
if(GKFS_DATA->enable_stats()) {
GKFS_DATA->stats()->add_value_size(
gkfs::utils::Stats::SizeOp::write_size, bulk_size);
}
#ifdef GKFS_ENABLE_AGIOS
int* data;
......@@ -238,9 +239,10 @@ rpc_srv_write(hg_handle_t handle) {
__func__, chnk_id_file, host_id, chnk_id_curr);
continue;
}
#ifdef GKFS_CHUNK_STATS
GKFS_DATA->stats()->add_write(in.path, chnk_id_file);
#endif
if(GKFS_DATA->enable_chunkstats()) {
GKFS_DATA->stats()->add_write(in.path, chnk_id_file);
}
#endif
chnk_ids_host[chnk_id_curr] =
......@@ -412,9 +414,10 @@ rpc_srv_read(hg_handle_t handle) {
"{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'",
__func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n,
in.total_chunk_size, bulk_size, in.offset);
GKFS_DATA->stats()->add_value_size(gkfs::utils::Stats::SizeOp::read_size,
bulk_size);
if(GKFS_DATA->enable_stats()) {
GKFS_DATA->stats()->add_value_size(
gkfs::utils::Stats::SizeOp::read_size, bulk_size);
}
#ifdef GKFS_ENABLE_AGIOS
int* data;
......@@ -525,7 +528,9 @@ rpc_srv_read(hg_handle_t handle) {
__func__, chnk_id_file, host_id, chnk_id_curr);
continue;
}
GKFS_DATA->stats()->add_read(in.path, chnk_id_file);
if(GKFS_DATA->enable_chunkstats()) {
GKFS_DATA->stats()->add_read(in.path, chnk_id_file);
}
#endif
chnk_ids_host[chnk_id_curr] =
......
......@@ -78,8 +78,6 @@ rpc_srv_create(hg_handle_t handle) {
// create metadentry
gkfs::metadata::create(in.path, md);
out.err = 0;
GKFS_DATA->stats()->add_value_iops(
gkfs::utils::Stats::IopsOp::iops_create);
} catch(const gkfs::metadata::ExistsException& e) {
out.err = EEXIST;
} catch(const std::exception& e) {
......@@ -98,6 +96,10 @@ rpc_srv_create(hg_handle_t handle) {
// Destroy handle when finished
margo_free_input(handle, &in);
margo_destroy(handle);
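// Record the create operation in the IOPS stats, after the response handling, when stats collection is enabled.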
if(GKFS_DATA->enable_stats()) {
GKFS_DATA->stats()->add_value_iops(
gkfs::utils::Stats::IopsOp::iops_create);
}
return HG_SUCCESS;
}
......@@ -126,8 +128,6 @@ rpc_srv_stat(hg_handle_t handle) {
GKFS_DATA->spdlogger()->debug("{}() path: '{}'", __func__, in.path);
std::string val;
GKFS_DATA->stats()->add_value_iops(gkfs::utils::Stats::IopsOp::iops_stats);
try {
// get the metadata
val = gkfs::metadata::get_str(in.path);
......@@ -154,6 +154,11 @@ rpc_srv_stat(hg_handle_t handle) {
// Destroy handle when finished
margo_free_input(handle, &in);
margo_destroy(handle);
if(GKFS_DATA->enable_stats()) {
GKFS_DATA->stats()->add_value_iops(
gkfs::utils::Stats::IopsOp::iops_stats);
}
return HG_SUCCESS;
}
......@@ -247,8 +252,7 @@ rpc_srv_remove_metadata(hg_handle_t handle) {
if(S_ISREG(md.mode()) && (md.size() != 0))
GKFS_DATA->storage()->destroy_chunk_space(in.path);
}
GKFS_DATA->stats()->add_value_iops(
gkfs::utils::Stats::IopsOp::iops_remove);
} catch(const gkfs::metadata::DBException& e) {
GKFS_DATA->spdlogger()->error("{}(): path '{}' message '{}'", __func__,
in.path, e.what());
......@@ -273,6 +277,10 @@ rpc_srv_remove_metadata(hg_handle_t handle) {
// Destroy handle when finished
margo_free_input(handle, &in);
margo_destroy(handle);
if(GKFS_DATA->enable_stats()) {
GKFS_DATA->stats()->add_value_iops(
gkfs::utils::Stats::IopsOp::iops_remove);
}
return HG_SUCCESS;
}
......@@ -543,8 +551,6 @@ rpc_srv_get_dirents(hg_handle_t handle) {
vector<pair<string, bool>> entries{};
try {
entries = gkfs::metadata::get_dirents(in.path);
GKFS_DATA->stats()->add_value_iops(
gkfs::utils::Stats::IopsOp::iops_dirent);
} catch(const ::exception& e) {
GKFS_DATA->spdlogger()->error("{}() Error during get_dirents(): '{}'",
__func__, e.what());
......@@ -645,6 +651,10 @@ rpc_srv_get_dirents(hg_handle_t handle) {
GKFS_DATA->spdlogger()->debug(
"{}() Sending output response err '{}' dirents_size '{}'. DONE",
__func__, out.err, out.dirents_size);
if(GKFS_DATA->enable_stats()) {
GKFS_DATA->stats()->add_value_iops(
gkfs::utils::Stats::IopsOp::iops_dirent);
}
return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle);
}
......
......@@ -252,7 +252,9 @@ class Daemon:
'-l', self._address,
'--metadir', self._metadir,
'--dbbackend', self._database,
'--output-stats', self.logdir / 'stats.log' ]
'--output-stats', self.logdir / 'stats.log',
'--enable-collection',
'--enable-chunkstats' ]
if self._database == "parallaxdb" :
args.append('--clean-rootdir-finish')
......