Commit 07bb4d4a authored by Ramon Nou

Review changes

parent 15058743
......@@ -10,7 +10,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
### New
- Added Stats ([!128](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/128)) gathering in servers
- Added Stats ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132)) gathering in servers
- `GKFS_CHUNK_STATS` enables chunk usage output
- Stats output can be enabled with `--output-stats <filename>`
- Added new experimental metadata backend:
......
......@@ -242,6 +242,8 @@ Once it is enabled, `--dbbackend` option will be functional.
### Stats
Pushing stats to Prometheus is enabled with the `-DGKFS_ENABLE_PROMETHEUS` compile option and by setting `--output-stats <FILE>`.
Without the latter, the push to the gateway is disabled.
Stats for each chunk (read/write accesses) can be enabled with `-DGKFS_CHUNK_STATS`. The server will then record per-file/chunk access counts.
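A minimal sketch of the build and launch steps, assuming the standard CMake workflow (the daemon binary name, paths, and mount options are illustrative and may differ in your setup):

```bash
# Build with Prometheus support and per-chunk stats (options described above)
cmake -DGKFS_ENABLE_PROMETHEUS=ON -DGKFS_CHUNK_STATS=ON ..
make -j

# --output-stats enables the stats file and, with Prometheus compiled in,
# the push to the gateway (default 127.0.0.1:9091)
gkfs_daemon -r /tmp/gkfs_rootdir -m /tmp/gkfs_mountdir \
    --output-stats /tmp/gkfs_stats.log
```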
### Acknowledgment
......
Subproject commit 0af45bfa667f7ff9c78167ef94d975bffbd879f0
Subproject commit 38d821182ef2b6c6961595bf011ca69bf78bc936
Subproject commit eb3220622e73a4889eee355ffa37972b3cac3df5
......@@ -42,6 +42,7 @@
#include <iostream>
#include <iomanip>
#include <fstream>
#include <config.hpp>
// PROMETHEUS includes
......@@ -64,78 +65,72 @@ using namespace prometheus;
*/
namespace gkfs::utils {
/*
Number of operations (Create, write/ read, remove, mkdir...)
Size of database (metadata keys; should not be needed anyway)
Size of data (+write - delete)
Server Bandwidth (write / read operations)
mean, (lifetime of the server)
1 minute mean
5 minute mean
10 minute mean
To provide the stats that we need,
we store the info and the timestamp needed to calculate them.
A bounded vector (maximum number of elements) should work.
The stats will only be calculated when requested;
a cached value will be sent (with a deadline)
*/
/**
*
* Number of operations (Create, write/ read, remove, mkdir...)
* Size of database (metadata keys; should not be needed anyway)
* Size of data (+write - delete)
* Server Bandwidth (write / read operations)
*
* mean, (lifetime of the server)
* 1 minute mean
* 5 minute mean
* 10 minute mean
*
* To provide the stats that we need,
* we store the info and the timestamp needed to calculate them.
* A bounded vector (maximum number of elements) should work.
*/
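// A minimal sketch (hypothetical helper, not part of this commit) of the
// windowed-mean idea described above: timestamps older than the window are
// dropped and the remaining count is divided by the window length.
// Relies on <chrono> and <deque>, which this header already uses.
inline double
windowed_iops_mean(
        std::deque<std::chrono::time_point<std::chrono::steady_clock>>& ts,
        std::chrono::seconds window) {
    const auto now = std::chrono::steady_clock::now();
    // prune entries that fall outside the requested window
    while(!ts.empty() && now - ts.front() > window)
        ts.pop_front();
    // the remaining entries are the operations observed within the window
    return static_cast<double>(ts.size()) / window.count();
}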
class Stats {
public:
enum class IOPS_OP {
IOPS_CREATE,
IOPS_WRITE,
IOPS_READ,
IOPS_STATS,
IOPS_DIRENTS,
IOPS_REMOVE,
enum class IopsOp {
iops_create,
iops_write,
iops_read,
iops_stats,
iops_dirent,
iops_remove,
}; ///< enum storing IOPS Stats
enum class SIZE_OP { WRITE_SIZE, READ_SIZE }; ///< enum storing Size Stats
enum class SizeOp { write_size, read_size }; ///< enum storing Size Stats
private:
constexpr static const std::initializer_list<Stats::IOPS_OP> all_IOPS_OP = {
IOPS_OP::IOPS_CREATE,
IOPS_OP::IOPS_WRITE,
IOPS_OP::IOPS_READ,
IOPS_OP::IOPS_STATS,
IOPS_OP::IOPS_DIRENTS,
IOPS_OP::IOPS_REMOVE}; ///< Enum IOPS iterator
constexpr static const std::initializer_list<Stats::SIZE_OP> all_SIZE_OP = {
SIZE_OP::WRITE_SIZE, SIZE_OP::READ_SIZE}; ///< Enum SIZE iterator
const std::vector<std::string> IOPS_OP_S = {
constexpr static const std::initializer_list<Stats::IopsOp> all_IopsOp = {
IopsOp::iops_create, IopsOp::iops_write,
IopsOp::iops_read, IopsOp::iops_stats,
IopsOp::iops_dirent, IopsOp::iops_remove}; ///< Enum IOPS iterator
constexpr static const std::initializer_list<Stats::SizeOp> all_SizeOp = {
SizeOp::write_size, SizeOp::read_size}; ///< Enum SIZE iterator
const std::vector<std::string> IopsOp_s = {
"IOPS_CREATE", "IOPS_WRITE", "IOPS_READ",
"IOPS_STATS", "IOPS_DIRENTS", "IOPS_REMOVE"}; ///< Stats Labels
const std::vector<std::string> SIZE_OP_S = {"WRITE_SIZE",
"READ_SIZE"}; ///< Stats Labels
const std::vector<std::string> SizeOp_s = {"WRITE_SIZE",
"READ_SIZE"}; ///< Stats Labels
std::chrono::time_point<std::chrono::steady_clock>
start; ///< When we started the server
const unsigned int MAX_STATS = 1000000; ///< How many stats will be stored
std::map<IOPS_OP, unsigned long>
std::map<IopsOp, unsigned long>
IOPS; ///< Stores total value for global mean
std::map<SIZE_OP, unsigned long>
std::map<SizeOp, unsigned long>
SIZE; ///< Stores total value for global mean
std::map<IOPS_OP,
std::map<IopsOp,
std::deque<std::chrono::time_point<std::chrono::steady_clock>>>
TIME_IOPS; ///< Stores the timestamp of each incoming operation; the
///< oldest entry is removed if it is older than 10 minutes.
///< Different means will be stored and cached for 1 minute
TimeIops; ///< Stores the timestamp of each incoming operation; the
///< oldest entry is removed if it is older than 10 minutes.
///< Different means will be stored and cached for 1 minute
std::map<enum SIZE_OP,
std::deque<std::pair<
std::chrono::time_point<std::chrono::steady_clock>,
unsigned long long>>>
TIME_SIZE; ///< For size operations we need to store the timestamp
///< and the size
std::map<SizeOp, std::deque<std::pair<
std::chrono::time_point<std::chrono::steady_clock>,
unsigned long long>>>
TimeSize; ///< For size operations we need to store the timestamp
///< and the size
std::thread t_output; ///< Thread that outputs stats info
......@@ -154,9 +149,9 @@ private:
output(std::chrono::seconds d, std::string file_output);
std::map<std::pair<std::string, unsigned long long>, unsigned int>
CHUNK_READ; ///< Stores the number of times a chunk/file is read
chunkRead; ///< Stores the number of times a chunk/file is read
std::map<std::pair<std::string, unsigned long long>, unsigned int>
CHUNK_WRITE; ///< Stores the number of times a chunk/file is written
chunkWrite; ///< Stores the number of times a chunk/file is written
/**
* @brief Called by output to generate CHUNK map
......@@ -179,10 +174,12 @@ private:
#ifdef GKFS_ENABLE_PROMETHEUS
std::shared_ptr<Gateway> gateway; ///< Prometheus Gateway
std::shared_ptr<Registry> registry; ///< Prometheus Counters Registry
Family<Counter>* family_counter; ///< Prometheus IOPS counter
Family<Summary>* family_summary; ///< Prometheus SIZE counter
std::map<IOPS_OP, Counter*> IOPS_Prometheus;
std::map<SIZE_OP, Summary*> SIZE_Prometheus;
Family<Counter>* family_counter; ///< Prometheus IOPS counter (managed by
///< Prometheus cpp)
Family<Summary>* family_summary; ///< Prometheus SIZE counter (managed by
///< Prometheus cpp)
std::map<IopsOp, Counter*> iops_Prometheus; ///< Prometheus IOPS metrics
std::map<SizeOp, Summary*> size_Prometheus; ///< Prometheus SIZE metrics
#endif
public:
......@@ -190,9 +187,10 @@ public:
* @brief Starts the Stats module and initializes structures
* (a usage sketch follows after the class, below)
* @param output_thread creates an additional thread that outputs the stats
* @param filename file where to write the output
* @param prometheus_gateway ip:port to expose the metrics
*/
Stats(bool output_thread, std::string filename,
std::string prometheus_gateway);
Stats(bool output_thread, const std::string& filename,
const std::string& prometheus_gateway);
/**
* @brief Destroys the class, and any associated thread
......@@ -204,28 +202,29 @@ public:
/**
* @brief Set up the Prometheus gateway and structures
*
* @param gateway_ip
* @param gateway_port
* @param gateway_ip IP address of the Prometheus gateway
* @param gateway_port port of the Prometheus gateway
*/
void
setup_Prometheus(std::string gateway_ip, std::string gateway_port);
setup_Prometheus(const std::string& gateway_ip,
const std::string& gateway_port);
/**
* @brief Adds a new read access to the chunk/path specified
*
* @param path
* @param chunk
* @param path path of the chunk
* @param chunk chunk number
*/
void
add_read(std::string path, unsigned long long chunk);
add_read(const std::string& path, unsigned long long chunk);
/**
* @brief Adds a new write access to the chunk/path specified
*
* @param path
* @param chunk
* @param path path of the chunk
* @param chunk chunk number
*/
void
add_write(std::string path, unsigned long long chunk);
add_write(const std::string& path, unsigned long long chunk);
/**
......@@ -233,52 +232,56 @@ public:
* No value is needed as they are simple counters (1 create, 1 read, ...)
* Size operations (read, write) internally call this operation
*
* @param IOPS_OP Which operation to add
* @param IopsOp Which operation to add
*/
void add_value_iops(enum IOPS_OP);
void add_value_iops(enum IopsOp);
/**
* @brief Store a new stat point, with a size value.
* If it involves an IO operation it will call the corresponding
* operation
*
* @param SIZE_OP Which operation we refer
* @param value to store (SIZE_OP)
* @param SizeOp Which operation we refer
* @param value to store (SizeOp)
*/
void
add_value_size(enum SIZE_OP, unsigned long long value);
add_value_size(enum SizeOp, unsigned long long value);
/**
* @brief Get the total mean value of the asked stat
* This can be provided immediately without cost
* @param IopsOp Which operation to get
* @return mean value
*/
double get_mean(enum IOPS_OP);
double get_mean(enum IopsOp);
/**
* @brief Get the total mean value of the asked stat
* This can be provided immediately without cost
* @param SizeOp Which operation to get
* @return mean value
*/
double get_mean(enum SIZE_OP);
double get_mean(enum SizeOp);
/**
* @brief Get all the means (total, 1, 5 and 10 minutes) for a SizeOp
* Returns precalculated values if they were computed less than 1 minute ago
* @param SizeOp Which operation to get
*
* @return std::vector< double > with 4 means
*/
std::vector<double> get_four_means(enum SIZE_OP);
std::vector<double> get_four_means(enum SizeOp);
/**
* @brief Get all the means (total, 1, 5 and 10 minutes) for an IopsOp
* Returns precalculated values if they were computed less than 1 minute ago
* @param IopsOp Which operation to get
*
* @return std::vector< double > with 4 means
*/
std::vector<double> get_four_means(enum IOPS_OP);
std::vector<double> get_four_means(enum IopsOp);
};
} // namespace gkfs::utils
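// Usage sketch (hypothetical call site, not part of this commit): construct
// the Stats object, record a 4 KiB write to chunk 0 of a file, and query the
// four means for written bytes (total, 1, 5 and 10 minute means, following
// the get_four_means() documentation; the exact vector ordering is assumed).
inline void
stats_usage_example() {
    gkfs::utils::Stats stats(true, "/tmp/gkfs_stats.log", "127.0.0.1:9091");
    stats.add_value_size(gkfs::utils::Stats::SizeOp::write_size, 4096);
    stats.add_write("/data/file1", 0);
    auto means = stats.get_four_means(gkfs::utils::Stats::SizeOp::write_size);
    (void) means;
}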
......
......@@ -103,6 +103,11 @@ namespace rocksdb {
constexpr auto use_write_ahead_log = false;
} // namespace rocksdb
namespace stats {
constexpr auto max_stats = 1000000; ///< How many stats will be stored
constexpr auto prometheus_gateway = "127.0.0.1:9091";
} // namespace stats
} // namespace gkfs::config
#endif // GEKKOFS_CONFIG_HPP
......@@ -96,7 +96,7 @@ private:
std::string stats_file_;
// Prometheus
std::string prometheus_gateway_ = "127.0.0.1:9091";
std::string prometheus_gateway_ = gkfs::config::stats::prometheus_gateway;
public:
static FsData*
......@@ -238,17 +238,17 @@ public:
void
output_stats(bool output_stats);
std::string
const std::string&
stats_file() const;
void
stats_file(std::string stats_file);
stats_file(const std::string& stats_file);
std::string
const std::string&
prometheus_gateway() const;
void
prometheus_gateway(std::string prometheus_gateway_);
prometheus_gateway(const std::string& prometheus_gateway_);
};
......
......@@ -44,6 +44,7 @@ wgetdeps=(
["psm2"]="11.2.185"
["json-c"]="0.15-20200726"
["curl"]="7.82.0"
["prometheus-cpp"]="v1.0.0"
)
# Dependencies that must be cloned
......@@ -70,7 +71,7 @@ clonedeps_patches=(
# Ordering that MUST be followed when downloading
order=(
"lz4" "capstone" "json-c" "psm2" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept" "date"
"agios" "curl" "parallax"
"agios" "curl" "prometheus-cpp" "parallax"
)
# Extra arguments passed to the installation script. As such, they can
......
......@@ -50,6 +50,7 @@ target_sources(statistics
if(GKFS_ENABLE_PROMETHEUS)
find_package(CURL REQUIRED)
find_package(prometheus-cpp REQUIRED)
set(PROMETHEUS_LIB
prometheus-cpp-pull
......@@ -65,9 +66,6 @@ endif()
if(GKFS_ENABLE_CODE_COVERAGE)
target_code_coverage(distributor AUTO)
endif()
if(GKFS_ENABLE_CODE_COVERAGE)
target_code_coverage(statistics AUTO)
endif()
......
......@@ -46,7 +46,8 @@ GetHostName() {
#endif
void
Stats::setup_Prometheus(std::string gateway_ip, std::string gateway_port) {
Stats::setup_Prometheus(const std::string& gateway_ip,
const std::string& gateway_port) {
// Prometheus Push model. Gateway
#ifdef GKFS_ENABLE_PROMETHEUS
const auto labels = Gateway::GetInstanceLabel(GetHostName());
......@@ -59,9 +60,9 @@ Stats::setup_Prometheus(std::string gateway_ip, std::string gateway_port) {
.Help("Number of IOPS")
.Register(*registry);
for(auto e : all_IOPS_OP) {
IOPS_Prometheus[e] = &family_counter->Add(
{{"operation", IOPS_OP_S[static_cast<int>(e)]}});
for(auto e : all_IopsOp) {
iops_Prometheus[e] = &family_counter->Add(
{{"operation", IopsOp_s[static_cast<int>(e)]}});
}
family_summary = &BuildSummary()
......@@ -69,9 +70,9 @@ Stats::setup_Prometheus(std::string gateway_ip, std::string gateway_port) {
.Help("Size of OPs")
.Register(*registry);
for(auto e : all_SIZE_OP) {
SIZE_Prometheus[e] = &family_summary->Add(
{{"operation", SIZE_OP_S[static_cast<int>(e)]}},
for(auto e : all_SizeOp) {
size_Prometheus[e] = &family_summary->Add(
{{"operation", SizeOp_s[static_cast<int>(e)]}},
Summary::Quantiles{});
}
......@@ -79,8 +80,8 @@ Stats::setup_Prometheus(std::string gateway_ip, std::string gateway_port) {
#endif /// GKFS_ENABLE_PROMETHEUS
}
Stats::Stats(bool output_thread, std::string stats_file,
std::string prometheus_gateway) {
Stats::Stats(bool output_thread, const std::string& stats_file,
const std::string& prometheus_gateway) {
// Init clocks
start = std::chrono::steady_clock::now();
......@@ -88,24 +89,20 @@ Stats::Stats(bool output_thread, std::string stats_file,
// To simplify the control we add an element into the different maps
// Statistically it will be negligible... and we get a faster flow
for(auto e : all_IOPS_OP) {
for(auto e : all_IopsOp) {
IOPS[e] = 0;
TIME_IOPS[e].push_back(std::chrono::steady_clock::now());
TimeIops[e].push_back(std::chrono::steady_clock::now());
}
for(auto e : all_SIZE_OP) {
for(auto e : all_SizeOp) {
SIZE[e] = 0;
TIME_SIZE[e].push_back(pair(std::chrono::steady_clock::now(), 0.0));
TimeSize[e].push_back(pair(std::chrono::steady_clock::now(), 0.0));
}
#ifdef GKFS_ENABLE_PROMETHEUS
try {
auto pos_separator = prometheus_gateway.find(":");
setup_Prometheus(prometheus_gateway.substr(0, pos_separator),
prometheus_gateway.substr(pos_separator + 1));
} catch(const std::exception& e) {
setup_Prometheus("127.0.0.1", "9091");
}
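// Split the "<ip>:<port>" gateway string; the default value comes from
// gkfs::config::stats::prometheus_gateway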
auto pos_separator = prometheus_gateway.find(":");
setup_Prometheus(prometheus_gateway.substr(0, pos_separator),
prometheus_gateway.substr(pos_separator + 1));
#endif
output_thread_ = output_thread;
......@@ -126,13 +123,13 @@ Stats::~Stats() {
}
void
Stats::add_read(std::string path, unsigned long long chunk) {
CHUNK_READ[pair(path, chunk)]++;
Stats::add_read(const std::string& path, unsigned long long chunk) {
chunkRead[pair(path, chunk)]++;
}
void
Stats::add_write(std::string path, unsigned long long chunk) {
CHUNK_WRITE[pair(path, chunk)]++;
Stats::add_write(const std::string& path, unsigned long long chunk) {
chunkWrite[pair(path, chunk)]++;
}
......@@ -140,26 +137,26 @@ void
Stats::output_map(std::ofstream& output) {
// Ordering
map<unsigned int, std::set<pair<std::string, unsigned long long>>>
ORDER_WRITE;
orderWrite;
map<unsigned int, std::set<pair<std::string, unsigned long long>>>
ORDER_READ;
orderRead;
for(auto i : CHUNK_READ) {
ORDER_READ[i.second].insert(i.first);
for(auto i : chunkRead) {
orderRead[i.second].insert(i.first);
}
for(auto i : CHUNK_WRITE) {
ORDER_WRITE[i.second].insert(i.first);
for(auto i : chunkWrite) {
orderWrite[i.second].insert(i.first);
}
auto CHUNK_MAP =
auto chunkMap =
[](std::string caption,
map<unsigned int,
std::set<pair<std::string, unsigned long long>>>& ORDER,
std::set<pair<std::string, unsigned long long>>>& order,
std::ofstream& output) {
output << caption << std::endl;
for(auto k : ORDER) {
for(auto k : order) {
output << k.first << " -- ";
for(auto v : k.second) {
output << v.first << " // " << v.second << endl;
......@@ -167,44 +164,44 @@ Stats::output_map(std::ofstream& output) {
}
};
CHUNK_MAP("READ CHUNK MAP", ORDER_READ, output);
CHUNK_MAP("WRITE CHUNK MAP", ORDER_WRITE, output);
chunkMap("READ CHUNK MAP", orderRead, output);
chunkMap("WRITE CHUNK MAP", orderWrite, output);
}
void
Stats::add_value_iops(enum IOPS_OP iop) {
Stats::add_value_iops(enum IopsOp iop) {
IOPS[iop]++;
auto now = std::chrono::steady_clock::now();
if((now - TIME_IOPS[iop].front()) > std::chrono::duration(10s)) {
TIME_IOPS[iop].pop_front();
} else if(TIME_IOPS[iop].size() >= MAX_STATS)
TIME_IOPS[iop].pop_front();
if((now - TimeIops[iop].front()) > std::chrono::duration(10s)) {
TimeIops[iop].pop_front();
} else if(TimeIops[iop].size() >= gkfs::config::stats::max_stats)
TimeIops[iop].pop_front();
TIME_IOPS[iop].push_back(std::chrono::steady_clock::now());
TimeIops[iop].push_back(std::chrono::steady_clock::now());
#ifdef GKFS_ENABLE_PROMETHEUS
IOPS_Prometheus[iop]->Increment();
iops_Prometheus[iop]->Increment();
#endif
}
void
Stats::add_value_size(enum SIZE_OP iop, unsigned long long value) {
Stats::add_value_size(enum SizeOp iop, unsigned long long value) {
auto now = std::chrono::steady_clock::now();
SIZE[iop] += value;
if((now - TIME_SIZE[iop].front().first) > std::chrono::duration(10s)) {
TIME_SIZE[iop].pop_front();
} else if(TIME_SIZE[iop].size() >= MAX_STATS)
TIME_SIZE[iop].pop_front();
if((now - TimeSize[iop].front().first) > std::chrono::duration(10s)) {
TimeSize[iop].pop_front();
} else if(TimeSize[iop].size() >= gkfs::config::stats::max_stats)
TimeSize[iop].pop_front();
TIME_SIZE[iop].push_back(pair(std::chrono::steady_clock::now(), value));
TimeSize[iop].push_back(pair(std::chrono::steady_clock::now(), value));
#ifdef GKFS_ENABLE_PROMETHEUS
SIZE_Prometheus[iop]->Observe(value);
size_Prometheus[iop]->Observe(value);
#endif
if(iop == SIZE_OP::READ_SIZE)
add_value_iops(IOPS_OP::IOPS_READ);
else if(iop == SIZE_OP::WRITE_SIZE)
add_value_iops(IOPS_OP::IOPS_WRITE);
if(iop == SizeOp::read_size)
add_value_iops(IopsOp::iops_read);
else if(iop == SizeOp::write_size)
add_value_iops(IopsOp::iops_write);
}
/**
......@@ -213,7 +210,7 @@ Stats::add_value_size(enum SIZE_OP iop, unsigned long long value) {
* @return mean value
*/
double
Stats::get_mean(enum SIZE_OP sop) {
Stats::get_mean(enum SizeOp sop) {
auto now = std::chrono::steady_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::seconds>(now - start);
......@@ -222,7 +219,7 @@ Stats::get_mean(enum SIZE_OP sop) {
}
double
Stats::get_mean(enum IOPS_OP iop) {
Stats::get_mean(enum IopsOp iop) {
auto now = std::chrono::steady_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::seconds>(now - start);
......@@ -231,17 +228,11 @@ Stats::get_mean(enum IOPS_OP iop) {
}
/**
* @brief Get all the means (total, 1, 5 and 10 minutes) for a SIZE_OP
* Returns precalculated values if they were computed less than 1 minute ago
* // TODO: cache
* @return std::vector< double > with 4 means
*/