Verified Commit 7be777ef authored by Marc Vef's avatar Marc Vef
Browse files

statistics daemon argument changes, documentation, cmake

parent b5907694
Loading
Loading
Loading
Loading
+10 −8
Original line number Diff line number Diff line
@@ -10,18 +10,20 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

### New

- Added Stats ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132)) gathering in servers
  - Stats output can be enabled with --output-stats <filename>
  - --enable-collection collects normal stats
  - --enable-chunkstats collects extended chunk stats
- Added statistics gathering on daemons ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132)).
    - Statistics collection can be enabled with:
    - `--enable-collection` collects normal statistics.
    - `--enable-chunkstats` collects extended chunk statistics.
- Statistics output to file is controlled by `--output-stats <filename>`.
- Added Prometheus support for outputting
  statistics ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132)):
    - Prometheus dependency optional and enabled at compile time with the CMake argument `GKFS_ENABLE_PROMETHEUS`.
    - `--enable-prometheus` enables statistics pushing to Prometheus if statistics are enabled.
    - `--prometheus-gateway` sets an IP and port for the Prometheus connection.
- Added new experimental metadata backend:
  Parallax ([!110](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/110)).
    - Added support to use multiple metadata backends.
    - Added `--clean-rootdir-finish` argument to remove rootdir/metadir at the end when the daemon finishes.
- Added Prometheus Output ([!132](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/132))
  - New option to define gateway --prometheus-gateway <gateway:port>
  - Prometheus output is optional with "GKFS_ENABLE_PROMETHEUS"
  - --enable-prometheus creates a thread to push the metrics.

### Changed

+9 −7
Original line number Diff line number Diff line
@@ -52,18 +52,20 @@ target_sources(statistics
if(GKFS_ENABLE_PROMETHEUS)
    find_package(CURL REQUIRED)
    find_package(prometheus-cpp REQUIRED)
    set(PROMETHEUS_LIB
    prometheus-cpp-pull
    prometheus-cpp-push
    prometheus-cpp-core
    set(PROMETHEUS_LINK_LIBRARIES
        prometheus-cpp::pull
        prometheus-cpp::push
        prometheus-cpp::core
        curl)
    target_include_directories(statistics PRIVATE ${prometheus-cpp_INCLUDE_DIR})
endif()

  target_link_libraries(statistics
      PRIVATE
  ${PROMETHEUS_LIB}
      ${PROMETHEUS_LINK_LIBRARIES}
  )


if(GKFS_ENABLE_CODE_COVERAGE)
  target_code_coverage(distributor AUTO)
  target_code_coverage(statistics AUTO)
+5 −5
Original line number Diff line number Diff line
@@ -82,7 +82,9 @@ Stats::setup_Prometheus(const std::string& gateway_ip,

Stats::Stats(bool enable_chunkstats, bool enable_prometheus,
             const std::string& stats_file,
             const std::string& prometheus_gateway) {
             const std::string& prometheus_gateway)
    : enable_prometheus_(enable_prometheus),
      enable_chunkstats_(enable_chunkstats) {

    // Init clocks
    start = std::chrono::steady_clock::now();
@@ -105,8 +107,6 @@ Stats::Stats(bool enable_chunkstats, bool enable_prometheus,
    setup_Prometheus(prometheus_gateway.substr(0, pos_separator),
                     prometheus_gateway.substr(pos_separator + 1));
#endif
    enable_chunkstats_ = enable_chunkstats;
    enable_prometheus_ = enable_prometheus;

    if(!stats_file.empty() || enable_prometheus_) {
        output_thread_ = true;
@@ -331,7 +331,7 @@ Stats::output(std::chrono::seconds d, std::string file_output) {

        times++;

        if(enable_chunkstats_ and of) {
        if(enable_chunkstats_ && of) {
            if(times % 4 == 0)
                output_map(of.value());
        }
@@ -340,7 +340,7 @@ Stats::output(std::chrono::seconds d, std::string file_output) {
            gateway->Push();
        }
#endif
        while(running and a < d) {
        while(running && a < d) {
            a += 1s;
            std::this_thread::sleep_for(1s);
        }
+46 −25
Original line number Diff line number Diff line
@@ -654,40 +654,60 @@ parse_input(const cli_options& opts, const CLI::App& desc) {
        GKFS_DATA->parallax_size_md(stoi(opts.parallax_size));
    }

    if(desc.count("--output-stats")) {
        auto stats_file = opts.stats_file;
        GKFS_DATA->stats_file(stats_file);
        GKFS_DATA->spdlogger()->debug("{}() Stats Enabled: '{}'", __func__,
                                      stats_file);
    } else {
        GKFS_DATA->stats_file("");
        GKFS_DATA->spdlogger()->debug("{}() Stats Output Disabled", __func__);
    }

    /*
     * Statistics collection arguments
     */
    if(desc.count("--enable-collection")) {
        GKFS_DATA->enable_stats(true);
        GKFS_DATA->spdlogger()->debug("{}() Collection Enabled", __func__);
        GKFS_DATA->spdlogger()->info("{}() Statistic collection enabled",
                                     __func__);
    }

    if(desc.count("--enable-chunkstats")) {
        GKFS_DATA->enable_chunkstats(true);
        GKFS_DATA->spdlogger()->debug("{}() ChunkStats Enabled", __func__);
        GKFS_DATA->spdlogger()->info("{}() Chunk statistic collection enabled",
                                     __func__);
    }

#ifdef GKFS_ENABLE_PROMETHEUS
    if(desc.count("--enable-prometheus")) {
        GKFS_DATA->enable_prometheus(true);
        GKFS_DATA->spdlogger()->debug("{}() Prometheus Enabled", __func__);
        if(GKFS_DATA->enable_stats() || GKFS_DATA->enable_chunkstats())
            GKFS_DATA->spdlogger()->info(
                    "{}() Statistics output to Prometheus enabled", __func__);
        else
            GKFS_DATA->spdlogger()->warn(
                    "{}() Prometheus statistic output enabled but no stat collection is enabled. There will be no output to Prometheus",
                    __func__);
    }


    if(desc.count("--prometheus-gateway")) {
        auto gateway = opts.prometheus_gateway;
        GKFS_DATA->prometheus_gateway(gateway);
        GKFS_DATA->spdlogger()->debug("{}() Prometheus Gateway: '{}'", __func__,
                                      gateway);
        if(GKFS_DATA->enable_prometheus())
            GKFS_DATA->spdlogger()->info("{}() Prometheus gateway set to '{}'",
                                         __func__, gateway);
        else
            GKFS_DATA->spdlogger()->warn(
                    "{}() Prometheus gateway was set but Prometheus is disabled.");
    }
#endif

    if(desc.count("--output-stats")) {
        auto stats_file = opts.stats_file;
        GKFS_DATA->stats_file(stats_file);
        if(GKFS_DATA->enable_stats() || GKFS_DATA->enable_chunkstats())
            GKFS_DATA->spdlogger()->info(
                    "{}() Statistics are written to file '{}'", __func__,
                    stats_file);
        else
            GKFS_DATA->spdlogger()->warn(
                    "{}() --output-stats argument used but no stat collection is enabled. There will be no output to file '{}'",
                    __func__, stats_file);
    } else {
        GKFS_DATA->stats_file("");
        GKFS_DATA->spdlogger()->debug("{}() Statistics output disabled",
                                      __func__);
    }
}

/**
@@ -755,24 +775,25 @@ main(int argc, const char* argv[]) {
    desc.add_option("--parallaxsize", opts.parallax_size,
                    "parallaxdb - metadata file size in GB (default 8GB), "
                    "used only with new files");
    desc.add_option(
                "--output-stats", opts.stats_file,
                "Creates a thread that outputs the server stats each 10s, to the file specified");
    desc.add_flag(
                "--enable-collection",
                "Enables collection of normal stats, independent of the output-stats option");
                "Enables collection of general statistics. "
                "Output requires either the --output-stats or --enable-prometheus argument.");
    desc.add_flag(
                "--enable-chunkstats",
                "Enables collection of chunkstats stats, independent of the output-stats option")
                ;
                "Enables collection of data chunk statistics in I/O operations."
                "Output requires either the --output-stats or --enable-prometheus argument.");
    desc.add_option(
                "--output-stats", opts.stats_file,
                "Creates a thread that outputs the server stats each 10s to the specified file.");
    #ifdef GKFS_ENABLE_PROMETHEUS
    desc.add_flag(
                "--enable-prometheus",
                "Enables prometheus output, enables thread");
                "Enables prometheus output and a corresponding thread.");

    desc.add_option(
                "--prometheus-gateway", opts.prometheus_gateway,
                "Defines the prometheus gateway, default is 127.0.0.1:9091");
                "Defines the prometheus gateway <ip:port> (Default 127.0.0.1:9091).");
    #endif

    desc.add_flag("--version", "Print version and exit.");
+15 −10
Original line number Diff line number Diff line
@@ -114,10 +114,7 @@ rpc_srv_write(hg_handle_t handle) {
            "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'",
            __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n,
            in.total_chunk_size, bulk_size, in.offset);
    if(GKFS_DATA->enable_stats()) {
        GKFS_DATA->stats()->add_value_size(
                gkfs::utils::Stats::SizeOp::write_size, bulk_size);
    }


#ifdef GKFS_ENABLE_AGIOS
    int* data;
@@ -352,7 +349,13 @@ rpc_srv_write(hg_handle_t handle) {
     */
    GKFS_DATA->spdlogger()->debug("{}() Sending output response {}", __func__,
                                  out.err);
    return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle);
    auto handler_ret =
            gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle);
    if(GKFS_DATA->enable_stats()) {
        GKFS_DATA->stats()->add_value_size(
                gkfs::utils::Stats::SizeOp::write_size, bulk_size);
    }
    return handler_ret;
}

/**
@@ -414,10 +417,6 @@ rpc_srv_read(hg_handle_t handle) {
            "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'",
            __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n,
            in.total_chunk_size, bulk_size, in.offset);
    if(GKFS_DATA->enable_stats()) {
        GKFS_DATA->stats()->add_value_size(
                gkfs::utils::Stats::SizeOp::read_size, bulk_size);
    }

#ifdef GKFS_ENABLE_AGIOS
    int* data;
@@ -619,7 +618,13 @@ rpc_srv_read(hg_handle_t handle) {
     */
    GKFS_DATA->spdlogger()->debug("{}() Sending output response, err: {}",
                                  __func__, out.err);
    return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle);
    auto handler_ret =
            gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle);
    if(GKFS_DATA->enable_stats()) {
        GKFS_DATA->stats()->add_value_size(
                gkfs::utils::Stats::SizeOp::read_size, bulk_size);
    }
    return handler_ret;
}