Commit 4fb3a7b7 authored by Ramon Nou's avatar Ramon Nou
Browse files

Stats for GekkoFS

parent 126a171c
Loading
Loading
Loading
Loading
+175 −0
Original line number Diff line number Diff line
/*
  Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain
  Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany

  This software was partially supported by the
  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).

  This software was partially supported by the
  ADA-FS project under the SPPEXA project funded by the DFG.

  This file is part of GekkoFS.

  GekkoFS is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  GekkoFS is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with GekkoFS.  If not, see <https://www.gnu.org/licenses/>.

  SPDX-License-Identifier: GPL-3.0-or-later
*/

#ifndef GKFS_COMMON_STATS_HPP
#define GKFS_COMMON_STATS_HPP

#include <cstdint>
#include <unistd.h>
#include <cassert>
#include <map>
#include <vector>
#include <deque>
#include <chrono>
#include <initializer_list>
/**
 * Provides storage for the statistics collected about GekkoFS.
 * The information is per server.
 * The 1, 5 and 10 minute statistics are approximate, not exact.
 *
 */
namespace gkfs::utils {

/*
    Number of operations (Create, write/ read, remove, mkdir...)
    Size of database (metadata keys, should be not needed, any)
    Size of data (+write - delete)
    Server Bandwidth (write / read operations)

    mean, (lifetime of the server)
    1 minute mean
    5 minute mean
    10 minute mean

    To provide the stats that we need,
    we need to store each value together with its timestamp.
    A vector with a maximum number of elements should work.
    The stats will only be calculated when requested;
    a cached value will be sent (with a deadline).
    */
/**
 * @brief Per-server store for GekkoFS operation and size statistics.
 *
 * For every operation it keeps a lifetime total plus a bounded history of
 * timestamped samples, from which the 1, 5 and 10 minute means are derived.
 *
 * NOTE(review): no synchronization is visible in this class; confirm that
 * callers serialize access if one instance is shared between threads.
 */
class Stats{
public:
    /**
     * Operations that are counted once per occurrence.
     * Public so that callers can name the operation they report.
     */
    enum class IOPS_OP {
        IOPS_CREATE,
        IOPS_WRITE,
        IOPS_READ,
        IOPS_MKDIR,
        IOPS_RMDIR,
        IOPS_REMOVE,
    };

    /**
     * Statistics that carry a size value with every sample.
     * Public so that callers can name the statistic they report.
     */
    enum class SIZE_OP {
        METADATA_SIZE,
        WRITE_SIZE,
        READ_SIZE,
        DATA_SIZE
    };

private:
    // Lists of all enumerators, used to initialize the per-operation maps
    constexpr static const std::initializer_list<Stats::IOPS_OP> all_IOPS_OP = {IOPS_OP::IOPS_CREATE, IOPS_OP::IOPS_WRITE, IOPS_OP::IOPS_READ, IOPS_OP::IOPS_MKDIR,IOPS_OP::IOPS_RMDIR, IOPS_OP::IOPS_REMOVE};

    constexpr static const std::initializer_list<Stats::SIZE_OP> all_SIZE_OP = {SIZE_OP::METADATA_SIZE, SIZE_OP::DATA_SIZE, SIZE_OP::WRITE_SIZE, SIZE_OP::READ_SIZE};

    /* Time of the last refresh of the cached means */
    std::chrono::time_point<std::chrono::steady_clock> last_cached;
    /* Measures when we started the server */
    std::chrono::time_point<std::chrono::steady_clock> start;
    // How many samples are kept per operation.
    // static constexpr: the value is the same for every instance, and a const
    // instance member would needlessly delete the copy-assignment operator.
    static constexpr unsigned int MAX_STATS = 1000000;

    // Stores total value for global mean
    std::map <IOPS_OP, unsigned long>  IOPS;
    std::map <SIZE_OP, unsigned long>  SIZE;


    // Stores the timestamp of every incoming operation.
    // The oldest entries are removed once they are older than 10 minutes.
    // The different means will be stored and cached for 1 minute.
    std::map <IOPS_OP, std::deque<  std::chrono::time_point<std::chrono::steady_clock> > > TIME_IOPS;
    // We will store the total, 1, 5, and 10 minute mean;
    std::map <IOPS_OP, std::vector<double> > CACHED_IOPS;

    // For size operations we need to store the timestamp and
    // the size
    std::map <SIZE_OP,
                std::deque <
                    std::pair <  std::chrono::time_point<std::chrono::steady_clock> , unsigned long long > >
             > TIME_SIZE;
    // We will store the total, 1, 5, and 10 minute mean;
    std::map <SIZE_OP, std::vector <double> > CACHED_SIZE;

public:
    /**
     * @brief Starts the Stats module and initializes structures
     *
     */
    Stats();

    /**
     * @brief Add a new value for an IOPS operation, which does not involve
     * any size.
     * No value is needed as they are simple (1 create, 1 read...)
     * Size operations internally call this operation (read, write)
     *
     * @param iop Which operation to add
     */
    void add_value_iops (IOPS_OP iop);

    /**
     * @brief Store a new stat point, with a size value.
     * If it involves an I/O operation it will call the corresponding
     * operation
     *
     * @param sop Which operation we refer to
     * @param value to store for that operation
     */
    void add_value_size (SIZE_OP sop, unsigned long long value);

    /**
     * @brief Get the total mean value of the asked stat
     * This can be provided immediately without cost
     *
     * @param iop Which operation we refer to
     * @return mean value
     */
    double get_mean (IOPS_OP iop);

    /**
     * @brief Get the total mean value of the asked stat
     * This can be provided immediately without cost
     *
     * @param sop Which operation we refer to
     * @return mean value
     */
    double get_mean (SIZE_OP sop);

    /**
     * @brief Get all the means (total, 1, 5 and 10 minutes) for a SIZE_OP
     * Returns precalculated values if we just calculated them 1 minute ago
     *
     * @param sop Which operation we refer to
     * @return std::vector< double > with 4 means
     */
    std::vector< double > get_four_means (SIZE_OP sop);

    /**
     * @brief Get all the means (total, 1, 5 and 10 minutes) for an IOPS_OP
     * Returns precalculated values if we just calculated them 1 minute ago
     *
     * @param iop Which operation we refer to
     * @return std::vector< double > with 4 means
     */
    std::vector< double > get_four_means (IOPS_OP iop);
};

} // namespace gkfs::utils

#endif // GKFS_COMMON_STATS_HPP
 No newline at end of file
+15 −0
Original line number Diff line number Diff line
@@ -46,6 +46,11 @@ namespace data {
class ChunkStorage;
}

/* Forward declaration of the statistics class. The visible FsData code only
 * stores and passes a std::shared_ptr to it. */
namespace utils {
class Stats;
}

namespace daemon {

class FsData {
@@ -85,6 +90,9 @@ private:
    bool link_cnt_state_;
    bool blocks_state_;

    // Statistics
    std::shared_ptr<gkfs::utils::Stats> stats_;

public:
    static FsData*
    getInstance() {
@@ -209,6 +217,13 @@ public:

    void
    parallax_size_md(unsigned int size_md);
    
    // Returns the shared statistics object currently stored in this instance.
    const std::shared_ptr<gkfs::utils::Stats>&
    stats() const;

    // Replaces the stored statistics object with the given one.
    void
    stats(const std::shared_ptr<gkfs::utils::Stats>& stats);

};

} // namespace daemon
+164 −0
Original line number Diff line number Diff line
/*
  Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain
  Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany

  This software was partially supported by the
  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).

  This software was partially supported by the
  ADA-FS project under the SPPEXA project funded by the DFG.

  This file is part of GekkoFS.

  GekkoFS is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  GekkoFS is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with GekkoFS.  If not, see <https://www.gnu.org/licenses/>.

  SPDX-License-Identifier: GPL-3.0-or-later
*/


// FIXME: absolute, developer-specific include path. Replace it with a path
// relative to the project's include directories once those are confirmed.
#include "/home/rnou/gekkofs/include/common/statistics/stats.hpp"

#include <chrono>
#include <deque>
#include <map>
#include <utility>
#include <vector>

using namespace std;

namespace gkfs::utils{

    /**
     * @brief Records the start time and prepares every per-operation
     * container (totals, sample histories and cached means).
     */
    Stats::Stats(){

        // Reference points: server start and last refresh of the cache
        start = std::chrono::steady_clock::now();
        last_cached = std::chrono::steady_clock::now();

        // Each operation caches four means, all starting at zero.
        // Each history is seeded with one placeholder entry so that it is
        // never empty; statistically this is negligible and it simplifies
        // the control flow.
        for (const auto op : all_IOPS_OP) {
            CACHED_IOPS[op].insert(CACHED_IOPS[op].end(), 4, 0.0);
            IOPS[op] = 0;
            TIME_IOPS[op].emplace_back(std::chrono::steady_clock::now());
        }

        for (const auto op : all_SIZE_OP) {
            CACHED_SIZE[op].insert(CACHED_SIZE[op].end(), 4, 0.0);
            SIZE[op] = 0;
            TIME_SIZE[op].emplace_back(std::chrono::steady_clock::now(), 0ULL);
        }
    }

    /**
     * @brief Registers one occurrence of an operation.
     *
     * Increments the lifetime counter and appends the current timestamp to
     * the history of the operation. Timestamps older than 10 minutes (the
     * longest window reported by get_four_means) are discarded and the
     * history never holds more than MAX_STATS entries.
     *
     * @param iop Which operation to add
     */
    void Stats::add_value_iops (enum IOPS_OP iop){
        // Must cover the longest window reported by get_four_means
        constexpr auto retention = std::chrono::minutes(10);

        IOPS[iop]++;
        const auto now = std::chrono::steady_clock::now();

        auto& history = TIME_IOPS[iop];

        // Evict every expired timestamp, not just the oldest one; otherwise
        // stale entries pile up at the front after a quiet period.
        while (!history.empty() && (now - history.front()) > retention) {
            history.pop_front();
        }

        // Keep the memory footprint bounded
        if (!history.empty() && history.size() >= MAX_STATS) history.pop_front();

        // Reuse the timestamp taken above instead of reading the clock again
        history.push_back(now);
    }

    void Stats::add_value_size (enum SIZE_OP iop, unsigned long long value){
        auto now = std::chrono::steady_clock::now();
        SIZE[iop] += value;
        if ( (now - TIME_SIZE[iop].front().first) > std::chrono::duration(10s) ) {
            TIME_SIZE[iop].pop_front();
        }
        else if (TIME_SIZE[iop].size() >= MAX_STATS) TIME_SIZE[iop].pop_front();

        TIME_SIZE[iop].push_back(pair( std::chrono::steady_clock::now(), value ) );
        
        if (iop == SIZE_OP::READ_SIZE) IOPS[IOPS_OP::IOPS_READ]++;
        else if (iop == SIZE_OP::WRITE_SIZE) IOPS[IOPS_OP::IOPS_WRITE]++;
    }

    /**
     * @brief Get the total mean value of the asked stat
     * This can be provided immediately without cost
     *
     * The mean is the lifetime total divided by the seconds elapsed since
     * the object was constructed. Fractional seconds are used so that a call
     * during the first second does not divide by zero.
     *
     * @param sop Which size statistic to report
     * @return mean value per second, 0 if no time has elapsed yet
     */
    double Stats::get_mean (enum SIZE_OP sop){
        const auto now = std::chrono::steady_clock::now();
        const std::chrono::duration<double> elapsed = now - start;

        // Guard against a zero-length interval (would yield inf or NaN)
        if (elapsed.count() <= 0.0) return 0.0;

        return static_cast<double>(SIZE[sop]) / elapsed.count();
    }

    /**
     * @brief Get the total mean value of the asked stat
     * This can be provided immediately without cost
     *
     * The mean is the lifetime operation count divided by the seconds
     * elapsed since the object was constructed. Fractional seconds are used
     * so that a call during the first second does not divide by zero.
     *
     * @param iop Which operation to report
     * @return mean operations per second, 0 if no time has elapsed yet
     */
    double Stats::get_mean (enum IOPS_OP iop){
        const auto now = std::chrono::steady_clock::now();
        const std::chrono::duration<double> elapsed = now - start;

        // Guard against a zero-length interval (would yield inf or NaN)
        if (elapsed.count() <= 0.0) return 0.0;

        return static_cast<double>(IOPS[iop]) / elapsed.count();
    }


/**
 * @brief Get all the means (total, 1, 5 and 10 minutes) for a SIZE_OP
 *
 * Index 0 holds the lifetime mean, indices 1, 2 and 3 hold the sum of the
 * samples of the last 1, 5 and 10 minutes divided by the length of that
 * window in seconds (60, 300, 600).
 * The values are recomputed on every call.
 * // TODO: cache
 *
 * @param sop Which size statistic to report
 * @return std::vector< double > with 4 means
 */
    std::vector< double > Stats::get_four_means (enum SIZE_OP sop){
        std::vector < double > results = {0,0,0,0};
        const auto now = std::chrono::steady_clock::now();
        const auto& history = TIME_SIZE[sop];

        // Samples are stored oldest first, so walk from the newest one and
        // stop at the first sample outside the 10 minute window. Walking
        // forward and stopping at a stale front entry would drop every
        // newer sample.
        for (auto it = history.rbegin(); it != history.rend(); ++it) {
            // Compare exact durations; truncating to whole minutes would
            // stretch every window by almost a minute.
            const auto age = now - it->first;
            if (age > std::chrono::minutes(10)) break;

            results[3] += it->second;
            if (age > std::chrono::minutes(5)) continue;
            results[2] += it->second;
            if (age > std::chrono::minutes(1)) continue;
            results[1] += it->second;
        }

        results[0] = get_mean(sop);
        results[3] /= 10*60;
        results[2] /= 5*60;
        results[1] /= 60;

        return results;
    }


    /**
     * @brief Get all the means (total, 1, 5 and 10 minutes) for an IOPS_OP
     *
     * Index 0 holds the lifetime mean, indices 1, 2 and 3 hold the number of
     * operations recorded in the last 1, 5 and 10 minutes divided by the
     * length of that window in seconds (60, 300, 600).
     * The values are recomputed on every call.
     *
     * @param iop Which operation to report
     * @return std::vector< double > with 4 means
     */
    std::vector< double > Stats::get_four_means (enum IOPS_OP iop){
        std::vector < double > results = {0,0,0,0};
        const auto now = std::chrono::steady_clock::now();
        const auto& history = TIME_IOPS[iop];

        // Timestamps are stored oldest first, so walk from the newest one
        // and stop at the first timestamp outside the 10 minute window.
        // Walking forward and stopping at a stale front entry would drop
        // every newer timestamp.
        for (auto it = history.rbegin(); it != history.rend(); ++it) {
            // Compare exact durations; truncating to whole minutes would
            // stretch every window by almost a minute.
            const auto age = now - *it;
            if (age > std::chrono::minutes(10)) break;

            results[3]++;
            if (age > std::chrono::minutes(5)) continue;
            results[2]++;
            if (age > std::chrono::minutes(1)) continue;
            results[1]++;
        }

        results[0] = get_mean(iop);
        results[3] /= 10*60;
        results[2] /= 5*60;
        results[1] /= 60;

        return results;
    }


} // namespace gkfs::utils
+1 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@ set(DAEMON_LINK_LIBRARIES
    metadata_db
    storage
    distributor
    statistics
    log_util
    env_util
    spdlog
+9 −0
Original line number Diff line number Diff line
@@ -219,6 +219,15 @@ void
FsData::parallax_size_md(unsigned int size_md) {
    // Stores size_md multiplied by 1024^3; the unsigned long long literals
    // make the multiplication happen in 64 bits.
    // NOTE(review): presumably size_md is given in GiB - confirm with callers.
    FsData::parallax_size_md_ = static_cast<unsigned long long>(
            size_md * 1024ull * 1024ull * 1024ull);
}

// Returns a reference to the shared statistics object held in stats_.
const std::shared_ptr<gkfs::utils::Stats>&
FsData::stats() const {
    return stats_;
}

// Replaces the statistics object held in stats_ with the given one.
void
FsData::stats(const std::shared_ptr<gkfs::utils::Stats>& stats) {
    stats_ = stats;
}

} // namespace gkfs::daemon
Loading