Verified Commit 52a03054 authored by Marc Vef's avatar Marc Vef
Browse files

Random Slicing setup

parent 511cda43
Loading
Loading
Loading
Loading
+46 −1
Original line number Diff line number Diff line
@@ -29,7 +29,9 @@
#ifndef GEKKOFS_RPC_DISTRIBUTOR_HPP
#define GEKKOFS_RPC_DISTRIBUTOR_HPP

#include "../include/config.hpp"
#include <config.hpp>
#include <common/rpc/random_slicing/dist_rand_slice.hpp>

#include <vector>
#include <string>
#include <numeric>
@@ -37,6 +39,7 @@
#include <fstream>
#include <map>


namespace gkfs::rpc {

using chunkid_t = unsigned int;
@@ -213,6 +216,48 @@ public:
    locate_directory_metadata(const std::string& path) const override;
};

/**
 * Distributor implementation backed by a VDRIVE::DistRandSlice instance.
 *
 * Overrides the host-lookup interface of Distributor (localhost, hosts_size,
 * locate_data, locate_file_metadata, locate_directory_metadata) and adds
 * get_hosts_map(). The member functions are only declared here; their
 * definitions are not part of this header.
 *
 * NOTE(review): presumably the lookups are delegated to dist_impl_ — confirm
 * against the implementation file.
 */
class RandomSlicingDistributor : public Distributor {
private:
    // Host regarded as local by this distributor. Presumably the value
    // returned by localhost() — TODO confirm.
    host_t localhost_;
    // List of hosts known to this distributor. How it is populated is not
    // visible in this header.
    std::vector<host_t> all_hosts_;
    // Hash functor over std::string. Presumably used to turn paths into
    // positions for the distribution — TODO confirm.
    std::hash<std::string> str_hash;
    // Shared handle to the underlying distribution object; value-initialized
    // (empty) unless a constructor sets it.
    std::shared_ptr<VDRIVE::DistRandSlice> dist_impl_{};

public:
    /**
     * Allow instantiation without knowledge of localhost.
     *
     * NOTE(review): this single-argument constructor is not marked
     * `explicit`, so a std::shared_ptr<VDRIVE::DistRandSlice> converts
     * implicitly to a RandomSlicingDistributor.
     *
     * @param dist_impl shared handle to the distribution object to use.
     */
    RandomSlicingDistributor(std::shared_ptr<VDRIVE::DistRandSlice> dist_impl);

    /**
     * Instantiate with a known local host.
     *
     * @param localhost host to treat as local.
     * @param dist_impl shared handle to the distribution object to use.
     */
    RandomSlicingDistributor(host_t localhost,
                             std::shared_ptr<VDRIVE::DistRandSlice> dist_impl);

    /**
     * @return the local host. Declared `final`, so further-derived classes
     *         cannot override it.
     */
    host_t
    localhost() const override final;

    /**
     * @return the number of hosts (see Distributor::hosts_size).
     */
    unsigned int
    hosts_size() const override;

    /**
     * Locate the host for one chunk of a file.
     *
     * @param path     path of the file.
     * @param chnk_id  ID of the chunk within the file.
     * @param num_copy presumably the index of the replica to locate —
     *                 TODO confirm against Distributor.
     * @return the host selected for the given arguments.
     */
    host_t
    locate_data(const std::string& path,
                const chunkid_t& chnk_id,
                int num_copy) const override;

    /**
     * Locate the host for the metadata of a file.
     *
     * @param path     path of the file.
     * @param num_copy presumably the index of the replica to locate —
     *                 TODO confirm against Distributor.
     * @return the host selected for the given arguments.
     */
    host_t
    locate_file_metadata(const std::string& path,
                         int num_copy) const override;

    /**
     * Locate the hosts for the metadata of a directory.
     *
     * @param path path of the directory.
     * @return the hosts selected for the given path.
     */
    std::vector<host_t>
    locate_directory_metadata(const std::string& path) const override;

    /**
     * Convenience function to return the hosts in GekkoFS format i.e.
     * unordered_map<host_id, pair<hostname, uri>>
     *
     * Not declared const, unlike the lookup functions above.
     */
    std::unordered_map<uint64_t, std::pair<std::string, std::string>>
    get_hosts_map();
};

} // namespace gkfs::rpc

#endif // GEKKOFS_RPC_DISTRIBUTOR_HPP
+145 −0
Original line number Diff line number Diff line
// Adapted from https://sourceforge.net/p/dadisi/code/8/tree/trunk/dadisi/
/*
 * File:   Disk.h
 * Author: fermat
 *
 * Created on 20. Januar 2010, 10:39
 */

#ifndef _DISK_H
#define _DISK_H

#define __STDC_LIMIT_MACROS // required for limit macros
#include <stdint.h>
#include <list>
#include <string>

namespace VDRIVE {

/**
 * This class represents a Disk for the Distributor. Each Disk consists of
 * a unique ID (in the list of Disks used by this lib), a capacity, and a
 * hostname and URI string that can be used by the person instantiating a
 * Disk.
 *
 * NOTE(review): this class names std::ifstream and std::ofstream, but the
 * include list visible at the top of this header names neither <fstream>
 * nor <iosfwd>; presumably they arrive transitively — confirm.
 *
 * @author Sascha Effert <fermat@uni-paderborn.de>
 */
class Disk {
public:
    /**
     * Instantiates a new Disk with the given values.
     *
     * @param id ID of the disk. Has to be unique all over this library.
     * @param capacity Capacity of the Disk in bytes. (This can also be in
     *                 any other scale, but has to be same for ExtentSize
     *                 of Distributor)
     * @param hostname Hostname string stored with the disk (see
     *                 getHostname()). TODO(dauer): document the expected
     *                 format.
     * @param uri URI string stored with the disk (see getUri()).
     *            TODO(dauer): document the expected format.
     */
    Disk(uint64_t id, uint64_t capacity, std::string hostname, std::string uri);

    /**
     * copy constructor
     *
     * @param orig original Disk
     */
    Disk(const Disk& orig);

    /**
     * Instantiates a new Disk with values read from the given ifstream.
     * This is assumed to be written previously by Disk::serialize().
     *
     * @param in stream to read the Disk from.
     */
    Disk(std::ifstream& in);

    /**
     * Destructor
     */
    virtual ~Disk();

    /**
     * Get the ID of the disk. Has to be unique all over this library.
     *
     * @return ID of the disk. Has to be unique all over this library.
     */
    uint64_t
    getId() const;

    /**
     * Set the ID of the disk. Has to be unique all over this library.
     *
     * @param id ID of the disk. Has to be unique all over this library.
     */
    void
    setId(uint64_t id);

    /**
     * Get the Capacity of the Disk in bytes. (This can also be in any
     * other scale, but has to be same for ExtentSize of Distributor)
     *
     * @return Capacity of the Disk in bytes. (This can also be in any
     *         other scale, but has to be same for ExtentSize of
     *         Distributor)
     */
    uint64_t
    getCapacity() const;

    /**
     * Set the Capacity of the Disk in bytes. (This can also be in any
     * other scale, but has to be same for ExtentSize of Distributor)
     *
     * @param capacity Capacity of the Disk in bytes. (This can also be in
     *                any other scale, but has to be same for ExtentSize of
     *                Distributor)
     */
    void
    setCapacity(uint64_t capacity);

    /**
     * Get the hostname string of the disk.
     *
     * @return hostname string of the disk (returned by value).
     */
    std::string
    getHostname() const;

    /**
     * Set the hostname string of the disk.
     * May not be changed by Distributors!
     *
     * @param hostname to be used by developer instantiating this disk.
     *             May not be changed by Distributors!
     */
    void
    setHostname(std::string hostname);

    /**
     * Get the URI string of the disk.
     *
     * @return URI string of the disk (returned by value).
     */
    std::string
    getUri() const;

    /**
     * Set the URI string of the disk.
     *
     * @param uri URI string to be used by developer instantiating this
     *            disk.
     */
    void
    setUri(std::string uri);

    /**
     * Write this Disk to the given ofstream. The counterpart is the
     * Disk(std::ifstream&) constructor, which assumes its input was
     * written by this function.
     *
     * @param out stream to write the Disk to.
     */
    void
    serialize(std::ofstream& out);

private:
    /**
     * ID of the disk. Has to be unique all over this library.
     */
    uint64_t id;

    /**
     * Capacity of the Disk in bytes. (This can also be in any other
     * scale, but has to be same for ExtentSize of Distributor)
     */
    uint64_t capacity;

    /**
     * String to be used by developer instantiating this disk. May not be
     * changed by Distributors!
     */
    std::string hostname;

    /**
     * String to be used by developer instantiating this disk. May not be
     * changed by Distributors!
     */
    std::string uri;
};
} // namespace VDRIVE
#endif /* _DISK_H */
+278 −0
Original line number Diff line number Diff line
// Adapted from https://sourceforge.net/p/dadisi/code/8/tree/trunk/dadisi/
/*
 * File:   DistRandSlice.h
 * Author: amiranda
 *
 * Created on 8, November 2010, 12:24
 */

#ifndef _DISTRANDSLICE_H
#define _DISTRANDSLICE_H


#include <unordered_map>
#include <vector>
#define __STDC_LIMIT_MACROS // required for UINT64_MAX macro
#include <stdint.h>
#include <common/rpc/random_slicing/disk.hpp>
#include <common/rpc/random_slicing/flat_segment_tree.hpp>

// forward declaration for friend class
namespace gkfs::rpc {
class RandomSlicingDistributor;
}

#define DEBUG
#ifdef DEBUG
// #   define DUMP_COPIES
// #   define DUMP_DISKS
// #   define DUMP_PARTITIONS
// #   define DUMP_FREE_SPACE_COLLECTION
// #   define DUMP_FREE_SPACE_ASSIMILATION
// #   define DUMP_FREE_SPACE
// #   define DUMP_INTERVALS
// #   define DUMP_INTERVALS_VERBOSE
#endif


namespace VDRIVE {

/**
 * Distribution object that maps positions in a virtual allocation space
 * [VSPACE_MIN, VSPACE_MAX] to disks.
 *
 * What this declaration shows: disks are kept by ID (m_disks), partition
 * capacities are kept by ID and scaled relative to VSPACE_MAX
 * (m_partitions), and an mdds flat_segment_tree (m_interval_tree) is kept
 * "for searches". Disks can be added and removed, and the state can be
 * written to / read from a file.
 *
 * TODO: describe the distribution algorithm itself here. Presumably this is
 * the Random Slicing strategy (per the file and class name) — confirm
 * against the implementation.
 */
class DistRandSlice {
public:
    // Debug switch for the virtual space: when true, VSPACE_MAX shrinks to
    // 1000000 and DOUBLE_ALLOWED_ERROR loosens to 1.0e-5 (see below).
    static bool constexpr DEBUG_VSPACE = false;
    // constants declaring the min and max values for the
    // virtual allocation space.
    // right now, it spans from 0 to (2^64-1)/10 blocks/files/whatever
    static uint64_t constexpr VSPACE_MIN = 0;
    static uint64_t constexpr VSPACE_MAX =
            DEBUG_VSPACE ? 1000000 : UINT64_MAX / 10;
    // dividing by 10 brings the number down a bit to prevent overflows due
    // to rounding

    // Tolerance for floating-point comparisons; how it is applied is not
    // visible in this header.
    static double constexpr DOUBLE_ALLOWED_ERROR =
            DEBUG_VSPACE ? 1.0e-5 : 1.0e-14;

    /**
     * Allow direct access e.g. to m_disks to avoid copying and memory
     * management issues.
     */
    friend class gkfs::rpc::RandomSlicingDistributor;

    /**
     * generate a new, uninitialized Rand Slice Implementation with default
     * settings.
     */
    DistRandSlice();

    /**
     * generate a new, uninitialized Rand Slice Implementation.
     *
     * @param use_even_odd_collection presumably stored in
     *        m_use_even_odd_collection (use Even-Odd strategy when
     *        collecting free space) — TODO confirm.
     * @param use_sorted_assimilation presumably stored in
     *        m_use_sorted_assimilation (sort free intervals decreasingly
     *        when assimilating free space) — TODO confirm.
     */
    DistRandSlice(bool use_even_odd_collection, bool use_sorted_assimilation);

    /**
     * copy constructor
     *
     * NOTE(review): the class holds raw pointers (m_disks, m_partitions,
     * m_interval_tree); whether the copy is deep is not visible here.
     *
     * @param orig original DistRandSlice
     */
    DistRandSlice(const DistRandSlice& orig);

    /**
     * Destructor
     */
    virtual ~DistRandSlice();

    /**
     * @see Distributor::placeExtent
     *
     * @param position position to place. Presumably a position inside the
     *                 virtual allocation space — TODO confirm.
     * @return presumably the ID of the disk the position maps to —
     *         TODO confirm.
     */
    uint64_t
    placeExtent(int64_t position);

    /**
     * @see Distributor::setConfiguration
     *
     * @param disks      list of disks to configure the distribution with.
     * @param extentsize extent size (see getExtentsize()).
     * @param copies     number of copies to be distributed (see
     *                   getCopies()).
     */
    virtual void
    setConfiguration(std::list<Disk*>* disks, int64_t extentsize,
                     int32_t copies);

    /**
     * @see Distributor::addDisks
     *
     * @param disks list of disks to add to the distribution.
     */
    virtual void
    addDisks(std::list<Disk*>* disks);

    /**
     * Added for GekkoFS - Remove the Disks given by their IDs
     * from the distribution.
     * TODO(dauer): Decide & document freeing of memory
     *
     * @param disk_ids IDs of the disks to remove.
     */
    void
    removeDisks(const std::vector<uint64_t>& disk_ids);

    /**
     * @see Distributor::getDisks
     *
     * NOTE(review): returns a raw pointer to a list; who owns and frees it
     * is not visible in this header — confirm before use.
     */
    virtual std::list<Disk*>*
    getDisks() const;

    /**
     * @see Distributor::getExtentsize
     *
     * @return the stored extent size (m_extentsize).
     */
    virtual int64_t
    getExtentsize() const {
        return m_extentsize;
    }

    /**
     * @see Distributor::getCopies
     *
     * @return the stored number of copies (m_copies).
     */
    virtual int32_t
    getCopies() const {
        return m_copies;
    }

    /**
     * @return the number of partitions currently in the system
     */
    uint64_t
    getNumPartitions() const {
        return m_num_partitions;
    }

    /**
     * @return the number of intervals currently in the system
     */
    uint64_t
    getNumIntervals() const;

    /**
     * Deserialize from file.
     *
     * @param filename name of the file to read from.
     */
    void
    from_file(std::string filename);

    /**
     * Serialize to file.
     *
     * @param filename name of the file to write to.
     */
    void
    to_file(std::string filename);

private:
    // Internal helpers. Only the declarations are visible here; the notes
    // below are inferred from names and signatures and are to be confirmed
    // against the implementation.

    // Presumably releases the internally held state — TODO confirm.
    void
    cleanup(void);

    // Presumably builds the initial partitions from the given disks —
    // TODO confirm.
    void
    create_partitions(std::list<Disk*>* disks);

    // Presumably extends the existing partitions with the given disks —
    // TODO confirm.
    void
    add_partitions(std::list<Disk*>* disks);

    // Capacity of a partition (by partition ID) for a given new total
    // capacity; exact scaling not visible here.
    uint64_t
    partition_capacity(uint64_t part_id, uint64_t new_total_capacity);

    // Overload of the above taking the disk instead of a partition ID.
    uint64_t
    partition_capacity(Disk* disk, uint64_t new_total_capacity);

    // Takes the old partitions (mutable) and the new partitions (read-only)
    // as lists/maps of ID/size pairs — pair semantics to be confirmed.
    void
    redistribute(
            std::unordered_map<uint64_t, uint64_t>& old_partitions,
            const std::list<std::pair<uint64_t, uint64_t>>& new_partitions);

    // Fills free_space from old_partitions; both are taken by mutable
    // reference.
    void
    collect_free_space(std::unordered_map<uint64_t, uint64_t>& old_partitions,
                       std::list<std::pair<uint64_t, uint64_t>>& free_space);

    // Variant of collect_free_space; presumably selected by
    // m_use_even_odd_collection — TODO confirm.
    void
    collect_free_space_even_odd(
            std::unordered_map<uint64_t, uint64_t>& old_partitions,
            std::list<std::pair<uint64_t, uint64_t>>& free_space);

    // Consumes free_space (mutable) for new_partitions (read-only).
    void
    reuse_free_space(
            std::list<std::pair<uint64_t, uint64_t>>& free_space,
            const std::list<std::pair<uint64_t, uint64_t>>& new_partitions);

    // Variant of reuse_free_space; presumably selected by
    // m_use_sorted_assimilation — TODO confirm.
    void
    reuse_free_space_sort(
            std::list<std::pair<uint64_t, uint64_t>>& free_space,
            const std::list<std::pair<uint64_t, uint64_t>>& new_partitions);


// Debug-only helpers. This header #defines DEBUG itself before the VDRIVE
// namespace, so as written these members are always declared.
#if defined DEBUG
    void
    dump_intervals(void);
    void
    dump_free_space(const std::list<std::pair<uint64_t, uint64_t>>& l) const;
    void
    verify_partitions(void);
    std::unordered_map<uint64_t, uint64_t>
    compute_interval_sizes(void);
#endif

    /**
     * Extent size as given to setConfiguration.
     */
    int64_t m_extentsize;

    /**
     * number of copies to be distributed.
     */
    int32_t m_copies;

    /**
     * the disks as given to addDisks (or setConfiguration), keyed by
     * uint64_t (presumably the disk ID — TODO confirm). Raw pointers:
     * ownership is not visible in this header.
     */
    std::unordered_map<uint64_t, Disk*>* m_disks;

    /**
     * number of Disks contained by this Distributor.
     */
    uint64_t m_num_disks;

    /**
     * partition info: id -> capacity (Scaled relative to VSPACE_MAX. This
     * unit is also referred to as blocks.)
     */
    std::unordered_map<uint64_t, uint64_t>* m_partitions;

    /**
     * number of partitions
     */
    uint64_t m_num_partitions;

    /**
     * interval tree for searches (uint64_t keys, uint64_t values)
     */
    typedef ::mdds::flat_segment_tree<uint64_t, uint64_t> flat_segment_tree;
    flat_segment_tree* m_interval_tree;

    /**
     * capacity of the system (absolute i.e. sum of disk capacities)
     */
    uint64_t m_capacity;


    /**
     * use Even-Odd strategy when collecting free space
     */
    bool m_use_even_odd_collection;

    /**
     * sort free intervals decreasingly when assimilating free space
     */
    bool m_use_sorted_assimilation;


    /**
     * Init value for tree creation, needed for deserialization only.
     */
    uint64_t m_tree_init_value;
};
} // namespace VDRIVE
#endif /* _DISTRANDSLICE_H */
+1 −1
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@
#include <cassert>
#include <limits>

#include "flat_segment_tree_node.hpp"
#include <common/rpc/random_slicing/flat_segment_tree_node.hpp>

#ifdef UNIT_TEST
#include <cstdio>
Loading