Commit 9479232e authored by Ramon Nou's avatar Ramon Nou Committed by Ramon Nou
Browse files

Implemented Guided Distributor (rebase master from GekkoFWD), Added Hostname in client logs

parent 4e873c61
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -141,6 +141,16 @@ add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE})
message(STATUS "[gekkofs] Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}")
mark_as_advanced(CLIENT_LOG_MESSAGE_SIZE)

# Compile-time switch for the guided data distributor. The value is exported
# to C++ through include/global/cmake_configure.hpp (see configure_file below).
option(USE_GUIDED "Use guided data distributor " OFF)
message(STATUS "[gekkofs] Guided data distributor: ${USE_GUIDED}")

# Path of the guided distributor input file; it is substituted verbatim into
# the GUIDED_PATH define of cmake_configure.hpp.
# NOTE(review): the default starts with "~", which is a shell feature and is
# not expanded when the string is handed to a C/C++ file API as-is -- confirm
# that the consumer expands it, or pass an absolute path with -DUSE_GUIDED_PATH.
# (The former empty `set_property(CACHE ... PROPERTY STRINGS)` call was dropped:
# with no values it attaches no choice list and had no effect.)
set(USE_GUIDED_PATH "~/guided.txt" CACHE STRING "File Path for guided distributor")
message(STATUS "[gekkofs] Guided data distributor input file path: ${USE_GUIDED_PATH}")

# Compile-time switch that makes the client emit the INFO-level log lines used
# to generate the guided distributor input file.
option(TRACE_GUIDED "Output at INFO level information for guided distributor generation: " OFF)
message(STATUS "[gekkofs] Generate log line at INFO level for guided distributor: ${TRACE_GUIDED}")

configure_file(include/global/cmake_configure.hpp.in include/global/cmake_configure.hpp)

# Imported target
+34 −0
Original line number Diff line number Diff line
@@ -239,6 +239,40 @@ can be provided to set the path to the log file, and the log module can be
selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}`
environment variable.

### Data distributors
The data distributor is selected at compilation time. Two distributors are available:

#### Simple Hash (Default)
Chunks are distributed randomly to the different GekkoFS servers.

#### Guided Distributor
The guided distributor distributes chunks according to a shared file with the following format:
`<path> <chunk_number> <host>`

Chunks that are not specified in the file are distributed using the Simple Hash distributor.

To generate such a file, first run the application once with GekkoFS compiled using the following options:
* `TRACE_GUIDED` ON
* `USE_GUIDED` OFF

This enables an `INFO`-level log at the clients, which produces several lines that can be used to generate the input file.
In this stage, each node should generate a separate file. With SLURM, this can be done using the following command:
`srun -N 10 -n 320 --export="ALL" /bin/bash -c "export LIBGKFS_LOG_OUTPUT=${HOME}/test/GLOBAL.txt;LD_PRELOAD=${GKFS_PRLD} <app>"`

Then, use the `utils/generate.py` script to create the output file:
* `python utils/generate.py ~/test/GLOBAL.txt >> guided.txt`

This should work if the nodes are sorted in alphabetical order, which is the usual scenario.

Finally, enable the distributor using the following compilation flags:
* `TRACE_GUIDED` OFF
* `USE_GUIDED` ON
* `USE_GUIDED_PATH` `<path to guided.txt>`





### Acknowledgment

+7 −1
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <memory>
#include <vector>
#include <string>
#include <config.hpp>

#include <bitset>

@@ -92,7 +93,9 @@ private:
    mutable std::mutex internal_fds_mutex_;
    bool internal_fds_must_relocate_;
    std::bitset<MAX_USER_FDS> protected_fds_;

#ifdef TRACE_GUIDED
    // Only present in builds configured with TRACE_GUIDED; starts out empty.
    // NOTE(review): presumably holds this client's host name for the
    // guided-distributor trace log lines -- confirm where it is filled in.
    std::string hostname = "";
#endif
public:
    static PreloadContext* getInstance() {
        static PreloadContext instance;
@@ -167,6 +170,9 @@ public:
    void protect_user_fds();

    void unprotect_user_fds();
#ifdef TRACE_GUIDED
    // Only declared in builds configured with TRACE_GUIDED. Returns a host
    // name string by value.
    // NOTE(review): not const-qualified, so the implementation may populate
    // the `hostname` member lazily -- confirm against the definition.
    std::string get_hostname();
#endif
};

} // namespace preload
+3 −0
Original line number Diff line number Diff line
@@ -16,5 +16,8 @@

// 0/1 switches: always defined, to 1 when the CMake variable of the same name
// is true and to 0 otherwise.
#cmakedefine01 CREATE_CHECK_PARENTS
#cmakedefine01 LOG_SYSCALLS
// Presence-only switches: defined when the CMake option of the same name is
// ON and left undefined otherwise, so test them with ifdef/ifndef.
#cmakedefine USE_GUIDED
#cmakedefine TRACE_GUIDED
// Input file path for the guided distributor, filled in at configure time
// from the USE_GUIDED_PATH CMake cache entry.
#define GUIDED_PATH "@USE_GUIDED_PATH@"

#endif //FS_CMAKE_CONFIGURE_H
+23 −0
Original line number Diff line number Diff line
@@ -14,9 +14,12 @@
#ifndef GEKKOFS_RPC_DISTRIBUTOR_HPP
#define GEKKOFS_RPC_DISTRIBUTOR_HPP

#include "../include/config.hpp"
#include <vector>
#include <string>
#include <numeric>
#include <unordered_map>
#include <fstream>

namespace gkfs {
namespace rpc {
@@ -87,6 +90,26 @@ public:
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};

/**
 * Distributor implementation for the "guided" data distribution mode.
 *
 * Per the project README, chunks listed in a shared input file
 * (`<path> <chunk_number> <host>` per line) are placed on the given host and
 * all other chunks fall back to the Simple Hash distribution. The member
 * functions are only declared here; their definitions live outside this header.
 */
class GuidedDistributor : public Distributor {
private:
    // Host id passed to the constructor as `localhost`.
    host_t localhost_;
    // Number of hosts passed to the constructor as `hosts_size`.
    unsigned int hosts_size_;
    // NOTE(review): presumably the ids of all hosts (0 .. hosts_size_ - 1),
    // used for directory metadata lookups -- confirm in the constructor.
    std::vector<host_t> all_hosts_;
    // String hasher; presumably used for the hash-based fallback placement.
    std::hash<std::string> str_hash;
    // Explicit placement table mapping a string key to a host.
    // NOTE(review): the key format (likely derived from path and chunk id) and
    // where the table is loaded from are not visible here -- confirm.
    std::unordered_map< std::string, host_t > mapping;
public:
    /**
     * @param localhost  id of the local host
     * @param hosts_size total number of hosts
     */
    GuidedDistributor(host_t localhost, unsigned int hosts_size);

    /// @return the local host id.
    host_t localhost() const override;

    /// @return the host responsible for chunk `chnk_id` of the file at `path`.
    host_t locate_data(const std::string& path, const chunkid_t& chnk_id) const override;

    /// @return the host responsible for the metadata of the file at `path`.
    host_t locate_file_metadata(const std::string& path) const override;

    /// @return the hosts to query for the metadata of the directory at `path`.
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};


} // namespace rpc
} // namespace gkfs

Loading