Commit 9bb75e26 authored by Ramon Nou's avatar Ramon Nou
Browse files

merge changes from master

parent 4e873c61
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -141,6 +141,16 @@ add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE})
message(STATUS "[gekkofs] Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}")
mark_as_advanced(CLIENT_LOG_MESSAGE_SIZE)

# Guided data distributor: when ON, USE_GUIDED is defined in the generated
# cmake_configure.hpp.
option(USE_GUIDED "Use guided data distributor " OFF)
message(STATUS "[gekkofs] Guided data distributor: ${USE_GUIDED}")

# Input file for the guided distributor. The value is substituted verbatim
# into a string literal in cmake_configure.hpp, and CMake performs no `~`
# expansion, so the default is built from HOME at configure time instead of
# relying on a literal tilde.
# NOTE(review): assumes the consumer opens the path with a plain file API
# that does not expand `~` either -- confirm against the distributor sources.
set(USE_GUIDED_PATH "$ENV{HOME}/guided.txt" CACHE STRING "File Path for guided distributor")
message(STATUS "[gekkofs] Guided data distributor input file path: ${USE_GUIDED_PATH}")

# When ON, TRACE_GUIDED is defined in the generated cmake_configure.hpp.
option(TRACE_GUIDED "Output at INFO level information for guided distributor generation: " OFF)
message(STATUS "[gekkofs] Generate log line at INFO level for guided distributor: ${TRACE_GUIDED}")

configure_file(include/global/cmake_configure.hpp.in include/global/cmake_configure.hpp)

# Imported target
+44 −0
Original line number Diff line number Diff line
@@ -239,6 +239,50 @@ can be provided to set the path to the log file, and the log module can be
selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}`
environment variable.

### Data distributors
The data distribution can be selected at compilation time. Two distributors are available:

#### Simple Hash (Default)
Chunks are distributed randomly to the different GekkoFS servers.

#### Guided Distributor
The guided distributor places chunks according to a shared file with the following format:
`<path> <chunk_number> <host>`

Chunks that are not specified in the file are distributed using the Simple Hash distributor.

To generate such a file, first run the application once with the following compilation options:
* `TRACE_GUIDED` ON
* `USE_GUIDED` OFF

This enables an `INFO`-level log at the clients, which emits several lines that can be used to generate the input file.
At this stage, each node should generate a separate file. In SLURM, this can be done with the following line:
`srun -N 10 -n 320 --export="ALL" /bin/bash -c "export LIBGKFS_LOG_OUTPUT=${HOME}/test/GUIDED-\${SLURMD_NODENAME}.txt;LD_PRELOAD=${GKFS_PRLD} <app>"`

Then, use `utils/generate.py` to create the output file:
* `python utils/generate.py <numnode> ~/test/GUIDED-node<x> >> guided.txt`

The following script can be used; `SLURM` nodes are normally listed in alphabetical order:

```
#!/bin/bash
# Rebuild guided.txt from the per-node trace files. The running index is
# passed to generate.py as the node number, one per input file.
# -f: do not fail on the first run, when guided.txt does not exist yet.
rm -f guided.txt
index=0
for i in ~/test/GUIDED-*.txt; do
    echo "$i" "$index"
    python utils/generate.py "$index" "$i" >> guided.txt
    index=$((index+1))
done
```
Finally, enable the distributor using the following compilation flags:
* `TRACE_GUIDED` OFF
* `USE_GUIDED` ON
* `USE_GUIDED_PATH` `<path to guided.txt>`





### Acknowledgment

+3 −0
Original line number Diff line number Diff line
@@ -16,5 +16,8 @@

#cmakedefine01 CREATE_CHECK_PARENTS
#cmakedefine01 LOG_SYSCALLS
// Defined when the USE_GUIDED build option is ON; left undefined otherwise.
#cmakedefine USE_GUIDED
// Defined when the TRACE_GUIDED build option is ON; left undefined otherwise.
#cmakedefine TRACE_GUIDED
// Filled in at configure time with the value of the USE_GUIDED_PATH cache entry.
#define GUIDED_PATH "@USE_GUIDED_PATH@"

#endif //FS_CMAKE_CONFIGURE_H
+22 −0
Original line number Diff line number Diff line
@@ -14,9 +14,12 @@
#ifndef GEKKOFS_RPC_DISTRIBUTOR_HPP
#define GEKKOFS_RPC_DISTRIBUTOR_HPP

#include "../include/config.hpp"
#include <vector>
#include <string>
#include <numeric>
#include <unordered_map>
#include <fstream>

namespace gkfs {
namespace rpc {
@@ -87,6 +90,25 @@ public:
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};

/**
 * Distributor implementation that carries a string-keyed host lookup table
 * in addition to a string hasher.
 *
 * NOTE(review): the member function definitions are not part of this header;
 * the comments below describe only what the declarations show. Presumably the
 * table is loaded from the guided input file (GUIDED_PATH) and lookups fall
 * back to hashing for entries that are absent -- confirm in the
 * implementation file.
 */
class GuidedDistributor : public Distributor {
private:
    // Host id supplied to the constructor as `localhost`
    // (presumably the id of the local node -- verify against the caller).
    host_t localhost_;
    // Host count supplied to the constructor as `hosts_size`.
    unsigned int hosts_size_;
    // Collection of host ids; how it is populated is not visible here.
    std::vector<host_t> all_hosts_;
    // Standard string hasher.
    std::hash<std::string> str_hash;
    // String key -> host id. The exact key format is defined by the
    // implementation -- TODO confirm.
    std::unordered_map< std::string, host_t > mapping;
public:
    /**
     * @param localhost  host id stored for this distributor
     * @param hosts_size number of hosts taking part in the distribution
     */
    GuidedDistributor(host_t localhost, unsigned int hosts_size);

    /// Overrides Distributor::localhost().
    host_t localhost() const override;

    /// Overrides Distributor::locate_data(); returns a host id for the
    /// given path and chunk id.
    host_t locate_data(const std::string& path, const chunkid_t& chnk_id) const override;

    /// Overrides Distributor::locate_file_metadata(); returns a host id for
    /// the given path.
    host_t locate_file_metadata(const std::string& path) const override;

    /// Overrides Distributor::locate_directory_metadata(); returns a list of
    /// host ids for the given path.
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};

} // namespace rpc
} // namespace gkfs

+8 −3
Original line number Diff line number Diff line
@@ -126,9 +126,14 @@ void init_ld_environment_() {
    auto forwarder_dist = std::make_shared<gkfs::rpc::ForwarderDistributor>(CTX->fwd_host_id(), CTX->hosts().size());
    CTX->distributor(forwarder_dist);
#else
    auto simple_hash_dist = std::make_shared<gkfs::rpc::SimpleHashDistributor>(CTX->local_host_id(),

#ifdef USE_GUIDED
    auto distributor = std::make_shared<gkfs::rpc::GuidedDistributor>(CTX->local_host_id(), CTX->hosts().size());
#else   
    auto distributor = std::make_shared<gkfs::rpc::SimpleHashDistributor>(CTX->local_host_id(),
                                                                               CTX->hosts().size());
    CTX->distributor(simple_hash_dist);
#endif    
    CTX->distributor(distributor);
#endif  

    LOG(INFO, "Retrieving file system configuration...");
Loading