Loading CMakeLists.txt +10 −0 Original line number Diff line number Diff line Loading @@ -141,6 +141,16 @@ add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE}) message(STATUS "[gekkofs] Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}") mark_as_advanced(CLIENT_LOG_MESSAGE_SIZE) option(USE_GUIDED "Use guided data distributor " OFF) message(STATUS "[gekkofs] Guided data distributor: ${USE_GUIDED}") set(USE_GUIDED_PATH "~/guided.txt" CACHE STRING "File Path for guided distributor") set_property(CACHE USE_GUIDED_PATH PROPERTY STRINGS) message(STATUS "[gekkofs] Guided data distributor input file path: ${USE_GUIDED_PATH}") option(TRACE_GUIDED "Output at INFO level information for guided distributor generation: " OFF) message(STATUS "[gekkofs] Generate log line at INFO level for guided distributor: ${TRACE_GUIDED}") configure_file(include/global/cmake_configure.hpp.in include/global/cmake_configure.hpp) # Imported target Loading README.md +34 −0 Original line number Diff line number Diff line Loading @@ -239,6 +239,40 @@ can be provided to set the path to the log file, and the log module can be selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}` environment variable. ### Data distributors The data distributor is selected at compile time. Two distributors are available: #### Simple Hash (Default) Chunks are distributed pseudo-randomly, by hashing, across the GekkoFS servers. #### Guided Distributor The guided distributor places chunks according to a shared file in the following format: `<path> <chunk_number> <host>` Chunks that are not listed in the file are distributed using the Simple Hash distributor. To generate this file, first run the application once with the following compilation options: * `TRACE_GUIDED` ON * `USE_GUIDED` OFF This enables `INFO`-level log lines on the clients that can be used to generate the input file.
In this stage, each node should generate a separate file. With SLURM, this can be done using the following line: `srun -N 10 -n 320 --export="ALL" /bin/bash -c "export LIBGKFS_LOG_OUTPUT=${HOME}/test/GLOBAL.txt;LD_PRELOAD=${GKFS_PRLD} <app>"` Then use `utils/generate.py` to create the distributor input file: * `python utils/generate.py ~/test/GLOBAL.txt >> guided.txt` This should work if the nodes are sorted in alphabetical order, which is the usual scenario. Finally, enable the distributor using the following compilation flags: * `TRACE_GUIDED` OFF * `USE_GUIDED` ON * `USE_GUIDED_PATH` `<path to guided.txt>` ### Acknowledgment Loading include/client/preload_context.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include <memory> #include <vector> #include <string> #include <config.hpp> #include <bitset> Loading Loading @@ -92,7 +93,9 @@ private: mutable std::mutex internal_fds_mutex_; bool internal_fds_must_relocate_; std::bitset<MAX_USER_FDS> protected_fds_; #ifdef TRACE_GUIDED std::string hostname = ""; #endif public: static PreloadContext* getInstance() { static PreloadContext instance; Loading Loading @@ -167,6 +170,9 @@ public: void protect_user_fds(); void unprotect_user_fds(); #ifdef TRACE_GUIDED std::string get_hostname(); #endif }; } // namespace preload Loading include/global/cmake_configure.hpp.in +3 −0 Original line number Diff line number Diff line Loading @@ -16,5 +16,8 @@ #cmakedefine01 CREATE_CHECK_PARENTS #cmakedefine01 LOG_SYSCALLS #cmakedefine USE_GUIDED #cmakedefine TRACE_GUIDED #define GUIDED_PATH "@USE_GUIDED_PATH@" #endif //FS_CMAKE_CONFIGURE_H include/global/rpc/distributor.hpp +23 −0 Original line number Diff line number Diff line Loading @@ -14,9 +14,12 @@ #ifndef GEKKOFS_RPC_DISTRIBUTOR_HPP #define GEKKOFS_RPC_DISTRIBUTOR_HPP #include "../include/config.hpp" #include <vector> #include <string> #include <numeric> #include <unordered_map> #include <fstream> namespace gkfs { namespace rpc { Loading Loading @@ -87,6 
+90,26 @@ public:
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};

/**
 * Distributor implementation declared for the "guided" distribution mode.
 *
 * It overrides the four lookup operations declared below and, next to the
 * host bookkeeping members, holds a string hasher and a string -> host
 * table.
 *
 * NOTE(review): presumably `mapping` is filled from the guided-distributor
 * input file and `str_hash` provides the fallback placement for entries that
 * are not in the table — confirm against the implementation file, which is
 * not visible here.
 */
class GuidedDistributor : public Distributor {
private:
    /* NOTE(review): presumably the value handed back by localhost() — confirm. */
    host_t localhost_;
    /* NOTE(review): presumably the number of hosts taking part — confirm. */
    unsigned int hosts_size_;
    /* NOTE(review): presumably the list returned for directory metadata — confirm. */
    std::vector<host_t> all_hosts_;
    /* Standard-library hasher for std::string keys. */
    std::hash<std::string> str_hash;
    /* Table from a string key to a host; the key format is not visible here. */
    std::unordered_map< std::string, host_t > mapping;

public:
    /** Constructs the distributor from a local host id and a host count. */
    GuidedDistributor(host_t localhost, unsigned int hosts_size);

    /** Host id of the local host (override of the base-class query). */
    host_t localhost() const override;

    /** Host selected for chunk `chnk_id` of the file at `path`. */
    host_t locate_data(const std::string& path,
                       const chunkid_t& chnk_id) const override;

    /** Host selected for the metadata of the file at `path`. */
    host_t locate_file_metadata(const std::string& path) const override;

    /** Hosts selected for the metadata of the directory at `path`. */
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};

} // namespace rpc } // namespace gkfs Loading Loading
CMakeLists.txt +10 −0 Original line number Diff line number Diff line Loading @@ -141,6 +141,16 @@
add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE})
message(STATUS "[gekkofs] Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}")
mark_as_advanced(CLIENT_LOG_MESSAGE_SIZE)

# Guided data distributor configuration.
#   USE_GUIDED      - switch for the guided data distributor (default OFF).
#   USE_GUIDED_PATH - path of the guided distributor input file.
#   TRACE_GUIDED    - emit INFO-level log lines used to generate that input file.
# All three reach the sources through cmake_configure.hpp (configure_file below).
option(USE_GUIDED "Use guided data distributor " OFF)
message(STATUS "[gekkofs] Guided data distributor: ${USE_GUIDED}")

# The default is built from $ENV{HOME} instead of a literal "~": the value is
# pasted verbatim into a C string (GUIDED_PATH), and "~" is expanded only by
# shells, never by file-opening APIs, so "~/guided.txt" could not be opened.
set(USE_GUIDED_PATH "$ENV{HOME}/guided.txt" CACHE STRING "File Path for guided distributor")
message(STATUS "[gekkofs] Guided data distributor input file path: ${USE_GUIDED_PATH}")

option(TRACE_GUIDED "Output at INFO level information for guided distributor generation: " OFF)
message(STATUS "[gekkofs] Generate log line at INFO level for guided distributor: ${TRACE_GUIDED}")

configure_file(include/global/cmake_configure.hpp.in include/global/cmake_configure.hpp)

# Imported target Loading
README.md +34 −0 Original line number Diff line number Diff line Loading @@ -239,6 +239,40 @@ can be provided to set the path to the log file, and the log module can be selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}` environment variable. ### Data distributors The data distributor is selected at compile time. Two distributors are available: #### Simple Hash (Default) Chunks are distributed pseudo-randomly, by hashing, across the GekkoFS servers. #### Guided Distributor The guided distributor places chunks according to a shared file in the following format: `<path> <chunk_number> <host>` Chunks that are not listed in the file are distributed using the Simple Hash distributor. To generate this file, first run the application once with the following compilation options: * `TRACE_GUIDED` ON * `USE_GUIDED` OFF This enables `INFO`-level log lines on the clients that can be used to generate the input file. In this stage, each node should generate a separate file. With SLURM, this can be done using the following line: `srun -N 10 -n 320 --export="ALL" /bin/bash -c "export LIBGKFS_LOG_OUTPUT=${HOME}/test/GLOBAL.txt;LD_PRELOAD=${GKFS_PRLD} <app>"` Then use `utils/generate.py` to create the distributor input file: * `python utils/generate.py ~/test/GLOBAL.txt >> guided.txt` This should work if the nodes are sorted in alphabetical order, which is the usual scenario. Finally, enable the distributor using the following compilation flags: * `TRACE_GUIDED` OFF * `USE_GUIDED` ON * `USE_GUIDED_PATH` `<path to guided.txt>` ### Acknowledgment Loading
include/client/preload_context.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include <memory> #include <vector> #include <string> #include <config.hpp> #include <bitset> Loading Loading @@ -92,7 +93,9 @@ private: mutable std::mutex internal_fds_mutex_; bool internal_fds_must_relocate_; std::bitset<MAX_USER_FDS> protected_fds_; #ifdef TRACE_GUIDED std::string hostname = ""; #endif public: static PreloadContext* getInstance() { static PreloadContext instance; Loading Loading @@ -167,6 +170,9 @@ public: void protect_user_fds(); void unprotect_user_fds(); #ifdef TRACE_GUIDED std::string get_hostname(); #endif }; } // namespace preload Loading
include/global/cmake_configure.hpp.in +3 −0 Original line number Diff line number Diff line Loading @@ -16,5 +16,8 @@ #cmakedefine01 CREATE_CHECK_PARENTS #cmakedefine01 LOG_SYSCALLS #cmakedefine USE_GUIDED #cmakedefine TRACE_GUIDED #define GUIDED_PATH "@USE_GUIDED_PATH@" #endif //FS_CMAKE_CONFIGURE_H
include/global/rpc/distributor.hpp +23 −0 Original line number Diff line number Diff line Loading @@ -14,9 +14,12 @@
#ifndef GEKKOFS_RPC_DISTRIBUTOR_HPP
#define GEKKOFS_RPC_DISTRIBUTOR_HPP

#include "../include/config.hpp"
#include <vector>
#include <string>
#include <numeric>
#include <unordered_map>
#include <fstream>

namespace gkfs {
namespace rpc {

Loading Loading @@ -87,6 +90,26 @@ public:
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};

/**
 * Distributor implementation declared for the "guided" distribution mode.
 *
 * It overrides the four lookup operations declared below and, next to the
 * host bookkeeping members, holds a string hasher and a string -> host
 * table.
 *
 * NOTE(review): presumably `mapping` is filled from the guided-distributor
 * input file and `str_hash` provides the fallback placement for entries that
 * are not in the table — confirm against the implementation file, which is
 * not visible here.
 */
class GuidedDistributor : public Distributor {
private:
    /* NOTE(review): presumably the value handed back by localhost() — confirm. */
    host_t localhost_;
    /* NOTE(review): presumably the number of hosts taking part — confirm. */
    unsigned int hosts_size_;
    /* NOTE(review): presumably the list returned for directory metadata — confirm. */
    std::vector<host_t> all_hosts_;
    /* Standard-library hasher for std::string keys. */
    std::hash<std::string> str_hash;
    /* Table from a string key to a host; the key format is not visible here. */
    std::unordered_map< std::string, host_t > mapping;

public:
    /** Constructs the distributor from a local host id and a host count. */
    GuidedDistributor(host_t localhost, unsigned int hosts_size);

    /** Host id of the local host (override of the base-class query). */
    host_t localhost() const override;

    /** Host selected for chunk `chnk_id` of the file at `path`. */
    host_t locate_data(const std::string& path,
                       const chunkid_t& chnk_id) const override;

    /** Host selected for the metadata of the file at `path`. */
    host_t locate_file_metadata(const std::string& path) const override;

    /** Hosts selected for the metadata of the directory at `path`. */
    std::vector<host_t> locate_directory_metadata(const std::string& path) const override;
};

} // namespace rpc } // namespace gkfs Loading