Loading CMakeLists.txt +10 −0 Original line number Diff line number Diff line Loading @@ -141,6 +141,16 @@ add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE}) message(STATUS "[gekkofs] Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}") mark_as_advanced(CLIENT_LOG_MESSAGE_SIZE) option(USE_GUIDED "Use guided data distributor " OFF) message(STATUS "[gekkofs] Guided data distributor: ${USE_GUIDED}") set(USE_GUIDED_PATH "~/guided.txt" CACHE STRING "File Path for guided distributor") set_property(CACHE USE_GUIDED_PATH PROPERTY STRINGS) message(STATUS "[gekkofs] Guided data distributor input file path: ${USE_GUIDED_PATH}") option(TRACE_GUIDED "Output at INFO level information for guided distributor generation: " OFF) message(STATUS "[gekkofs] Generate log line at INFO level for guided distributor: ${TRACE_GUIDED}") configure_file(include/global/cmake_configure.hpp.in include/global/cmake_configure.hpp) # Imported target Loading README.md +34 −0 Original line number Diff line number Diff line Loading @@ -243,6 +243,40 @@ can be provided to set the path to the log file, and the log module can be selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}` environment variable. ### Data distributors The data distributor is selected at compilation time; two distributors are available: ## Simple Hash (Default) Chunks are distributed randomly to the different GekkoFS servers. ## Guided Distributor The guided distributor distributes chunks using a shared file with the following format: `<path> <chunk_number> <host>` Chunks not specified in the file are distributed using the Simple Hash distributor. To generate such a file, first run the application with the following compilation options: * `TRACE_GUIDED` ON * `USE_GUIDED` OFF This enables an `INFO`-level log at the clients, producing lines that can be used to generate the input file. 
In this stage, each node should generate a separate file. This can be done in SLURM using the following line: `srun -N 10 -n 320 --export="ALL" /bin/bash -c "export LIBGKFS_LOG_OUTPUT=${HOME}/test/GLOBAL.txt;LD_PRELOAD=${GKFS_PRLD} <app>"` Then, use `utils/generate.py` to create the output file. * `python utils/generate.py ~/test/GLOBAL.txt >> guided.txt` This should work if the nodes are sorted in alphabetical order, which is the usual scenario. Finally, enable the distributor using the following compilation flags: * `TRACE_GUIDED` OFF * `USE_GUIDED` ON * `USE_GUIDED_PATH` `<path to guided.txt>` ### Acknowledgment Loading include/client/preload_context.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include <memory> #include <vector> #include <string> #include <config.hpp> #include <bitset> Loading Loading @@ -92,7 +93,9 @@ private: mutable std::mutex internal_fds_mutex_; bool internal_fds_must_relocate_; std::bitset<MAX_USER_FDS> protected_fds_; #ifdef TRACE_GUIDED std::string hostname = ""; #endif public: static PreloadContext* getInstance() { static PreloadContext instance; Loading Loading @@ -167,6 +170,9 @@ public: void protect_user_fds(); void unprotect_user_fds(); #ifdef TRACE_GUIDED std::string get_hostname(); #endif }; } // namespace preload Loading include/daemon/classes/rpc_data.hpp +13 −1 Original line number Diff line number Diff line Loading @@ -18,6 +18,13 @@ #include <daemon/daemon.hpp> namespace gkfs { /* Forward declarations */ namespace rpc { class Distributor; } namespace daemon { class RPCData { Loading @@ -32,6 +39,8 @@ private: ABT_pool io_pool_; std::vector<ABT_xstream> io_streams_; std::string self_addr_str_; // Distributor std::shared_ptr<gkfs::rpc::Distributor> distributor_; public: Loading Loading @@ -62,6 +71,9 @@ public: void self_addr_str(const std::string& addr_str); const std::shared_ptr<gkfs::rpc::Distributor>& distributor() const; void distributor(const std::shared_ptr<gkfs::rpc::Distributor>& 
distributor); }; } // namespace daemon Loading include/daemon/daemon.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -29,6 +29,7 @@ extern "C" { #include <daemon/classes/fs_data.hpp> #include <daemon/classes/rpc_data.hpp> #include <global/rpc/distributor.hpp> #define GKFS_DATA (static_cast<gkfs::daemon::FsData*>(gkfs::daemon::FsData::getInstance())) #define RPC_DATA (static_cast<gkfs::daemon::RPCData*>(gkfs::daemon::RPCData::getInstance())) Loading Loading
CMakeLists.txt +10 −0 Original line number Diff line number Diff line Loading @@ -141,6 +141,16 @@ add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE}) message(STATUS "[gekkofs] Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}") mark_as_advanced(CLIENT_LOG_MESSAGE_SIZE) option(USE_GUIDED "Use guided data distributor " OFF) message(STATUS "[gekkofs] Guided data distributor: ${USE_GUIDED}") set(USE_GUIDED_PATH "~/guided.txt" CACHE STRING "File Path for guided distributor") set_property(CACHE USE_GUIDED_PATH PROPERTY STRINGS) message(STATUS "[gekkofs] Guided data distributor input file path: ${USE_GUIDED_PATH}") option(TRACE_GUIDED "Output at INFO level information for guided distributor generation: " OFF) message(STATUS "[gekkofs] Generate log line at INFO level for guided distributor: ${TRACE_GUIDED}") configure_file(include/global/cmake_configure.hpp.in include/global/cmake_configure.hpp) # Imported target Loading
README.md +34 −0 Original line number Diff line number Diff line Loading @@ -243,6 +243,40 @@ can be provided to set the path to the log file, and the log module can be selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}` environment variable. ### Data distributors The data distributor is selected at compilation time; two distributors are available: ## Simple Hash (Default) Chunks are distributed randomly to the different GekkoFS servers. ## Guided Distributor The guided distributor distributes chunks using a shared file with the following format: `<path> <chunk_number> <host>` Chunks not specified in the file are distributed using the Simple Hash distributor. To generate such a file, first run the application with the following compilation options: * `TRACE_GUIDED` ON * `USE_GUIDED` OFF This enables an `INFO`-level log at the clients, producing lines that can be used to generate the input file. In this stage, each node should generate a separate file. This can be done in SLURM using the following line: `srun -N 10 -n 320 --export="ALL" /bin/bash -c "export LIBGKFS_LOG_OUTPUT=${HOME}/test/GLOBAL.txt;LD_PRELOAD=${GKFS_PRLD} <app>"` Then, use `utils/generate.py` to create the output file. * `python utils/generate.py ~/test/GLOBAL.txt >> guided.txt` This should work if the nodes are sorted in alphabetical order, which is the usual scenario. Finally, enable the distributor using the following compilation flags: * `TRACE_GUIDED` OFF * `USE_GUIDED` ON * `USE_GUIDED_PATH` `<path to guided.txt>` ### Acknowledgment Loading
include/client/preload_context.hpp +7 −1 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include <memory> #include <vector> #include <string> #include <config.hpp> #include <bitset> Loading Loading @@ -92,7 +93,9 @@ private: mutable std::mutex internal_fds_mutex_; bool internal_fds_must_relocate_; std::bitset<MAX_USER_FDS> protected_fds_; #ifdef TRACE_GUIDED std::string hostname = ""; #endif public: static PreloadContext* getInstance() { static PreloadContext instance; Loading Loading @@ -167,6 +170,9 @@ public: void protect_user_fds(); void unprotect_user_fds(); #ifdef TRACE_GUIDED std::string get_hostname(); #endif }; } // namespace preload Loading
include/daemon/classes/rpc_data.hpp +13 −1 Original line number Diff line number Diff line Loading @@ -18,6 +18,13 @@ #include <daemon/daemon.hpp> namespace gkfs { /* Forward declarations */ namespace rpc { class Distributor; } namespace daemon { class RPCData { Loading @@ -32,6 +39,8 @@ private: ABT_pool io_pool_; std::vector<ABT_xstream> io_streams_; std::string self_addr_str_; // Distributor std::shared_ptr<gkfs::rpc::Distributor> distributor_; public: Loading Loading @@ -62,6 +71,9 @@ public: void self_addr_str(const std::string& addr_str); const std::shared_ptr<gkfs::rpc::Distributor>& distributor() const; void distributor(const std::shared_ptr<gkfs::rpc::Distributor>& distributor); }; } // namespace daemon Loading
include/daemon/daemon.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -29,6 +29,7 @@ extern "C" { #include <daemon/classes/fs_data.hpp> #include <daemon/classes/rpc_data.hpp> #include <global/rpc/distributor.hpp> #define GKFS_DATA (static_cast<gkfs::daemon::FsData*>(gkfs::daemon::FsData::getInstance())) #define RPC_DATA (static_cast<gkfs::daemon::RPCData*>(gkfs::daemon::RPCData::getInstance())) Loading