diff --git a/README.md b/README.md index 28779a71cac64073efcf2f5fee9bb3010294bb20..00dc8c4e44eb0f7fc3b6ab7db34cbdee2f6f112b 100644 --- a/README.md +++ b/README.md @@ -192,10 +192,6 @@ Chunks are distributed randomly to the different GekkoFS servers. ### Guided Distributor -To use the Guided Distributor, Boost (specifically the Boost *Interval Container Library* (ICL) must be available). - -#### General - The guided distributor allows defining a specific distribution of data on a per directory or file basis. The distribution configurations are defined within a shared file (called `guided_config.txt` henceforth) with the following format: ` ` @@ -232,4 +228,4 @@ This software was partially supported by the ADA-FS project under the SPPEXA pro This software is partially supported by the FIDIUM project funded by the DFG. -This software is partially supported by the ADMIRE project (https://www.admire-eurohpc.eu/) funded by the European Union’s Horizon 2020 JTI-EuroHPC Research and Innovation Programme (Grant 956748). \ No newline at end of file +This software is partially supported by the ADMIRE project (https://www.admire-eurohpc.eu/) funded by the European Union’s Horizon 2020 JTI-EuroHPC Research and Innovation Programme (Grant 956748). diff --git a/include/common/rpc/distributor.hpp b/include/common/rpc/distributor.hpp index 329f1727d3a111ff46dbbc5bc3804d0489cd4b50..53910a8cc7a672f07759af4b3216508021e86b72 100644 --- a/include/common/rpc/distributor.hpp +++ b/include/common/rpc/distributor.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
@@ -35,10 +35,7 @@ #include #include #include - -#ifdef GKFS_USE_GUIDED_DISTRIBUTION -#include -#endif +#include namespace gkfs::rpc { @@ -140,15 +137,28 @@ public: std::vector locate_directory_metadata(const std::string& path) const override; }; -#ifdef GKFS_USE_GUIDED_DISTRIBUTION + +/* + * Class IntervalSet + * Collection of integer ranges kept in _intervals. The member functions + * defined in distributor.cpp store each range under its lower bound, with + * the upper bound as the mapped value, and IsInsideInterval() treats both + * bounds as inclusive. + * NOTE(review): IsInsideInterval() is declared with unsigned int while + * Add() uses chunkid_t; confirm both name the same type. + * FROM + *https://stackoverflow.com/questions/55646605/is-there-a-collection-for-storing-discrete-intervals + **/ +class IntervalSet { + std::map _intervals; + +public: + void Add(chunkid_t, chunkid_t); + bool + IsInsideInterval(unsigned int) const; +}; + class GuidedDistributor : public Distributor { private: host_t localhost_; unsigned int hosts_size_{0}; std::vector all_hosts_; std::hash str_hash; - std::unordered_map> + std::unordered_map> map_interval; std::vector prefix_list; // Should not be very long bool @@ -176,7 +186,6 @@ public: std::vector locate_directory_metadata(const std::string& path) const override; }; -#endif } // namespace gkfs::rpc diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index fcea2f774c047bf357ee007b347100a41c91707f..5dffa1dc2d6d4b2b3c457dccbbe012ca76a9e3c5 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -38,10 +38,6 @@ target_sources(distributor PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rpc/distributor.cpp ) -if(GKFS_USE_GUIDED_DISTRIBUTION) - find_package(Boost 1.53 REQUIRED) - target_link_libraries(distributor PRIVATE Boost::boost) -endif() if(GKFS_ENABLE_CODE_COVERAGE) target_code_coverage(distributor AUTO) diff --git a/src/common/rpc/distributor.cpp b/src/common/rpc/distributor.cpp index 5c95c53db1e44bf898c9fc904120520524c9a34a..9e5d22c3e31b7822893eb0b9409e670ab6426f75 100644 --- a/src/common/rpc/distributor.cpp +++ b/src/common/rpc/distributor.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2021, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2021, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain + Copyright 
2015-2022, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). @@ -125,7 +125,31 @@ std::vector ForwarderDistributor::locate_directory_metadata(const std::string& path) const { return all_hosts_; } -#ifdef GKFS_USE_GUIDED_DISTRIBUTION
+
+/**
+ * Inserts the closed range [smaller, bigger] into the set.
+ *
+ * Ranges are stored in _intervals keyed by their lower bound, with the
+ * upper bound as the mapped value. Every stored range that overlaps the
+ * new one, or touches it (e.g. [1,2] and [3,4]), is fused into a single
+ * entry. A fused entry never ends before any of the ranges it replaces.
+ *
+ * @param smaller inclusive lower bound (expected to be <= bigger)
+ * @param bigger  inclusive upper bound
+ */
+void
+IntervalSet::Add(chunkid_t smaller, chunkid_t bigger) {
+    /* First stored range that starts strictly after `smaller`. */
+    auto next = _intervals.upper_bound(smaller);
+    if(next != _intervals.begin()) {
+        /* prev starts at or before `smaller`; absorb it when it overlaps
+         * or is directly adjacent. Written without `+ 1` on the left-hand
+         * bound alone so the test cannot wrap around. */
+        const auto prev = std::prev(next);
+        if(prev->second >= smaller || prev->second + 1 == smaller) {
+            smaller = prev->first;
+            if(prev->second > bigger) {
+                bigger = prev->second; /* never shrink an existing range */
+            }
+            next = _intervals.erase(prev);
+        }
+    }
+    /* Absorb every following range that overlaps or touches the new one.
+     * next->first is strictly greater than a stored/requested lower
+     * bound, so `next->first - 1` cannot underflow. */
+    while(next != _intervals.end() && next->first - 1 <= bigger) {
+        if(next->second > bigger) {
+            bigger = next->second;
+        }
+        next = _intervals.erase(next);
+    }
+    _intervals[smaller] = bigger;
+}
+
+/**
+ * Tells whether a value is covered by one of the stored ranges.
+ *
+ * @param v value to look up
+ * @return true when some stored range [first, second] satisfies
+ *         first <= v <= second; false otherwise, including when the set
+ *         is empty or every stored range starts after v.
+ */
+bool
+IntervalSet::IsInsideInterval(unsigned int v) const {
+    const auto suspectNext = _intervals.upper_bound(v);
+    /* No range starts at or before v: there is no predecessor to inspect
+     * and stepping back from begin() would be undefined behaviour. */
+    if(suspectNext == _intervals.begin()) {
+        return false;
+    }
+    const auto suspect = std::prev(suspectNext);
+    return suspect->first <= v && v <= suspect->second;
+}
+
 bool GuidedDistributor::init_guided() { unsigned int destination_host; @@ -148,16 +172,15 @@ GuidedDistributor::init_guided() { } auto I = map_interval.find(path); - if(I == map_interval.end()) - map_interval[path] += make_pair( - boost::icl::discrete_interval::right_open( - chunk_id, chunk_id + 1), - destination_host + 1); - else if(I->second.find(chunk_id) == I->second.end()) - I->second.insert(make_pair( - boost::icl::discrete_interval::right_open( - chunk_id, chunk_id + 1), - destination_host + 1));
+        /* IntervalSet bounds are inclusive, so one chunk is the range
+         * [chunk_id, chunk_id] (the replaced code used the right-open
+         * range [chunk_id, chunk_id + 1)). The host is stored incremented
+         * by one. */
+        if(I == map_interval.end()) {
+            auto tmp = IntervalSet();
+            tmp.Add(chunk_id, chunk_id);
+            map_interval[path] = make_pair(tmp, destination_host + 1);
+        } else if(!I->second.first.IsInsideInterval(chunk_id)) {
+            /* Register only chunks not yet known for this path, as the
+             * replaced code did, and update the entry in place instead of
+             * copying the whole set.
+             * NOTE(review): a single host is kept per path, so the host
+             * of the most recently registered chunk applies to all of the
+             * path's chunks; confirm this is intended. */
+            I->second.first.Add(chunk_id, chunk_id);
+            I->second.second = destination_host + 1;
+        }
 } mapfile.close(); return true; @@ -200,9 +223,9 @@ GuidedDistributor::locate_data(const string& path, const chunkid_t& chnk_id) const { auto it 
= map_interval.find(path); if(it != map_interval.end()) { - auto it_f = it->second.find(chnk_id); - if(it_f != it->second.end()) { - return (it_f->second - + auto it_f = it->second.first.IsInsideInterval(chnk_id); + if(it_f) { + return (it->second.second - 1); // Decrement destination host from the interval_map } } @@ -227,6 +250,6 @@ GuidedDistributor::locate_file_metadata(const string& path) const { GuidedDistributor::locate_directory_metadata(const string& path) const { return all_hosts_; } -#endif + } // namespace rpc } // namespace gkfs diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 13a9051ece5df97fe3c044feaf198eb363c058ab..fd0f11b0b8b605c1d83ec012782260c020848119 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -66,10 +66,9 @@ target_sources(tests ${CMAKE_CURRENT_LIST_DIR}/test_example_01.cpp ${CMAKE_CURRENT_LIST_DIR}/test_utils_arithmetic.cpp ${CMAKE_CURRENT_LIST_DIR}/test_helpers.cpp - ) -if(GKFS_TESTS_GUIDED_DISTRIBUTION) - target_sources(tests PRIVATE ${CMAKE_CURRENT_LIST_DIR}/test_guided_distributor.cpp) -endif() + ${CMAKE_CURRENT_LIST_DIR}/test_guided_distributor.cpp) + + target_link_libraries(tests PRIVATE catch2_main