Commit 29cc8203 authored by Marc Vef's avatar Marc Vef
Browse files

LRU address cache replaced with map; added mutex for address lookup

The LRU cache caused multiple issues with memory mappings, leading to
severe errors and hanging situations. It has been replaced with a
std::map.

An Argobots mutex was added for rpc address lookups as many threads
might insert and lookup this map in parallel. This caused bulk_transfer
segfaults as well as other file system crashes.

Cleanup of file system logging.
parent f101aaa9
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -3,7 +3,6 @@
#define LFS_RPC_DATA_HPP

#include <daemon/adafs_daemon.hpp>
#include <extern/lrucache/LRUCache11.hpp>

class RPCData {

+0 −228
Original line number Diff line number Diff line
/*
 * LRUCache11 - a templated C++11 based LRU cache class that allows
 * specification of
 * key, value and optionally the map container type (defaults to
 * std::unordered_map)
 * By using the std::unordered_map and a linked list of keys it allows O(1) insert, delete
 * and
 * refresh operations.
 *
 * This is a header-only library and all you need is the LRUCache11.hpp file
 *
 * Github: https://github.com/mohaps/lrucache11
 *
 * This is a follow-up to the LRUCache project -
 * https://github.com/mohaps/lrucache
 *
 * Copyright (c) 2012-22 SAURAV MOHAPATRA <mohaps@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#pragma once

#include <algorithm>
#include <cstdint>
#include <list>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <unordered_map>

namespace lru11 {
/**
 * base class to prevent copy
 * use as ClassName : private NoCopy {}
 * to prevent copy constructor of ClassName and assignment by copy
 */
    /**
     * Mixin base class that disables copying.
     * Inherit privately (class Foo : private NoCopy {}) to suppress the
     * copy constructor and copy assignment of the derived class.
     */
    class NoCopy {
    public:
        virtual ~NoCopy() = default;

        // Deleted copy operations are public so that an attempted copy
        // yields a clear "call to deleted function" diagnostic instead of
        // a misleading private-access error.
        NoCopy(const NoCopy&) = delete;

        const NoCopy& operator=(const NoCopy&) = delete;

    protected:
        // Only derived classes may construct instances.
        NoCopy() = default;
    };

/*
 * a noop lockable concept that can be used in place of std::mutex
 */
    /*
     * No-op lockable type. Satisfies the Lockable interface expected by
     * std::lock_guard so it can stand in for std::mutex when no
     * synchronization is wanted; every operation does nothing.
     */
    class NullLock {
    public:
        // Always "succeeds" immediately.
        bool try_lock() { return true; }

        void lock() { /* intentionally empty */ }

        void unlock() { /* intentionally empty */ }
    };

/**
 * error raised when a key not in cache is passed to get()
 */
    /**
     * Exception raised by Cache::get() when the requested key is not
     * present in the cache.
     */
    class KeyNotFound : public std::invalid_argument {
    public:
        KeyNotFound() : std::invalid_argument{"key_not_found"} {}
    };

    /**
     * Payload node stored in the cache's recency list: a key together
     * with its cached value.
     */
    template<typename K, typename V>
    struct KeyValuePair {
    public:
        K key;
        V value;

        KeyValuePair(const K& k, const V& v) : key{k}, value{v} {}
    };

/**
 *  The LRU Cache class templated by
 *      Key - key type
 *      Value - value type
 *      MapType - an associative container like std::unordered_map
 *      LockType - a lock type derived from the Lock class
 *          (default: NullLock = no synchronization)
 *
 *  The default NullLock based template is not thread-safe; however,
 *  passing Lock=std::mutex will make it thread-safe.
 */
    template<class Key, class Value, class Lock = NullLock,
            class Map = std::unordered_map<
                    Key, typename std::list<KeyValuePair<Key, Value>>::iterator>>
    class Cache : private NoCopy {
    public:
        typedef KeyValuePair<Key, Value> node_type;
        typedef std::list<KeyValuePair<Key, Value>> list_type;
        typedef Map map_type;
        typedef Lock lock_type;
        using Guard = std::lock_guard<lock_type>;

        /**
         * the max size is the hard limit of keys and (maxSize + elasticity) is the
         * soft limit
         * the cache is allowed to grow till maxSize + elasticity and is pruned back
         * to maxSize keys
         * set maxSize = 0 for an unbounded cache (but in that case, you're better off
         * using a std::unordered_map
         * directly anyway! :)
         */
        explicit Cache(size_t maxSize = 64, size_t elasticity = 10)
                : maxSize_(maxSize), elasticity_(elasticity) {}

        virtual ~Cache() = default;

        size_t size() const {
            Guard g(lock_);
            return cache_.size();
        }

        bool empty() const {
            Guard g(lock_);
            return cache_.empty();
        }

        void clear() {
            Guard g(lock_);
            cache_.clear();
            keys_.clear();
        }

        void insert(const Key& k, const Value& v) {
            Guard g(lock_);
            const auto iter = cache_.find(k);
            if (iter != cache_.end()) {
                iter->second->value = v;
                keys_.splice(keys_.begin(), keys_, iter->second);
                return;
            }

            keys_.emplace_front(k, v);
            cache_[k] = keys_.begin();
            prune();
        }

        bool tryGet(const Key& kIn, Value& vOut) {
            Guard g(lock_);
            const auto iter = cache_.find(kIn);
            if (iter == cache_.end()) {
                return false;
            }
            keys_.splice(keys_.begin(), keys_, iter->second);
            vOut = iter->second->value;
            return true;
        }

        const Value& get(const Key& k) {
            Guard g(lock_);
            const auto iter = cache_.find(k);
            if (iter == cache_.end()) {
                throw KeyNotFound();
            }
            keys_.splice(keys_.begin(), keys_, iter->second);
            return iter->second->value;
        }

        bool remove(const Key& k) {
            Guard g(lock_);
            auto iter = cache_.find(k);
            if (iter == cache_.end()) {
                return false;
            }
            keys_.erase(iter->second);
            cache_.erase(iter);
            return true;
        }

        bool contains(const Key& k) {
            Guard g(lock_);
            return cache_.find(k) != cache_.end();
        }

        size_t getMaxSize() const { return maxSize_; }

        size_t getElasticity() const { return elasticity_; }

        size_t getMaxAllowedSize() const { return maxSize_ + elasticity_; }

        template<typename F>
        void cwalk(F& f) const {
            Guard g(lock_);
            std::for_each(keys_.begin(), keys_.end(), f);
        }

    protected:
        size_t prune() {
            size_t maxAllowed = maxSize_ + elasticity_;
            if (maxSize_ == 0 || cache_.size() < maxAllowed) {
                return 0;
            }
            size_t count = 0;
            while (cache_.size() > maxSize_) {
                cache_.erase(keys_.back().key);
                keys_.pop_back();
                ++count;
            }
            return count;
        }

    private:
        mutable Lock lock_;
        Map cache_;
        list_type keys_;
        size_t maxSize_;
        size_t elasticity_;
    };

}  // namespace lru11
+2 −3
Original line number Diff line number Diff line
@@ -6,7 +6,6 @@
#include <preload/open_file_map.hpp>
// third party libs
#include <extern/spdlog/spdlog.h>
#include <extern/lrucache/LRUCache11.hpp>
#include <string>

// TODO singleton this stuff away
@@ -99,8 +98,8 @@ extern std::shared_ptr<struct FsConfig> fs_config;
// global logger instance
extern std::shared_ptr<spdlog::logger> ld_logger;
// rpc address cache
typedef lru11::Cache<uint64_t, hg_addr_t> KVCache;
extern KVCache rpc_address_cache;
extern std::map<uint64_t, hg_addr_t> rpc_address_cache;
extern ABT_mutex rpc_address_cache_mutex;
// file descriptor index validation flag
extern std::atomic<bool> fd_validation_needed;
// thread pool
+4 −9
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ static hg_return_t rpc_srv_write_data(hg_handle_t handle) {
    auto hgi = margo_get_info(handle);
    auto mid = margo_hg_info_get_instance(hgi);
    auto bulk_size = margo_bulk_get_size(in.bulk_handle);
    ADAFS_DATA->spdlogger()->info("{}() Got write RPC (local {}) with path {} size {} offset {}", __func__,
    ADAFS_DATA->spdlogger()->debug("{}() Got write RPC (local {}) with path {} size {} offset {}", __func__,
                                   (margo_get_info(handle)->target_id == ADAFS_DATA->host_id()), in.path, bulk_size,
                                   in.offset);
    /*
@@ -110,9 +110,6 @@ static hg_return_t rpc_srv_write_data(hg_handle_t handle) {
        chnk_ids_host[chnk_id_curr] = chnk_id_file; // save this id to host chunk list
        // offset case. Only relevant in the first iteration of the loop and if the chunk hashes to this host
        if (chnk_id_file == in.chunk_start && in.offset > 0) {
            // DEL BEGIN
            ADAFS_DATA->spdlogger()->info("{}() XXX offset case!", __func__);
            // DEL END
            // if only 1 destination and 1 chunk (small write) the transfer_size == bulk_size
            auto offset_transfer_size = (in.offset + bulk_size <= CHUNKSIZE) ? bulk_size : static_cast<size_t>(
                    CHUNKSIZE - in.offset);
@@ -139,12 +136,10 @@ static hg_return_t rpc_srv_write_data(hg_handle_t handle) {
            // last chunk might have different transfer_size
            if (chnk_id_curr == in.chunk_n - 1)
                transfer_size = chnk_size_left_host;
            // DEL BEGIN
            ADAFS_DATA->spdlogger()->info(
            ADAFS_DATA->spdlogger()->trace(
                    "{}() BULK_TRANSFER hostid {} file {} chnkid {} total_Csize {} Csize_left {} origin offset {} local offset {} transfersize {}",
                    __func__, ADAFS_DATA->host_id(), in.path, chnk_id_file, in.total_chunk_size, chnk_size_left_host,
                    origin_offset, local_offset, transfer_size);
            // DEL END
            // RDMA the data to here
            ret = margo_bulk_transfer(mid, HG_BULK_PULL, hgi->addr, in.bulk_handle, origin_offset,
                                      bulk_handle, local_offset, transfer_size);
+0 −10
Original line number Diff line number Diff line
@@ -210,16 +210,6 @@ ssize_t adafs_pwrite_ws(int fd, const void* buf, size_t count, off64_t offset) {
        if (i == dest_n - 1 && ((offset + count) % CHUNKSIZE) != 0) // receiver of last chunk must subtract
            total_chunk_size -= (CHUNKSIZE - ((offset + count) % CHUNKSIZE));
        auto args = make_unique<write_args>();
        // DEL BEGIN
        string ids = ""s;
        for (auto&& id : dest_ids[dest_idx[i]]) {
            ids += fmt::FormatInt(id).str() + "  "s;
        }
        ld_logger->info(
                "{}() destination {} chnk_offset {} size {} total_chnksize {} div {} mod {} chnkids\n{}",
                __func__, dest_idx[i], offset % CHUNKSIZE, count, total_chunk_size, total_chunk_size / CHUNKSIZE,
                total_chunk_size % CHUNKSIZE, ids);
        // DEL END
        args->path = path; // path
        args->total_chunk_size = total_chunk_size; // total size to write
        args->in_size = count;
Loading