Commit 525ca153 authored by Marc Vef's avatar Marc Vef
Browse files

Add a directory entry cache to avoid stat RPCs. A directory's cached entries are flushed when that directory is closed.

This uses the extended dir RPC call.
parent 476e2933
Loading
Loading
Loading
Loading
+30 −5
Original line number Diff line number Diff line
@@ -30,17 +30,37 @@
#ifndef GKFS_CLIENT_CACHE
#define GKFS_CLIENT_CACHE

#include <client/open_file_map.hpp>

#include <ctime>
#include <functional>
#include <string>
#include <unordered_map>
#include <mutex>
#include <optional>
#include <cstdint>

namespace gkfs::cache {

/**
 * Basic metadata cached for a single directory entry.
 */
struct cache_entry {
    // type of the entry (compared against FileType::directory by users)
    gkfs::filemap::FileType file_type;
    // size of the entry as delivered with the directory listing
    uint64_t size;
    // ctime of the entry as delivered with the directory listing
    time_t ctime;
};

/**
 * Client-side cache for directory entry metadata, organized per directory.
 */
class Cache {
private:
    std::unordered_map<std::string, std::string> entries_;
    // directory id -> (entry name -> cached metadata of that entry)
    std::unordered_map<uint32_t, std::unordered_map<std::string, cache_entry>>
            entries_;
    // directory path -> directory id used as key into entries_
    std::unordered_map<std::string, uint32_t> entry_dir_id_;
    // serializes access to entries_ and entry_dir_id_
    std::mutex mtx_;
    // hash functor used to derive directory ids from paths
    std::hash<std::string> str_hash;

    /**
     * Computes a directory id from the hash of the directory path.
     * @param dir_path directory path
     * @return generated directory id
     */
    uint32_t
    gen_dir_id(const std::string& dir_path);

    /**
     * Returns the id registered for a directory path. If none is registered
     * yet, one is generated and registered.
     * @param dir_path directory path
     * @return directory id
     */
    uint32_t
    get_dir_id(const std::string& dir_path);

public:
    Cache() = default;
@@ -48,18 +68,23 @@ public:
    virtual ~Cache() = default;

    /**
     * Stores the metadata of one directory entry.
     * @param parent_dir path of the directory containing the entry
     * @param name name of the entry within parent_dir
     * @param value metadata to cache
     */
    void
    insert(const std::string& key, const std::string& value);
    insert(const std::string& parent_dir, const std::string name,
           const cache_entry value);

    /**
     * Looks up the cached metadata of one directory entry.
     * @param parent_dir path of the directory containing the entry
     * @param name name of the entry within parent_dir
     * @return cached metadata, or an empty optional if nothing is cached
     */
    std::optional<cache_entry>
    get(const std::string& parent_dir, const std::string& name);

    std::optional<std::string>
    get(const std::string& key);
    /**
     * Removes all cached entries of a directory and its directory id.
     * @param dir_path directory path
     */
    void
    clear_dir(const std::string& dir_path);

    /**
     * Writes the cached entries of a directory to the client log.
     * @param dir_path directory path
     */
    void
    remove(const std::string& key);
    dump_cache_to_log(const std::string& dir_path);

    /**
     * Removes all cached entries and all directory ids.
     */
    void
    clear();
};


} // namespace gkfs::cache

#endif // GKFS_CLIENT_CACHE
+67 −10
Original line number Diff line number Diff line
@@ -28,38 +28,95 @@
*/

#include <client/cache.hpp>
#include <client/preload.hpp>
#include <client/preload_util.hpp>
#include <client/logging.hpp>

#include <cstdint>
#include <mutex>
#include <optional>
#include <string>
#include <unordered_map>

namespace gkfs::cache {

uint32_t
Cache::gen_dir_id(const std::string& dir_path) {
    return str_hash(dir_path);
}

/**
 * Returns the id registered for a directory path. If the path has no id yet,
 * one is generated via gen_dir_id() and registered.
 *
 * Does not lock mtx_ itself; the callers in this file hold the lock.
 * @param dir_path directory path
 * @return directory id
 */
uint32_t
Cache::get_dir_id(const std::string& dir_path) {
    // reuse the iterator of the lookup instead of hashing the key a second
    // time through operator[]
    if(const auto it = entry_dir_id_.find(dir_path);
       it != entry_dir_id_.end()) {
        return it->second;
    }
    // not registered yet: generate an id and remember it
    const auto dir_id = gen_dir_id(dir_path);
    entry_dir_id_.emplace(dir_path, dir_id);
    return dir_id;
}


void
Cache::insert(const std::string& key, const std::string& value) {
Cache::insert(const std::string& parent_dir, const std::string name,
              const cache_entry value) {
    std::lock_guard<std::mutex> const lock(mtx_);
    entries_[key] = value;
    auto dir_id = get_dir_id(parent_dir);
    entries_[dir_id].emplace(name, value);
}

std::optional<std::string>
Cache::get(const std::string& key) {
std::optional<cache_entry>
Cache::get(const std::string& parent_dir, const std::string& name) {
    std::lock_guard<std::mutex> const lock(mtx_);
    // return key if found
    if(entries_.find(key) != entries_.end()) {
        return entries_[key];
    }
    auto dir_id = get_dir_id(parent_dir);
    if(entries_[dir_id].find(name) != entries_[dir_id].end()) {
        return entries_[dir_id][name];
    } else {
        return {};
    }
}

void
Cache::remove(const std::string& key) {
Cache::clear_dir(const std::string& dir_path) {
    std::lock_guard<std::mutex> const lock(mtx_);
    entries_.erase(key);

    auto id_it = entry_dir_id_.find(dir_path);
    if(id_it == entry_dir_id_.end()) {
        return;
    }
    auto entry_it = entries_.find(id_it->second);
    if(entry_it != entries_.end()) {
        entries_.erase(entry_it);
    }
    entry_dir_id_.erase(id_it);
}

/**
 * Writes all cached entries of a directory to the log (INFO level), one log
 * line per entry. Logs a single "NONE" line if no id is registered for the
 * path.
 *
 * The cache is only read here; nothing is inserted.
 * @param dir_path directory path
 */
void
Cache::dump_cache_to_log(const std::string& dir_path) {
    std::lock_guard<std::mutex> const lock(mtx_);
    const auto id_it = entry_dir_id_.find(dir_path);
    if(id_it == entry_dir_id_.end()) {
        LOG(INFO, "{}(): Cache contents for dir path '{}' NONE", __func__,
            dir_path);
        return;
    }
    // find() instead of operator[]: operator[] would insert an empty map for
    // a missing id, i.e. logging would modify the cache
    const auto dir_it = entries_.find(id_it->second);
    if(dir_it == entries_.end()) {
        return;
    }
    for(const auto& [name, entry] : dir_it->second) {
        // log entry
        LOG(INFO,
            "{}(): Cache contents for dir path '{}' -> name '{}' is_dir '{}' size '{}' ctime '{}'",
            __func__, dir_path, name,
            entry.file_type == gkfs::filemap::FileType::directory, entry.size,
            entry.ctime);
    }
}

/**
 * Empties the cache: drops every registered directory id and all cached
 * entries.
 */
void
Cache::clear() {
    std::lock_guard<std::mutex> const lock(mtx_);
    entry_dir_id_.clear();
    entries_.clear();
}

} // namespace gkfs::cache
+42 −4
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@
#include <client/rpc/forward_data.hpp>
#include <client/rpc/forward_data_proxy.hpp>
#include <client/open_dir.hpp>
#include <client/cache.hpp>

#include <common/path_util.hpp>
#ifdef GKFS_ENABLE_CLIENT_METRICS
@@ -1295,6 +1296,7 @@ gkfs_readv(int fd, const struct iovec* iov, int iovcnt) {
 */
int
gkfs_opendir(const std::string& path) {
    LOG(INFO, "{}(): XXX Path '{}'", __func__, path);
    auto md = gkfs::utils::get_metadata(path);
    if(!md) {
        return -1;
@@ -1305,8 +1307,35 @@ gkfs_opendir(const std::string& path) {
        errno = ENOTDIR;
        return -1;
    }

    auto ret = gkfs::rpc::forward_get_dirents(path);
    pair<int, shared_ptr<gkfs::filemap::OpenDir>> ret{};
    // Use cache: Aka get all entries from all servers for the basic metadata
    // this is used in get_metadata() later to avoid stat RPCs
    if(CTX->use_cache()) {
        ret.second = make_shared<gkfs::filemap::OpenDir>(path);
        // TODO parallelize
        for(uint64_t i = 0; i < CTX->hosts().size(); i++) {
            auto res = gkfs::rpc::forward_get_dirents_single(path, i);
            auto& open_dir = *res.second;
            for(auto& dentry : open_dir) {
                // type returns as a boolean. true if it is a directory
                LOG(DEBUG, "name: {} type: {} size: {} ctime: {}",
                    get<0>(dentry), get<1>(dentry), get<2>(dentry),
                    get<3>(dentry));
                auto ftype = get<1>(dentry) ? gkfs::filemap::FileType::directory
                                            : gkfs::filemap::FileType::regular;
                // filename, is_dir, size, ctime
                ret.second->add(get<0>(dentry), ftype);
                CTX->cache()->insert(path, get<0>(dentry),
                                     gkfs::cache::cache_entry{ftype,
                                                              get<2>(dentry),
                                                              get<3>(dentry)});
            }
            ret.first = res.first;
        }
        CTX->cache()->dump_cache_to_log(path);
    } else {
        ret = gkfs::rpc::forward_get_dirents(path);
    }
    auto err = ret.first;
    if(err) {
        errno = err;
@@ -1369,7 +1398,7 @@ gkfs_rmdir(const std::string& path) {
 */
int
gkfs_getdents(unsigned int fd, struct linux_dirent* dirp, unsigned int count) {

    LOG(INFO, "{}(): XXX fd '{}' count '{}'", __func__, fd, count);
    // Get opendir object (content was downloaded with opendir() call)
    auto open_dir = CTX->file_map()->get_dir(fd);
    if(open_dir == nullptr) {
@@ -1444,7 +1473,7 @@ gkfs_getdents(unsigned int fd, struct linux_dirent* dirp, unsigned int count) {
int
gkfs_getdents64(unsigned int fd, struct linux_dirent64* dirp,
                unsigned int count) {

    LOG(INFO, "{}(): XXX enter fd '{}' count '{}'", __func__, fd, count);
    auto open_dir = CTX->file_map()->get_dir(fd);
    if(open_dir == nullptr) {
        // Cast did not succeeded: open_file is a regular file
@@ -1513,6 +1542,15 @@ gkfs_getdents64(unsigned int fd, struct linux_dirent64* dirp,
int
gkfs_close(unsigned int fd) {
    if(CTX->file_map()->exist(fd)) {
        if(CTX->use_cache()) {
            // clear cache for directory
            if(CTX->file_map()->get(fd)->type() ==
               gkfs::filemap::FileType::directory) {
                CTX->cache()->clear_dir(CTX->file_map()->get(fd)->path());
            }
            CTX->cache()->dump_cache_to_log(CTX->file_map()->get(fd)->path());
        }
        LOG(INFO, "{}(): XXX fd '{}'", __func__, fd);
        // No call to the daemon is required
        CTX->file_map()->remove(fd);
        return 0;
+28 −0
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
#include <client/logging.hpp>
#include <client/rpc/forward_metadata.hpp>
#include <client/rpc/forward_metadata_proxy.hpp>
#include <client/cache.hpp>

#include <common/rpc/distributor.hpp>
#include <common/rpc/rpc_util.hpp>
@@ -45,6 +46,7 @@
#include <regex>
#include <csignal>
#include <random>
#include <filesystem>

extern "C" {
#include <sys/sysmacros.h>
@@ -203,8 +205,34 @@ namespace gkfs::utils {
 */
optional<gkfs::metadata::Metadata>
get_metadata(const string& path, bool follow_links) {
    LOG(INFO, "{}(): XXX Path '{}'", __func__, path);
    std::string attr;
    int err{};
    if(CTX->use_cache()) {
        std::filesystem::path p(path);
        auto parent = p.parent_path().string();
        auto filename = p.filename().string();
        LOG(INFO, "{}(): for path '{}' -> parent path '{}' leaf name '{}'",
            __func__, path, p.parent_path().string(), p.filename().string());
        auto cache_entry = CTX->cache()->get(parent, filename);
        if(cache_entry) {
            LOG(INFO, "{}(): Cache hit for path '{}'", __func__, path);
            // TODO something like this:
            //            struct stat st{};
            //            metadata_to_stat(path, *cache_entry, st);
            //            return gkfs::metadata::Metadata{st};
            mode_t mode = 33188;
            if(cache_entry->file_type == gkfs::filemap::FileType::directory) {
                mode = 16895;
            }
            gkfs::metadata::Metadata md{};
            md.mode(mode);
            md.ctime(cache_entry->ctime);
            md.size(cache_entry->size);
            return md;
        }
    }

    if(gkfs::config::proxy::fwd_stat && CTX->use_proxy()) {
        err = gkfs::rpc::forward_stat_proxy(path, attr);
    } else {
+2 −0
Original line number Diff line number Diff line
@@ -765,6 +765,8 @@ forward_get_dirents(const string& path) {
            names_ptr += name.size() + 1;

            open_dir->add(name, ftype);
            LOG(INFO, "{}(): XXX parentpath '{}' name '{}'", __func__, path,
                name);
        }
    }
    return make_pair(err, open_dir);