Commit 5eb37db4 authored by sevenuz's avatar sevenuz
Browse files

pointertrick fix and shared locks for fuse

parent 8cd4b745
Loading
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -70,7 +70,7 @@ extern "C" {
#include <unistd.h>
#include <unordered_map>
#include <string>
#include <mutex>
#include <shared_mutex>
#include <cstdlib>
#include <atomic>

+108 −23
Original line number Diff line number Diff line
@@ -41,12 +41,12 @@

static struct fuse_lowlevel_ops ll_ops;
struct InodeShard {
    std::mutex mutex;
    std::shared_mutex mutex;
    std::unordered_map<fuse_ino_t, Inode> ino_map;
};

struct PathShard {
    std::mutex mutex;
    std::shared_mutex mutex;
    std::unordered_map<std::string, fuse_ino_t> path_map;
};

@@ -78,13 +78,13 @@ alloc_inode(const std::string& path) {
    } else {
        ino = next_ino++;
        auto& i_shard = ino_shards[std::hash<fuse_ino_t>{}(ino) % SHARD_COUNT];
        std::lock_guard<std::mutex> lk(i_shard.mutex);
        std::lock_guard<std::shared_mutex> lk(i_shard.mutex);
        i_shard.ino_map.emplace(std::piecewise_construct,
                                std::forward_as_tuple(ino),
                                std::forward_as_tuple(path, 1));
    }
    auto& p_shard = path_shards[std::hash<std::string>{}(path) % SHARD_COUNT];
    std::lock_guard<std::mutex> lk(p_shard.mutex);
    std::lock_guard<std::shared_mutex> lk(p_shard.mutex);
    p_shard.path_map[path] = ino;
    return ino;
}
@@ -98,7 +98,7 @@ get_inode(fuse_ino_t ino) {
        return (Inode*) ino;
    } else {
        auto& i_shard = ino_shards[std::hash<fuse_ino_t>{}(ino) % SHARD_COUNT];
        std::lock_guard<std::mutex> lk(i_shard.mutex);
        std::lock_guard<std::shared_mutex> lk(i_shard.mutex);
        auto it = i_shard.ino_map.find(ino);
        return it != i_shard.ino_map.end() ? &it->second : nullptr;
    }
@@ -107,7 +107,7 @@ get_inode(fuse_ino_t ino) {
static void
remove_inode_by_path(const std::string path) {
    auto& p_shard = path_shards[std::hash<std::string>{}(path) % SHARD_COUNT];
    std::lock_guard<std::mutex> lk(p_shard.mutex);
    std::lock_guard<std::shared_mutex> lk(p_shard.mutex);
    auto it_src = p_shard.path_map.find(path);
    if(it_src != p_shard.path_map.end()) {
        p_shard.path_map.erase(it_src);
@@ -129,7 +129,7 @@ remove_inode_by_ino(fuse_ino_t ino) {
        delete inode;
    } else {
        auto& i_shard = ino_shards[std::hash<fuse_ino_t>{}(ino) % SHARD_COUNT];
        std::lock_guard<std::mutex> lk(i_shard.mutex);
        std::lock_guard<std::shared_mutex> lk(i_shard.mutex);
        i_shard.ino_map.erase(ino);
    }
}
@@ -152,7 +152,9 @@ fill_fuse_entry_param(const u_data* ud, const std::string path,
    {
        auto& p_shard =
                path_shards[std::hash<std::string>{}(path) % SHARD_COUNT];
        std::lock_guard<std::mutex> lk(p_shard.mutex);
        // Shared lock: concurrent lookups are safe; forget takes an exclusive
        // lock so it cannot run while any lookup holds this shared lock.
        std::shared_lock<std::shared_mutex> lk(p_shard.mutex);
        if(check_path_map) {
            auto it = p_shard.path_map.find(path);
            if(it != p_shard.path_map.end()) {
@@ -160,10 +162,16 @@ fill_fuse_entry_param(const u_data* ud, const std::string path,
                found = true;
                if(gkfs::config::fuse::pointertrick) {
                    inode = (Inode*) ino;
                    // Must increment inside p_shard lock: prevents forget from
                    // decrementing to 0 and deleting the inode between the
                    // map lookup and the increment.
                    inode->lookup_count.fetch_add(1, std::memory_order_relaxed);
                } else {
                    auto& i_shard = ino_shards[std::hash<fuse_ino_t>{}(ino) %
                                               SHARD_COUNT];
                    std::lock_guard<std::mutex> i_lk(i_shard.mutex);
                    // Shared lock: only increments an atomic, does not modify
                    // the map structure.
                    std::shared_lock<std::shared_mutex> i_lk(i_shard.mutex);
                    auto it_ino = i_shard.ino_map.find(ino);
                    if(it_ino != i_shard.ino_map.end()) {
                        inode = &it_ino->second;
@@ -849,6 +857,37 @@ forget_handler(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) {
        return;
    }

    if(gkfs::config::fuse::pointertrick) {
        // Decrement and conditional erase must be atomic with respect to
        // fill_fuse_entry_param's map-lookup+increment: both hold the same
        // p_shard lock, so there is no window where forget can delete an
        // inode that another thread just retrieved from the map.
        bool should_delete = false;
        {
            auto& p_shard = path_shards[std::hash<std::string>{}(inode->path) %
                                        SHARD_COUNT];
            std::lock_guard<std::shared_mutex> lk(p_shard.mutex);
            uint64_t current =
                    inode->lookup_count.load(std::memory_order_relaxed);
            uint64_t new_count =
                    (current >= nlookup) ? (current - nlookup) : 0;
            inode->lookup_count.store(new_count, std::memory_order_relaxed);
            if(new_count == 0) { // && inode.open_count == 0
                LOG(DEBUG, "reached lookup_count 0 for ino {}", ino);
                auto it = p_shard.path_map.find(inode->path);
                if(it != p_shard.path_map.end()) {
                    p_shard.path_map.erase(it);
                }
                should_delete = true;
            }
        }
        if(should_delete) {
            delete inode;
        }
        fuse_reply_none(req);
        return;
    }

    uint64_t current = inode->lookup_count.load();
    while(current >= nlookup) {
        if(inode->lookup_count.compare_exchange_weak(current,
@@ -1077,7 +1116,7 @@ rename_handler(fuse_req_t req, fuse_ino_t old_parent, const char* old_name,
    {
        auto& p_shard =
                path_shards[std::hash<std::string>{}(old_path) % SHARD_COUNT];
        std::lock_guard<std::mutex> lk(p_shard.mutex);
        std::lock_guard<std::shared_mutex> lk(p_shard.mutex);
        auto it_src = p_shard.path_map.find(old_path);
        if(it_src != p_shard.path_map.end()) {
            src_ino = it_src->second;
@@ -1090,7 +1129,7 @@ rename_handler(fuse_req_t req, fuse_ino_t old_parent, const char* old_name,
    } else {
        auto& p_shard =
                path_shards[std::hash<std::string>{}(new_path) % SHARD_COUNT];
        std::lock_guard<std::mutex> lk(p_shard.mutex);
        std::lock_guard<std::shared_mutex> lk(p_shard.mutex);

        fuse_ino_t old_dst_ino = 0;
        auto it_dst = p_shard.path_map.find(new_path);
@@ -1101,22 +1140,39 @@ rename_handler(fuse_req_t req, fuse_ino_t old_parent, const char* old_name,
        p_shard.path_map[new_path] = src_ino;

        if(old_dst_ino != 0) {
            if(gkfs::config::fuse::pointertrick) {
                // Clear the displaced inode's path while still holding the
                // new_path p_shard lock.  This prevents forget from reading a
                // stale path and erasing the new_path entry (which now belongs
                // to src_ino) from path_map.
                auto* old_dst_inode = (Inode*) old_dst_ino;
                old_dst_inode->path.clear();
            } else {
                auto& i_shard = ino_shards[std::hash<fuse_ino_t>{}(old_dst_ino) %
                                           SHARD_COUNT];
            std::lock_guard<std::mutex> i_lk(i_shard.mutex);
                std::lock_guard<std::shared_mutex> i_lk(i_shard.mutex);
                auto it_ino = i_shard.ino_map.find(old_dst_ino);
                if(it_ino != i_shard.ino_map.end()) {
                    it_ino->second.path.clear();
                }
            }
        }
    }

    auto& i_shard = ino_shards[std::hash<fuse_ino_t>{}(src_ino) % SHARD_COUNT];
    std::lock_guard<std::mutex> i_lk(i_shard.mutex);
    if(gkfs::config::fuse::pointertrick) {
        // Update src inode's path string so that a subsequent forget uses the
        // correct p_shard and erases new_path, not old_path.
        auto* src_inode = (Inode*) src_ino;
        src_inode->path = new_path;
    } else {
        auto& i_shard =
                ino_shards[std::hash<fuse_ino_t>{}(src_ino) % SHARD_COUNT];
        std::lock_guard<std::shared_mutex> i_lk(i_shard.mutex);
        auto it_ino = i_shard.ino_map.find(src_ino);
        if(it_ino != i_shard.ino_map.end()) {
            it_ino->second.path = new_path;
        }
    }

    fuse_reply_err(req, 0);
}
@@ -1193,7 +1249,7 @@ init_gekkofs() {
    root_inode.path = root_path;
    auto& p_shard =
            path_shards[std::hash<std::string>{}(root_path) % SHARD_COUNT];
    std::lock_guard<std::mutex> lk(p_shard.mutex);
    std::lock_guard<std::shared_mutex> lk(p_shard.mutex);
    p_shard.path_map[root_path] = FUSE_ROOT_ID;
    std::cout << "root node allocated" << std::endl;
}
@@ -1210,6 +1266,35 @@ forget_multi_handler(fuse_req_t req, size_t count,
        if(!inode)
            continue;

        if(gkfs::config::fuse::pointertrick) {
            bool should_delete = false;
            {
                auto& p_shard =
                        path_shards[std::hash<std::string>{}(inode->path) %
                                    SHARD_COUNT];
                std::lock_guard<std::shared_mutex> lk(p_shard.mutex);
                uint64_t current =
                        inode->lookup_count.load(std::memory_order_relaxed);
                uint64_t new_count =
                        (current >= nlookup) ? (current - nlookup) : 0;
                inode->lookup_count.store(new_count,
                                          std::memory_order_relaxed);
                if(new_count == 0) {
                    LOG(DEBUG, "reached lookup_count 0 for ino {} (multi)",
                        ino);
                    auto it = p_shard.path_map.find(inode->path);
                    if(it != p_shard.path_map.end()) {
                        p_shard.path_map.erase(it);
                    }
                    should_delete = true;
                }
            }
            if(should_delete) {
                delete inode;
            }
            continue;
        }

        uint64_t current = inode->lookup_count.load();
        while(current >= nlookup) {
            if(inode->lookup_count.compare_exchange_weak(current,