Verified Commit d36b892d authored by Marc Vef's avatar Marc Vef
Browse files

Mountdir existence is no longer required for clients

Previously, an empty mountdir directory had to exist for no reason. As a result, the daemon created and removed this empty directory. This behavior has been removed.

unfinished: optimizing path resolution performance

Moving path code from preload context, updating hooks.cpp with new method

fix merge conflicts. not functional
parent c4d2a5a0
Loading
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -32,6 +32,8 @@

namespace gkfs::path {

/**
 * Result of normalizing a path relative to a directory file descriptor.
 *  - ok:           the path was normalized successfully
 *  - fd_unknown:   the given dirfd is not known to the client's open file map
 *  - fd_not_a_dir: the given dirfd is known but does not refer to a directory
 */
enum class NormalizeStatus { ok, fd_unknown, fd_not_a_dir };

/**
 * Counts how many consecutive components at the start of `path` are equal to
 * the corresponding entries of `components`.
 * @param path path to split into components
 * @param path_components [out] total number of components found in `path`
 * @param components expected components, in order
 * @return number of leading components of `path` that matched
 */
unsigned int
match_components(const std::string& path, unsigned int& path_components,
                 const std::vector<std::string>& components);
@@ -40,6 +42,16 @@ bool
resolve(const std::string& path, std::string& resolved,
        bool resolve_last_link = true);

/**
 * Builds an absolute path from `raw_path` and normalizes its `.` and `..`
 * components. A relative `raw_path` is interpreted relative to the current
 * working directory if `dirfd` is AT_FDCWD, otherwise relative to the
 * directory that `dirfd` refers to.
 * @param dirfd directory file descriptor or AT_FDCWD
 * @param raw_path path as passed by the caller (must not be null)
 * @param normalized_path [out] normalized absolute path on success
 * @param resolve_last_link defaults to true
 * @return NormalizeStatus::ok on success, otherwise the reason why `dirfd`
 * could not be used
 */
NormalizeStatus
normalize(int dirfd, const char* raw_path, std::string& normalized_path,
          bool resolve_last_link = true);

/**
 * Builds an absolute path from `raw_path` (relative paths are interpreted
 * relative to the current working directory) and normalizes its `.` and `..`
 * components.
 * @param raw_path path as passed by the caller (must not be null)
 * @param resolve_last_link defaults to true
 * @return normalized absolute path
 */
std::string
normalize(const char* raw_path, bool resolve_last_link = true);

/**
 * Checks whether an absolute path lies under the GekkoFS mountdir.
 * @param path absolute path; modified in place if `cut_mountdir_prefix` is
 * true and the path is inside GekkoFS
 * @param cut_mountdir_prefix if true, the mountdir prefix is removed from
 * `path` (e.g. /tmp/mountdir/gkfsfile -> /gkfsfile)
 * @return true if the path is within the GekkoFS namespace, false otherwise
 */
bool
is_in_gkfs(std::string& path, bool cut_mountdir_prefix = false);

std::string
get_sys_cwd();

+0 −11
Original line number Diff line number Diff line
@@ -70,8 +70,6 @@ struct FsConfig {
    std::string rootdir;
};

enum class RelativizeStatus { internal, external, fd_unknown, fd_not_a_dir };

/**
 * Singleton class of the client context with all relevant global data
 */
@@ -169,15 +167,6 @@ public:
    void
    auto_sm(bool auto_sm);

    RelativizeStatus
    relativize_fd_path(int dirfd, const char* raw_path,
                       std::string& relative_path, int flags = 0,
                       bool resolve_last_link = true) const;

    bool
    relativize_path(const char* raw_path, std::string& relative_path,
                    bool resolve_last_link = true) const;

    const std::shared_ptr<gkfs::filemap::OpenFileMap>&
    file_map() const;

+10 −0
Original line number Diff line number Diff line
@@ -109,6 +109,16 @@ namespace rocksdb {
constexpr auto use_write_ahead_log = false;
} // namespace rocksdb

namespace preload {
/*
 * Controls whether a symlink located outside of the GekkoFS namespace may
 * point into the GekkoFS namespace. This is disabled by default because
 * supporting it incurs a significant overhead during GekkoFS' path
 * resolution. It is therefore recommended to keep it disabled.
 */
constexpr auto allow_symlinks_into_gkfs = false;
} // namespace preload

namespace stats {
constexpr auto max_stats = 1000000; ///< How many stats will be stored
constexpr auto prometheus_gateway = "127.0.0.1:9091";
+305 −246

File changed.

Preview size limit exceeded, changes collapsed.

+211 −50
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@
#include <client/preload.hpp>
#include <client/logging.hpp>
#include <client/env.hpp>
#include <client/open_file_map.hpp>
#include <client/open_dir.hpp>

#include <common/path_util.hpp>

@@ -38,6 +40,7 @@
#include <string>
#include <cassert>
#include <climits>
#include <stack>

extern "C" {
#include <sys/stat.h>
@@ -46,53 +49,64 @@ extern "C" {

using namespace std;

namespace gkfs::path {

static const string excluded_paths[2] = {"sys/", "proc/"};

/** Match components in path
 *
 * Returns the number of consecutive components at start of `path`
 * that match the ones in `components` vector.
 *
 * `path_components` will be set to the total number of components found in
 * `path`
 *
 * Example:
 * ```ÏÏ
 *  unsigned int tot_comp;
 *  path_match_components("/matched/head/with/tail", &tot_comp, ["matched",
 * "head", "no"]) == 2; tot_comp == 4;
 * ```
namespace {
const string excluded_paths[2] = {"sys/", "proc/"};
/**
 * Normalize a given path with `.` and `..` components. This will not resolve
 * symlinks
 * @param path
 * @return normalized path
 */
unsigned int
match_components(const string& path, unsigned int& path_components,
                 const ::vector<string>& components) {
    unsigned int matched = 0;
    unsigned int processed_components = 0;
string
normalize(const string& path) {

    string normalized{}; // final normalized path
    normalized.reserve(path.size());
    string::size_type comp_size = 0; // size of current component
    string::size_type start = 0;     // start index of curr component
    string::size_type end = 0; // end index of curr component (last processed
                               // Path Separator "separator")
    stack<string::size_type> slash_idx{};
    slash_idx.push(0); // index of all slashes in resolved path (used for
                       // rollback due to `..`)

    while(++end < path.size()) {
        start = end;

        // Skip sequence of multiple path-separators.
        while(start < path.size() && path[start] == gkfs::path::separator) {
            start++;
        }
        // Find next component
        end = path.find(path::separator, start);
        end = path.find_first_of(gkfs::path::separator, start);
        if(end == string::npos) {
            end = path.size();
        }

        comp_size = end - start;
        if(matched == processed_components &&
           path.compare(start, comp_size, components.at(matched)) == 0) {
            ++matched;

        // component is empty (this must be the last component)
        if(comp_size == 0) {
            break;
        }
        ++processed_components;
        // component is '.', we skip it
        if(comp_size == 1 && path.at(start) == '.') {
            continue;
        }
    path_components = processed_components;
    return matched;
        // component is '..' we need to rollback normalized path
        if(comp_size == 2 && path.at(start) == '.' &&
           path.at(start + 1) == '.') {
            if(!normalized.empty()) {
                normalized.erase(slash_idx.top());
                slash_idx.pop();
            }
            continue;
        }
        // add `/<component>` to the normalized path
        normalized.push_back(gkfs::path::separator);
        slash_idx.push(normalized.size() - 1);
        normalized.append(path, start, comp_size);
    }
    LOG(INFO, "path: '{}', normalized: '{}'", path, normalized);
    return normalized;
}

/** Resolve path to its canonical representation
@@ -113,7 +127,7 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
    LOG(DEBUG, "path: \"{}\", resolved: \"{}\", resolve_last_link: {}", path,
        resolved, resolve_last_link);

    assert(path::is_absolute(path));
    assert(gkfs::path::is_absolute(path));

    for(auto& excl_path : excluded_paths) {
        if(path.compare(1, excl_path.length(), excl_path) == 0) {
@@ -132,8 +146,9 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
    string::size_type start = 0;     // start index of curr component
    string::size_type end = 0; // end index of curr component (last processed
                               // Path Separator "separator")
    string::size_type last_slash_pos =
            0; // index of last slash in resolved path
    stack<string::size_type> slash_idx{};
    slash_idx.push(0); // index of all slashes in resolved path (used for
                       // rollback due to `..`)
    resolved.clear();
    resolved.reserve(path.size());

@@ -141,12 +156,12 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
        start = end;

        /* Skip sequence of multiple path-separators. */
        while(start < path.size() && path[start] == path::separator) {
        while(start < path.size() && path[start] == gkfs::path::separator) {
            ++start;
        }

        // Find next component
        end = path.find(path::separator, start);
        end = path.find_first_of(gkfs::path::separator, start);
        if(end == string::npos) {
            end = path.size();
        }
@@ -164,12 +179,8 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
           path.at(start + 1) == '.') {
            // component is '..' we need to rollback resolved path
            if(!resolved.empty()) {
                resolved.erase(last_slash_pos);
                /* TODO     Optimization
                 * the previous slash position should be stored.
                 * The following search could be avoided.
                 */
                last_slash_pos = resolved.find_last_of(path::separator);
                resolved.erase(slash_idx.top());
                slash_idx.pop();
            }
            if(resolved_components > 0) {
                if(matched_components == resolved_components) {
@@ -181,11 +192,15 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
        }

        // add `/<component>` to the reresolved path
        resolved.push_back(path::separator);
        last_slash_pos = resolved.size() - 1;
        resolved.push_back(gkfs::path::separator);
        slash_idx.push(resolved.size() - 1);
        resolved.append(path, start, comp_size);

        if(matched_components < mnt_components.size()) {
        /*
         * This will be true for all path components outside of GKFS and up to
         * the mountdir's parent path The mountdir directory is considered to be
         * inside GKFS as it is entirely virtual and does not need to exist
         */
        if(matched_components < mnt_components.size() - 1) {
            // Outside GekkoFS
            if(matched_components == resolved_components &&
               path.compare(start, comp_size,
@@ -216,10 +231,15 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
                }
                // substituute resolved with new link path
                resolved = link_resolved.get();
                matched_components = match_components(
                matched_components = gkfs::path::match_components(
                        resolved, resolved_components, mnt_components);
                // set matched counter to value coherent with the new path
                last_slash_pos = resolved.find_last_of(path::separator);
                stack<string::size_type> slash_idx_new{};
                for(size_t i = 0; i < resolved.size(); i++) {
                    if(resolved[i] == gkfs::path::separator)
                        slash_idx_new.push(i);
                }
                slash_idx = slash_idx_new;
                continue;
            } else if((!S_ISDIR(st.st_mode)) && (end != path.size())) {
                resolved.append(path, end, string::npos);
@@ -239,12 +259,153 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
    }

    if(resolved.empty()) {
        resolved.push_back(path::separator);
        resolved.push_back(gkfs::path::separator);
    }
    LOG(DEBUG, "external: \"{}\"", resolved);
    return false;
}

} // namespace

namespace gkfs::path {

/** Match components in path
 *
 * Returns the number of consecutive components at the start of `path` that
 * are equal to the corresponding entries of the `components` vector. Matching
 * stops at the first mismatch or once every entry of `components` has been
 * matched; the remaining components of `path` are still counted.
 *
 * `path` is expected to start with a path separator, as scanning begins at
 * index 1. Consecutive separators are not collapsed: each additional
 * separator yields an empty component that is counted like any other.
 *
 * Example:
 * ```
 *  unsigned int tot_comp;
 *  match_components("/matched/head/with/tail", tot_comp,
 *                   {"matched", "head", "no"}) == 2;
 *  tot_comp == 4;
 * ```
 *
 * @param path path to split into components
 * @param path_components [out] total number of components found in `path`
 * @param components expected components, in order
 * @return number of leading components of `path` that matched
 */
unsigned int
match_components(const string& path, unsigned int& path_components,
                 const ::vector<string>& components) {
    unsigned int matched = 0;
    unsigned int processed_components = 0;
    string::size_type start = 0; // start index of curr component
    string::size_type end = 0;   // end index of curr component (last processed
                                 // Path Separator "separator")

    while(++end < path.size()) {
        start = end;

        // Find next component
        end = path.find(path::separator, start);
        if(end == string::npos) {
            end = path.size();
        }

        const string::size_type comp_size = end - start;
        // A component can only extend the match if all previous components
        // matched as well and there is still an expected component left.
        // Without the bounds check, a path with more components than
        // `components` whose prefix matched completely would index past the
        // end of the vector.
        if(matched == processed_components && matched < components.size() &&
           path.compare(start, comp_size, components[matched]) == 0) {
            ++matched;
        }
        ++processed_components;
    }
    path_components = processed_components;
    return matched;
}

/**
 * Builds an absolute path from `raw_path` and normalizes it (`.` and `..`
 * components are removed, symlinks are not resolved).
 *
 * An absolute `raw_path` is used as is and `dirfd` is ignored. A relative
 * `raw_path` is prefixed with the client's cwd if `dirfd` is AT_FDCWD.
 * Otherwise it is prefixed with the mountdir followed by the path of the
 * open directory that `dirfd` refers to.
 *
 * Must only be called after the library constructor has run, i.e.,
 * interception is enabled and the mountdir is set.
 *
 * TODO when LEAF is available: return concatenated path and throw Status
 * instead.
 *
 * @param dirfd directory file descriptor or AT_FDCWD
 * @param raw_path path as passed by the caller (must not be null)
 * @param normalized_path [out] normalized absolute path, only written when
 * NormalizeStatus::ok is returned
 * @param resolve_last_link currently not used by this function
 * @return ok on success, fd_unknown if `dirfd` is not in the open file map,
 * fd_not_a_dir if `dirfd` does not refer to an open directory
 */
NormalizeStatus
normalize(int dirfd, const char* raw_path, std::string& normalized_path,
          bool resolve_last_link) {
    // Preconditions: the constructor ran (which also sets up the mountdir)
    // and the caller handed us a valid path
    assert(CTX->interception_enabled());
    assert(!CTX->mountdir().empty());
    assert(raw_path != nullptr);

    std::string absolute_path{};
    const bool starts_at_root = raw_path[0] == gkfs::path::separator;

    if(starts_at_root) {
        absolute_path = raw_path;
    } else if(dirfd == AT_FDCWD) {
        // relative to the current working directory
        absolute_path = gkfs::path::prepend_path(CTX->cwd(), raw_path);
    } else {
        // relative to an open directory tracked by the client
        if(!CTX->file_map()->exist(dirfd)) {
            return NormalizeStatus::fd_unknown;
        }
        auto open_dir = CTX->file_map()->get_dir(dirfd);
        if(open_dir == nullptr) {
            return NormalizeStatus::fd_not_a_dir;
        }
        // <mountdir><dir path>/<raw_path>
        absolute_path = CTX->mountdir();
        absolute_path.append(open_dir->path());
        absolute_path.push_back(gkfs::path::separator);
        absolute_path.append(raw_path);
    }

    normalized_path = ::normalize(absolute_path);
    return NormalizeStatus::ok;
}

/**
 * Builds an absolute path from `raw_path` and normalizes it (`.` and `..`
 * components are removed, symlinks are not resolved). A relative `raw_path`
 * is prefixed with the client's cwd, an absolute one is used as is.
 *
 * Must only be called after the library constructor has run, i.e.,
 * interception is enabled and the mountdir is set.
 *
 * TODO when LEAF is available: return concatenated path and throw Status
 * instead.
 *
 * @param raw_path path as passed by the caller (must not be null)
 * @param resolve_last_link currently not used by this function
 * @return normalized absolute path
 */
std::string
normalize(const char* raw_path, bool resolve_last_link) {
    // Preconditions: the constructor ran (which also sets up the mountdir)
    // and the caller handed us a valid path
    assert(CTX->interception_enabled());
    assert(!CTX->mountdir().empty());
    assert(raw_path != nullptr);

    std::string absolute_path;
    const bool starts_at_root = raw_path[0] == gkfs::path::separator;
    if(starts_at_root) {
        absolute_path = raw_path;
    } else {
        // Path is not absolute, anchor it at the current working directory
        absolute_path = gkfs::path::prepend_path(CTX->cwd(), raw_path);
    }
    return ::normalize(absolute_path);
}

/**
 * Checks if a path is within gkfs namespace. normalize_path() should have been
 * called before as the mountpoint in the path is checked from the beginning and
 * must therefore be absolute.
 *
 * cut_mountdir_prefix can be passed to remove the mountdir prefix from path:
 * /tmp/mountdir/gkfsfile -> /gkfsfile It modifies the given path instead of
 * returning a new string to avoid copying, as this function is performance
 * critical
 *
 * @param path (absolute path)
 * @param cut_mountdir_prefix (default false, if true gkfs mountpoint path is
 * cut if it is within gkfs namespace)
 * @return true if within gkfs namespace else false
 */
bool
is_in_gkfs(std::string& path, bool cut_mountdir_prefix) {
    if(path.rfind(CTX->mountdir(), 0) != std::string::npos) {
        if(cut_mountdir_prefix)
            path.erase(1, CTX->mountdir().size());
        return true;
    } else {
        return false;
    }
}

string
get_sys_cwd() {
    char temp[path::max_length];
Loading