Verified Commit 97899545 authored by Marc Vef's avatar Marc Vef
Browse files

unfinished: optimizing path resolution performance

Moving path code from preload context, updating hooks.cpp with new method
parent fdb6a436
Loading
Loading
Loading
Loading
+12 −0
Original line number Original line Diff line number Diff line
@@ -16,6 +16,8 @@


namespace gkfs::path {
namespace gkfs::path {


enum class NormalizeStatus { ok, fd_unknown, fd_not_a_dir };

unsigned int
unsigned int
match_components(const std::string& path, unsigned int& path_components,
match_components(const std::string& path, unsigned int& path_components,
                 const std::vector<std::string>& components);
                 const std::vector<std::string>& components);
@@ -24,6 +26,16 @@ bool
resolve(const std::string& path, std::string& resolved,
resolve(const std::string& path, std::string& resolved,
        bool resolve_last_link = true);
        bool resolve_last_link = true);


NormalizeStatus
normalize(int dirfd, const char* raw_path, std::string& normalized_path,
          bool resolve_last_link = true);

std::string
normalize(const char* raw_path, bool resolve_last_link = true);

bool
is_in_gkfs(std::string& path, bool cut_mountdir_prefix = false);

std::string
std::string
get_sys_cwd();
get_sys_cwd();


+0 −11
Original line number Original line Diff line number Diff line
@@ -53,8 +53,6 @@ struct FsConfig {
    std::string rootdir;
    std::string rootdir;
};
};


enum class RelativizeStatus { internal, external, fd_unknown, fd_not_a_dir };

/**
/**
 * Singleton class of the client context with all relevant global data
 * Singleton class of the client context with all relevant global data
 */
 */
@@ -150,15 +148,6 @@ public:
    void
    void
    auto_sm(bool auto_sm);
    auto_sm(bool auto_sm);


    RelativizeStatus
    relativize_fd_path(int dirfd, const char* raw_path,
                       std::string& relative_path,
                       bool resolve_last_link = true) const;

    bool
    relativize_path(const char* raw_path, std::string& relative_path,
                    bool resolve_last_link = true) const;

    const std::shared_ptr<gkfs::filemap::OpenFileMap>&
    const std::shared_ptr<gkfs::filemap::OpenFileMap>&
    file_map() const;
    file_map() const;


+12 −1
Original line number Original line Diff line number Diff line
@@ -81,6 +81,17 @@ namespace rocksdb {
constexpr auto use_write_ahead_log = false;
constexpr auto use_write_ahead_log = false;
} // namespace rocksdb
} // namespace rocksdb


} // namespace gkfs::config
namespace preload {
/*
 * This option allows a symlink outside of the GekkoFS namespace into the
 * GekkoFS namespace. This is by default disabled as it incurs a significant
 * overhead during GekkoFS' path resolution. Therefore, it is recommended to
 * keep it disabled
 */
constexpr auto allow_symlinks_into_gkfs = false;
}

} // namespace gkfs
} // namespace config


#endif // GEKKOFS_CONFIG_HPP
#endif // GEKKOFS_CONFIG_HPP
+225 −189

File changed.

Preview size limit exceeded, changes collapsed.

+206 −49
Original line number Original line Diff line number Diff line
@@ -15,6 +15,8 @@
#include <client/preload.hpp>
#include <client/preload.hpp>
#include <client/logging.hpp>
#include <client/logging.hpp>
#include <client/env.hpp>
#include <client/env.hpp>
#include <client/open_file_map.hpp>
#include <client/open_dir.hpp>


#include <global/path_util.hpp>
#include <global/path_util.hpp>


@@ -22,6 +24,7 @@
#include <string>
#include <string>
#include <cassert>
#include <cassert>
#include <climits>
#include <climits>
#include <stack>


extern "C" {
extern "C" {
#include <sys/stat.h>
#include <sys/stat.h>
@@ -30,54 +33,64 @@ extern "C" {


using namespace std;
using namespace std;


namespace gkfs {
namespace {
namespace path {
const string excluded_paths[2] = {"sys/", "proc/"};

/**
static const string excluded_paths[2] = {"sys/", "proc/"};
 * Normalize a given path with `.` and `..` components. This will not resolve

 * symlinks
/** Match components in path
 * @param path
 *
 * @return normalized path
 * Returns the number of consecutive components at start of `path`
 * that match the ones in `components` vector.
 *
 * `path_components` will be set to the total number of components found in
 * `path`
 *
 * Example:
 * ```
 *  unsigned int tot_comp;
 *  path_match_components("/matched/head/with/tail", &tot_comp, ["matched",
 * "head", "no"]) == 2; tot_comp == 4;
 * ```
 */
 */
unsigned int
string
match_components(const string& path, unsigned int& path_components,
normalize(const string& path) {
                 const ::vector<string>& components) {

    unsigned int matched = 0;
    string normalized{}; // final normalized path
    unsigned int processed_components = 0;
    normalized.reserve(path.size());
    string::size_type comp_size = 0; // size of current component
    string::size_type comp_size = 0; // size of current component
    string::size_type start = 0;     // start index of curr component
    string::size_type start = 0;     // start index of curr component
    string::size_type end = 0; // end index of curr component (last processed
    string::size_type end = 0; // end index of curr component (last processed
                               // Path Separator "separator")
                               // Path Separator "separator")
    stack<string::size_type> slash_idx{};
    slash_idx.push(0); // index of all slashes in resolved path (used for
                       // rollback due to `..`)


    while(++end < path.size()) {
    while(++end < path.size()) {
        start = end;
        start = end;

        // Skip sequence of multiple path-separators.
        while(start < path.size() && path[start] == gkfs::path::separator) {
            start++;
        }
        // Find next component
        // Find next component
        end = path.find(path::separator, start);
        end = path.find_first_of(gkfs::path::separator, start);
        if(end == string::npos) {
        if(end == string::npos) {
            end = path.size();
            end = path.size();
        }
        }

        comp_size = end - start;
        comp_size = end - start;
        if(matched == processed_components &&

           path.compare(start, comp_size, components.at(matched)) == 0) {
        // component is empty (this must be the last component)
            ++matched;
        if(comp_size == 0) {
            break;
        }
        }
        ++processed_components;
        // component is '.', we skip it
        if(comp_size == 1 && path.at(start) == '.') {
            continue;
        }
        }
    path_components = processed_components;
        // component is '..' we need to rollback normalized path
    return matched;
        if(comp_size == 2 && path.at(start) == '.' &&
           path.at(start + 1) == '.') {
            if(!normalized.empty()) {
                normalized.erase(slash_idx.top());
                slash_idx.pop();
            }
            continue;
        }
        // add `/<component>` to the normalized path
        normalized.push_back(gkfs::path::separator);
        slash_idx.push(normalized.size() - 1);
        normalized.append(path, start, comp_size);
    }
    LOG(INFO, "path: '{}', normalized: '{}'", path, normalized);
    return normalized;
}
}


/** Resolve path to its canonical representation
/** Resolve path to its canonical representation
@@ -98,7 +111,7 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
    LOG(DEBUG, "path: \"{}\", resolved: \"{}\", resolve_last_link: {}", path,
    LOG(DEBUG, "path: \"{}\", resolved: \"{}\", resolve_last_link: {}", path,
        resolved, resolve_last_link);
        resolved, resolve_last_link);


    assert(path::is_absolute(path));
    assert(gkfs::path::is_absolute(path));


    for(auto& excl_path : excluded_paths) {
    for(auto& excl_path : excluded_paths) {
        if(path.compare(1, excl_path.length(), excl_path) == 0) {
        if(path.compare(1, excl_path.length(), excl_path) == 0) {
@@ -117,8 +130,9 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
    string::size_type start = 0;     // start index of curr component
    string::size_type start = 0;     // start index of curr component
    string::size_type end = 0; // end index of curr component (last processed
    string::size_type end = 0; // end index of curr component (last processed
                               // Path Separator "separator")
                               // Path Separator "separator")
    string::size_type last_slash_pos =
    stack<string::size_type> slash_idx{};
            0; // index of last slash in resolved path
    slash_idx.push(0); // index of all slashes in resolved path (used for
                       // rollback due to `..`)
    resolved.clear();
    resolved.clear();
    resolved.reserve(path.size());
    resolved.reserve(path.size());


@@ -126,12 +140,12 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
        start = end;
        start = end;


        /* Skip sequence of multiple path-separators. */
        /* Skip sequence of multiple path-separators. */
        while(start < path.size() && path[start] == path::separator) {
        while(start < path.size() && path[start] == gkfs::path::separator) {
            ++start;
            ++start;
        }
        }


        // Find next component
        // Find next component
        end = path.find(path::separator, start);
        end = path.find_first_of(gkfs::path::separator, start);
        if(end == string::npos) {
        if(end == string::npos) {
            end = path.size();
            end = path.size();
        }
        }
@@ -149,12 +163,8 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
           path.at(start + 1) == '.') {
           path.at(start + 1) == '.') {
            // component is '..' we need to rollback resolved path
            // component is '..' we need to rollback resolved path
            if(!resolved.empty()) {
            if(!resolved.empty()) {
                resolved.erase(last_slash_pos);
                resolved.erase(slash_idx.top());
                /* TODO     Optimization
                slash_idx.pop();
                 * the previous slash position should be stored.
                 * The following search could be avoided.
                 */
                last_slash_pos = resolved.find_last_of(path::separator);
            }
            }
            if(resolved_components > 0) {
            if(resolved_components > 0) {
                if(matched_components == resolved_components) {
                if(matched_components == resolved_components) {
@@ -166,8 +176,8 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
        }
        }


        // add `/<component>` to the resolved path
        // add `/<component>` to the resolved path
        resolved.push_back(path::separator);
        resolved.push_back(gkfs::path::separator);
        last_slash_pos = resolved.size() - 1;
        slash_idx.push(resolved.size() - 1);
        resolved.append(path, start, comp_size);
        resolved.append(path, start, comp_size);
        /*
        /*
         * This will be true for all path components outside of GKFS and up to
         * This will be true for all path components outside of GKFS and up to
@@ -205,10 +215,15 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
                }
                }
                // substitute resolved with new link path
                // substitute resolved with new link path
                resolved = link_resolved.get();
                resolved = link_resolved.get();
                matched_components = match_components(
                matched_components = gkfs::path::match_components(
                        resolved, resolved_components, mnt_components);
                        resolved, resolved_components, mnt_components);
                // set matched counter to value coherent with the new path
                // set matched counter to value coherent with the new path
                last_slash_pos = resolved.find_last_of(path::separator);
                stack<string::size_type> slash_idx_new{};
                for(size_t i = 0; i < resolved.size(); i++) {
                    if(resolved[i] == gkfs::path::separator)
                        slash_idx_new.push(i);
                }
                slash_idx = slash_idx_new;
                continue;
                continue;
            } else if((!S_ISDIR(st.st_mode)) && (end != path.size())) {
            } else if((!S_ISDIR(st.st_mode)) && (end != path.size())) {
                resolved.append(path, end, string::npos);
                resolved.append(path, end, string::npos);
@@ -228,12 +243,154 @@ resolve(const string& path, string& resolved, bool resolve_last_link) {
    }
    }


    if(resolved.empty()) {
    if(resolved.empty()) {
        resolved.push_back(path::separator);
        resolved.push_back(gkfs::path::separator);
    }
    }
    LOG(DEBUG, "external: \"{}\"", resolved);
    LOG(DEBUG, "external: \"{}\"", resolved);
    return false;
    return false;
}
}


} // namespace

namespace gkfs {
namespace path {

/** Match components in path
 *
 * Returns the number of consecutive components at the start of `path`
 * that match the ones in the `components` vector. Matching stops at the
 * first component that differs, or once every entry of `components` has
 * been matched; the remaining components of `path` are still counted.
 *
 * `path_components` will be set to the total number of components found in
 * `path`.
 *
 * Example:
 * ```
 *  unsigned int tot_comp;
 *  match_components("/matched/head/with/tail", tot_comp,
 *                   {"matched", "head", "no"}) == 2;
 *  tot_comp == 4;
 * ```
 *
 * @param path path to split; scanning starts at index 1, i.e. the first
 * character is skipped as the leading separator
 * @param path_components [out] total number of components found in `path`
 * @param components components to match against, in order
 * @return number of leading components of `path` equal to `components`
 */
unsigned int
match_components(const string& path, unsigned int& path_components,
                 const ::vector<string>& components) {
    unsigned int matched = 0;
    unsigned int processed_components = 0;
    string::size_type comp_size = 0; // size of current component
    string::size_type start = 0;     // start index of curr component
    string::size_type end = 0; // end index of curr component (last processed
                               // Path Separator "separator")

    while(++end < path.size()) {
        start = end;

        // Find next component
        end = path.find(path::separator, start);
        if(end == string::npos) {
            end = path.size();
        }

        comp_size = end - start;
        // Only keep comparing while every previous component matched and
        // there is still an entry left in `components`. Without the bounds
        // check a path with more components than `components` (all of which
        // matched) would index past the end of the vector.
        if(matched == processed_components && matched < components.size() &&
           path.compare(start, comp_size, components[matched]) == 0) {
            ++matched;
        }
        ++processed_components;
    }
    path_components = processed_components;
    return matched;
}

/**
 * Builds an absolute path from `raw_path` and normalizes it with the
 * file-local normalize() helper.
 *
 * An absolute `raw_path` is used as is. A relative one is anchored either at
 * the client's current working directory (when `dirfd` is AT_FDCWD) or at the
 * directory that `dirfd` refers to in the client's open file map; in the
 * latter case the path is assembled as
 * `<mountdir><directory path>/<raw_path>`.
 *
 * @param dirfd AT_FDCWD or a descriptor tracked by the client's file map
 * @param raw_path path as passed by the caller, must not be null
 * @param normalized_path [out] normalized absolute path; only written when
 * NormalizeStatus::ok is returned
 * @param resolve_last_link currently not read by this function
 * @return NormalizeStatus::ok on success, NormalizeStatus::fd_unknown if
 * `dirfd` is not in the file map, NormalizeStatus::fd_not_a_dir if `dirfd`
 * does not refer to an open directory
 */
NormalizeStatus
normalize(int dirfd, const char* raw_path, std::string& normalized_path,
          bool resolve_last_link) {
    // TODO when LEAF is available: return concated path and throw Status
    // instead.
    // Must only run once the library constructor has finished ...
    assert(CTX->interception_enabled());
    // ... which also implies that the mountdir has been set up
    assert(!CTX->mountdir().empty());

    // The raw path is expected to be valid
    assert(raw_path != nullptr);

    std::string full_path{};

    if(raw_path[0] == gkfs::path::separator) {
        // already absolute, nothing to prepend
        full_path = raw_path;
    } else if(dirfd == AT_FDCWD) {
        // relative to the current working directory
        full_path = gkfs::path::prepend_path(CTX->cwd(), raw_path);
    } else {
        // relative to an open directory descriptor
        if(!CTX->file_map()->exist(dirfd)) {
            return NormalizeStatus::fd_unknown;
        }
        auto open_dir = CTX->file_map()->get_dir(dirfd);
        if(open_dir == nullptr) {
            return NormalizeStatus::fd_not_a_dir;
        }
        full_path = CTX->mountdir();
        full_path += open_dir->path();
        full_path += gkfs::path::separator;
        full_path += raw_path;
    }

    normalized_path = ::normalize(full_path);
    return NormalizeStatus::ok;
}

std::string
normalize(const char* raw_path, bool resolve_last_link) {
    // TODO when LEAF is available: return concated path and throw Status
    // instead. Relativize path should be called only after the library
    // constructor has been executed
    assert(CTX->interception_enabled());
    // If we run the constructor we also already setup the mountdir
    assert(!CTX->mountdir().empty());

    // We assume raw path is valid
    assert(raw_path != nullptr);

    std::string path;

    if(raw_path[0] != gkfs::path::separator) {
        /* Path is not absolute, we need to prepend CWD;
         * First reserve enough space to minimize memory copy
         */
        path = gkfs::path::prepend_path(CTX->cwd(), raw_path);
    } else {
        path = raw_path;
    }
    return ::normalize(path);
}

/**
 * Checks if a path is within gkfs namespace. normalize_path() should have been
 * called before as the mountpoint in the path is checked from the beginning and
 * must therefore be absolute.
 *
 * cut_mountdir_prefix can be passed to remove the mountdir prefix from path:
 * /tmp/mountdir/gkfsfile -> /gkfsfile It modifies the given path instead of
 * returning a new string to avoid copying, as this function is performance
 * critical
 *
 * @param path (absolute path)
 * @param cut_mountdir_prefix (default false, if true gkfs mountpoint path is
 * cut if it is within gkfs namespace)
 * @return true if within gkfs namespace else false
 */
bool
is_in_gkfs(std::string& path, bool cut_mountdir_prefix) {
    if(path.rfind(CTX->mountdir(), 0) != std::string::npos) {
        if(cut_mountdir_prefix)
            path.erase(1, CTX->mountdir().size());
        return true;
    } else {
        return false;
    }
}

string
string
get_sys_cwd() {
get_sys_cwd() {
    char temp[path::max_length];
    char temp[path::max_length];
Loading