Commit 28041105 authored by Alberto Miranda's avatar Alberto Miranda ♨️
Browse files

Merge branch '94-remove-exist-check-when-creating-a-file' into 'master'

Optimization of create, stat, and remove operations. The following changes have been made:

- `gkfs_open()` logic refactored.
- create: Previously a stat RPC was sent before each create to make sure the file doesn't exist. This logic is now implicit in the create operation on the daemon side.
- `get_metadata()` was used on the client for all stat operations, creating a `shared_ptr` for the `Metadata` object in the process, which was not needed. A new function `get_metadata_attr()` was created to only return the metadata binary string, so that the metadata object is only created if actually required.
- Remove logic was separated into two operations: `remove_metadata()` and `remove_data()`. 
- `gkfs::config::metadata::implicit_data_removal` setting: If `true`, will remove data on the same node during the `remove_metadata()` RPC. This is mainly an optimization, but will be useful for future asynchronous removal implementations.

Previously, the code path looked like this:
1. `stat()` to get `size` and `mode`.
2. Use these fields to determine if data needs to be removed or just metadata.
3. `remove()` is called which, first, sends a single RPC to the daemon with the metadata. Afterwards, data is removed.
4. The daemon used one handler for both cases.

It now looks like this: 
1. `remove()` is called which, first, sends a single RPC to the daemon with the metadata. Before removing the metadata, the daemon fetches `mode` and `size`. If `implicit_data_removal` is enabled in the configuration, the data on the same node is removed in this RPC as well. `mode` and `size` are returned to the client.
2. The client determines if data needs to be removed as well.
3. If yes, sends a `remove_data()` RPC as it was previously.

Depends on !66 and !74.

Closes #94

See merge request !60
parents f59fe8fd ff0c5dc1
Loading
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -55,7 +55,7 @@ to_underlying(E e) {
    return static_cast<typename std::underlying_type<E>::type>(e);
}

std::shared_ptr<gkfs::metadata::Metadata>
std::optional<gkfs::metadata::Metadata>
get_metadata(const std::string& path, bool follow_links = false);

int
+1 −2
Original line number Diff line number Diff line
@@ -41,8 +41,7 @@ int
forward_stat(const std::string& path, std::string& attr);

int
forward_remove(const std::string& path, bool remove_metadentry_only,
               ssize_t size);
forward_remove(const std::string& path);

int
forward_decr_size(const std::string& path, size_t length);
+134 −10
Original line number Diff line number Diff line
@@ -462,8 +462,8 @@ struct stat {
};

//==============================================================================
// definitions for remove
struct remove {
// definitions for remove metadata
struct remove_metadata {

    // forward declarations of public input/output types for this RPC
    class input;
@@ -471,23 +471,23 @@ struct remove {
    class output;

    // traits used so that the engine knows what to do with the RPC
    using self_type = remove;
    using self_type = remove_metadata;
    using handle_type = hermes::rpc_handle<self_type>;
    using input_type = input;
    using output_type = output;
    using mercury_input_type = rpc_rm_node_in_t;
    using mercury_output_type = rpc_err_out_t;
    using mercury_output_type = rpc_rm_metadata_out_t;

    // RPC public identifier
    // (N.B: we reuse the same IDs assigned by Margo so that the daemon
    // understands Hermes RPCs)
    constexpr static const uint64_t public_id = 2549415936;
    constexpr static const uint64_t public_id = 2087845888;

    // RPC internal Mercury identifier
    constexpr static const hg_id_t mercury_id = public_id;

    // RPC name
    constexpr static const auto name = gkfs::rpc::tag::remove;
    constexpr static const auto name = gkfs::rpc::tag::remove_metadata;

    // requires response?
    constexpr static const auto requires_response = true;
@@ -498,7 +498,7 @@ struct remove {

    // Mercury callback to serialize output arguments
    constexpr static const auto mercury_out_proc_cb =
            HG_GEN_PROC_NAME(rpc_err_out_t);
            HG_GEN_PROC_NAME(rpc_rm_metadata_out_t);

    class input {

@@ -541,9 +541,10 @@ struct remove {
        hermes::detail::post_to_mercury(ExecutionContext*);

    public:
        output() : m_err() {}
        output() : m_err(), m_size(), m_mode() {}

        output(int32_t err) : m_err(err) {}
        output(int32_t err, int64_t size, uint32_t mode)
            : m_err(err), m_size(size), m_mode(mode) {}

        output(output&& rhs) = default;

@@ -555,8 +556,10 @@ struct remove {
        output&
        operator=(const output& other) = default;

        explicit output(const rpc_err_out_t& out) {
        explicit output(const rpc_rm_metadata_out_t& out) {
            m_err = out.err;
            m_size = out.size;
            m_mode = out.mode;
        }

        int32_t
@@ -564,8 +567,21 @@ struct remove {
            return m_err;
        }

        int64_t
        size() const {
            return m_size;
        }

        uint32_t
        mode() const {
            return m_mode;
        };


    private:
        int32_t m_err;
        int64_t m_size;
        uint32_t m_mode;
    };
};

@@ -1285,6 +1301,114 @@ struct mk_symlink {

#endif // HAS_SYMLINKS

//==============================================================================
// definitions for remove data
//
// Describes the `remove_data` RPC to the engine: the request carries a file
// path (rpc_rm_node_in_t) and the reply carries a single error code
// (rpc_err_out_t).
struct remove_data {

    // public input/output types for this RPC, defined further below
    class input;

    class output;

    // traits the engine relies on to handle this RPC
    using self_type = remove_data;
    using handle_type = hermes::rpc_handle<self_type>;
    using input_type = input;
    using output_type = output;
    using mercury_input_type = rpc_rm_node_in_t;
    using mercury_output_type = rpc_err_out_t;

    // RPC public identifier
    // (N.B: the IDs assigned by Margo are reused so that the daemon
    // understands Hermes RPCs)
    static constexpr uint64_t public_id = 2649292800;

    // RPC internal Mercury identifier (kept equal to the public one)
    static constexpr hg_id_t mercury_id = public_id;

    // RPC name
    static constexpr auto name = gkfs::rpc::tag::remove_data;

    // a response is expected for this RPC
    static constexpr auto requires_response = true;

    // Mercury callback that serializes the input arguments
    static constexpr auto mercury_in_proc_cb =
            HG_GEN_PROC_NAME(rpc_rm_node_in_t);

    // Mercury callback that serializes the output arguments
    static constexpr auto mercury_out_proc_cb =
            HG_GEN_PROC_NAME(rpc_err_out_t);

    // Request payload: the path this RPC operates on.
    class input {

        template <typename ExecutionContext>
        friend hg_return_t
        hermes::detail::post_to_mercury(ExecutionContext*);

    public:
        // build a request from a path
        input(const std::string& path) : m_path(path) {}

        // build a request from its Mercury representation
        explicit input(const rpc_rm_node_in_t& other) : m_path(other.path) {}

        // copy and move operations are the compiler-generated ones
        input(const input& other) = default;

        input(input&& rhs) = default;

        input&
        operator=(const input& other) = default;

        input&
        operator=(input&& rhs) = default;

        // returns a copy of the stored path
        std::string
        path() const {
            return m_path;
        }

        // Mercury representation of this request; the returned struct points
        // into m_path, so it is only usable while this object is alive
        explicit operator rpc_rm_node_in_t() {
            return {m_path.c_str()};
        }

    private:
        std::string m_path;
    };

    // Response payload: the error code returned for the request.
    class output {

        template <typename ExecutionContext>
        friend hg_return_t
        hermes::detail::post_to_mercury(ExecutionContext*);

    public:
        // value-initializes the error code (i.e. zero)
        output() : m_err() {}

        output(int32_t err) : m_err(err) {}

        // build a response from its Mercury representation
        explicit output(const rpc_err_out_t& out) : m_err(out.err) {}

        // copy and move operations are the compiler-generated ones
        output(const output& other) = default;

        output(output&& rhs) = default;

        output&
        operator=(const output& other) = default;

        output&
        operator=(output&& rhs) = default;

        // error code carried by the response
        int32_t
        err() const {
            return m_err;
        }

    private:
        int32_t m_err;
    };
};

//==============================================================================
// definitions for write_data
struct write_data {
+12 −0
Original line number Diff line number Diff line
@@ -48,6 +48,18 @@ constexpr auto use_ctime = false;
constexpr auto use_mtime = false;
constexpr auto use_link_cnt = false;
constexpr auto use_blocks = false;
/*
 * If true, all chunks on the same host are removed during a metadata remove
 * RPC. This is a technical optimization that reduces the number of RPCs for
 * remove operations. Making this configurable could also be useful for future
 * asynchronous remove implementations where the data should not be removed
 * immediately.
 */
constexpr auto implicit_data_removal = true;

// metadata logic
// Check for the existence of a file's metadata before it is created. This
// check is done at the RocksDB level.
constexpr auto create_exist_check = true;
} // namespace metadata

namespace rpc {
+5 −0
Original line number Diff line number Diff line
@@ -30,6 +30,11 @@ public:
    explicit NotFoundException(const std::string& s) : DBException(s){};
};

// DBException flavor that signals an "already exists" condition.
// NOTE(review): presumably raised when a create hits an existing metadata
// entry — confirm against the DB backend that throws it.
class ExistsException : public DBException {
public:
    // msg is forwarded unchanged to the DBException base
    explicit ExistsException(const std::string& msg) : DBException(msg) {}
};

} // namespace metadata
} // namespace gkfs

Loading