From d3d9f17064ada8c815e350db3b65734e4fb96765 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 8 Mar 2023 18:33:29 +0100 Subject: [PATCH 1/7] Client: Adding append support --- include/client/gkfs_functions.hpp | 2 +- include/client/rpc/forward_data.hpp | 5 +-- include/client/rpc/rpc_types.hpp | 10 ++--- src/client/gkfs_functions.cpp | 61 +++++++++++++++++------------ src/client/rpc/forward_data.cpp | 13 +----- 5 files changed, 45 insertions(+), 46 deletions(-) diff --git a/include/client/gkfs_functions.hpp b/include/client/gkfs_functions.hpp index 437b052ef..d005d2fb9 100644 --- a/include/client/gkfs_functions.hpp +++ b/include/client/gkfs_functions.hpp @@ -106,7 +106,7 @@ gkfs_readlink(const std::string& path, char* buf, int bufsize); ssize_t gkfs_pwrite(std::shared_ptr file, const char* buf, - size_t count, off64_t offset); + size_t count, off64_t offset, bool update_pos = false); ssize_t gkfs_pwrite_ws(int fd, const void* buf, size_t count, off64_t offset); diff --git a/include/client/rpc/forward_data.hpp b/include/client/rpc/forward_data.hpp index d22abe9da..8518e997f 100644 --- a/include/client/rpc/forward_data.hpp +++ b/include/client/rpc/forward_data.hpp @@ -42,9 +42,8 @@ struct ChunkStat { // an exception. 
std::pair -forward_write(const std::string& path, const void* buf, bool append_flag, - off64_t in_offset, size_t write_size, - int64_t updated_metadentry_size); +forward_write(const std::string& path, const void* buf, off64_t offset, + size_t write_size); std::pair forward_read(const std::string& path, void* buf, off64_t offset, diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index ad0f53a1b..5993a31c3 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -1158,10 +1158,10 @@ struct update_metadentry_size { hermes::detail::post_to_mercury(ExecutionContext*); public: - output() : m_err(), m_ret_size() {} + output() : m_err(), m_ret_offset() {} output(int32_t err, int64_t ret_size) - : m_err(err), m_ret_size(ret_size) {} + : m_err(err), m_ret_offset(ret_size) {} output(output&& rhs) = default; @@ -1175,7 +1175,7 @@ struct update_metadentry_size { explicit output(const rpc_update_metadentry_size_out_t& out) { m_err = out.err; - m_ret_size = out.ret_size; + m_ret_offset = out.ret_offset; } int32_t @@ -1185,12 +1185,12 @@ struct update_metadentry_size { int64_t ret_size() const { - return m_ret_size; + return m_ret_offset; } private: int32_t m_err; - int64_t m_ret_size; + int64_t m_ret_offset; }; }; diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 1d664ed5e..282a3eae9 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -138,12 +138,6 @@ gkfs_open(const std::string& path, mode_t mode, int flags) { return -1; } - if(flags & O_APPEND) { - LOG(ERROR, "`O_APPEND` flag is not supported"); - errno = ENOTSUP; - return -1; - } - // metadata object filled during create or stat gkfs::metadata::Metadata md{}; if(flags & O_CREAT) { @@ -855,38 +849,61 @@ gkfs_dup2(const int oldfd, const int newfd) { * @param buf * @param count * @param offset + * @param update_pos pos should only be updated for some write operations (see + * man 2 pwrite) * @return written 
size or -1 on error */ ssize_t gkfs_pwrite(std::shared_ptr file, const char* buf, - size_t count, off64_t offset) { + size_t count, off64_t offset, bool update_pos) { if(file->type() != gkfs::filemap::FileType::regular) { assert(file->type() == gkfs::filemap::FileType::directory); - LOG(WARNING, "Cannot read from directory"); + LOG(WARNING, "Cannot write to directory"); errno = EISDIR; return -1; } - auto path = make_shared(file->path()); - auto append_flag = file->get_flag(gkfs::filemap::OpenFile_flags::append); + auto path = make_unique(file->path()); + auto is_append = file->get_flag(gkfs::filemap::OpenFile_flags::append); - auto ret_update_size = gkfs::rpc::forward_update_metadentry_size( - *path, count, offset, append_flag); - auto err = ret_update_size.first; + auto ret_offset = gkfs::rpc::forward_update_metadentry_size( + *path, count, offset, is_append); + auto err = ret_offset.first; if(err) { LOG(ERROR, "update_metadentry_size() failed with err '{}'", err); errno = err; return -1; } - auto updated_size = ret_update_size.second; + if(is_append) { + // When append is set the EOF is set to the offset + // forward_update_metadentry_size returns. This is because it is an + // atomic operation on the server and reserves the space for this append + if(ret_offset.second == -1) { + LOG(ERROR, + "update_metadentry_size() received -1 as starting offset. " + "This occurs when the starting offset could not be extracted " + "from RocksDB's merge operations. 
Inform GekkoFS devs."); + errno = EIO; + return -1; + } + offset = ret_offset.second; + } - auto ret_write = gkfs::rpc::forward_write(*path, buf, append_flag, offset, - count, updated_size); + auto ret_write = gkfs::rpc::forward_write(*path, buf, offset, count); err = ret_write.first; if(err) { LOG(WARNING, "gkfs::rpc::forward_write() failed with err '{}'", err); errno = err; return -1; } + if(update_pos) { + // Update offset in file descriptor in the file map + file->pos(offset + ret_write.second); + } + if(static_cast(ret_write.second) != count) { + LOG(WARNING, + "gkfs::rpc::forward_write() wrote '{}' bytes instead of '{}'", + ret_write.second, count); + } return ret_write.second; // return written size } @@ -916,17 +933,9 @@ gkfs_pwrite_ws(int fd, const void* buf, size_t count, off64_t offset) { ssize_t gkfs_write(int fd, const void* buf, size_t count) { auto gkfs_fd = CTX->file_map()->get(fd); - auto pos = gkfs_fd->pos(); // retrieve the current offset - if(gkfs_fd->get_flag(gkfs::filemap::OpenFile_flags::append)) { - gkfs_lseek(gkfs_fd, 0, SEEK_END); - pos = gkfs_fd->pos(); // Pos should be updated with append - } + // call pwrite and update pos auto ret = gkfs_pwrite(gkfs_fd, reinterpret_cast(buf), count, - pos); - // Update offset in file descriptor in the file map - if(ret > 0) { - gkfs_fd->pos(pos + count); - } + gkfs_fd->pos(), true); return ret; } diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index b6dcd85e9..93d72efcd 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -46,23 +46,17 @@ namespace gkfs::rpc { * NOTE: No errno is defined here! */ -// TODO If we decide to keep this functionality with one segment, the function -// can be merged mostly. Code is mostly redundant - /** * Send an RPC request to write from a buffer. 
* @param path * @param buf * @param append_flag - * @param in_offset * @param write_size - * @param updated_metadentry_size * @return pair */ pair -forward_write(const string& path, const void* buf, const bool append_flag, - const off64_t in_offset, const size_t write_size, - const int64_t updated_metadentry_size) { +forward_write(const string& path, const void* buf, const off64_t offset, + const size_t write_size) { // import pow2-optimized arithmetic functions using namespace gkfs::utils::arithmetic; @@ -71,9 +65,6 @@ forward_write(const string& path, const void* buf, const bool append_flag, // Calculate chunkid boundaries and numbers so that daemons know in // which interval to look for chunks - off64_t offset = - append_flag ? in_offset : (updated_metadentry_size - write_size); - auto chnk_start = block_index(offset, gkfs::config::rpc::chunksize); auto chnk_end = block_index((offset + write_size) - 1, gkfs::config::rpc::chunksize); -- GitLab From 8d17b2474c8718f7b437274d5a8d694ed451f1e7 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 8 Mar 2023 18:33:39 +0100 Subject: [PATCH 2/7] Daemon: Adding append support --- include/common/metadata.hpp | 3 + include/common/rpc/rpc_types.hpp | 2 +- include/daemon/backend/metadata/db.hpp | 8 +- include/daemon/backend/metadata/merge.hpp | 44 ++++++++--- .../backend/metadata/metadata_backend.hpp | 13 ++-- .../backend/metadata/metadata_module.hpp | 20 ++++- .../backend/metadata/rocksdb_backend.hpp | 7 +- include/daemon/ops/metadentry.hpp | 2 +- src/common/metadata.cpp | 26 +++++++ src/daemon/backend/metadata/db.cpp | 14 +--- src/daemon/backend/metadata/merge.cpp | 73 ++++++++++--------- .../backend/metadata/metadata_module.cpp | 20 ++++- .../backend/metadata/rocksdb_backend.cpp | 59 ++++++++++++--- src/daemon/handler/srv_metadata.cpp | 10 +-- src/daemon/ops/metadentry.cpp | 15 ++-- 15 files changed, 221 insertions(+), 95 deletions(-) diff --git a/include/common/metadata.hpp b/include/common/metadata.hpp index 
6332b511a..1ae0f4046 100644 --- a/include/common/metadata.hpp +++ b/include/common/metadata.hpp @@ -40,6 +40,9 @@ namespace gkfs::metadata { constexpr mode_t LINK_MODE = ((S_IRWXU | S_IRWXG | S_IRWXO) | S_IFLNK); +uint16_t +gen_unique_id(const std::string& path); + class Metadata { private: time_t atime_{}; // access time. gets updated on file access unless mounted diff --git a/include/common/rpc/rpc_types.hpp b/include/common/rpc/rpc_types.hpp index 81450ba68..19612c45c 100644 --- a/include/common/rpc/rpc_types.hpp +++ b/include/common/rpc/rpc_types.hpp @@ -73,7 +73,7 @@ MERCURY_GEN_PROC(rpc_update_metadentry_size_in_t, (hg_int64_t) (offset))((hg_bool_t) (append))) MERCURY_GEN_PROC(rpc_update_metadentry_size_out_t, - ((hg_int32_t) (err))((hg_int64_t) (ret_size))) + ((hg_int32_t) (err))((hg_int64_t) (ret_offset))) MERCURY_GEN_PROC(rpc_get_metadentry_size_out_t, ((hg_int32_t) (err))((hg_int64_t) (ret_size))) diff --git a/include/daemon/backend/metadata/db.hpp b/include/daemon/backend/metadata/db.hpp index 1114a9e31..b7eeb1417 100644 --- a/include/daemon/backend/metadata/db.hpp +++ b/include/daemon/backend/metadata/db.hpp @@ -50,7 +50,6 @@ namespace gkfs::metadata { constexpr auto rocksdb_backend = "rocksdb"; constexpr auto parallax_backend = "parallaxdb"; - class MetadataDB { private: std::string path_; @@ -121,12 +120,13 @@ public: * @brief Increases only the size part of the metadata entry via a RocksDB * Operand. 
* @param key KV store key - * @param size new size for entry + * @param io_size new size for entry * @param append * @throws DBException on failure, NotFoundException if entry doesn't exist */ - void - increase_size(const std::string& key, size_t size, bool append); + off_t + increase_size(const std::string& key, size_t io_size, off_t offset, + bool append); /** * @brief Decreases only the size part of the metadata entry via a RocksDB diff --git a/include/daemon/backend/metadata/merge.hpp b/include/daemon/backend/metadata/merge.hpp index ec1f7b8ec..ba354057f 100644 --- a/include/daemon/backend/metadata/merge.hpp +++ b/include/daemon/backend/metadata/merge.hpp @@ -29,7 +29,7 @@ #ifndef DB_MERGE_HPP #define DB_MERGE_HPP - +#include #include #include @@ -68,15 +68,18 @@ protected: }; class IncreaseSizeOperand : public MergeOperand { -public: - constexpr const static char separator = ','; - constexpr const static char true_char = 't'; - constexpr const static char false_char = 'f'; +private: + constexpr const static char serialize_sep = ','; + constexpr const static char serialize_end = '\0'; + + size_t size_; + uint16_t merge_id_; + bool append_; - size_t size; - bool append; +public: + IncreaseSizeOperand(size_t size); - IncreaseSizeOperand(size_t size, bool append); + IncreaseSizeOperand(size_t size, uint16_t merge_id, bool append); explicit IncreaseSizeOperand(const rdb::Slice& serialized_op); @@ -85,12 +88,28 @@ public: std::string serialize_params() const override; + + size_t + size() const { + return size_; + } + + uint16_t + merge_id() const { + return merge_id_; + } + + bool + append() const { + return append_; + } }; class DecreaseSizeOperand : public MergeOperand { -public: - size_t size; +private: + size_t size_; +public: explicit DecreaseSizeOperand(size_t size); explicit DecreaseSizeOperand(const rdb::Slice& serialized_op); @@ -100,6 +119,11 @@ public: std::string serialize_params() const override; + + size_t + size() const { + return size_; + } }; class 
CreateOperand : public MergeOperand { diff --git a/include/daemon/backend/metadata/metadata_backend.hpp b/include/daemon/backend/metadata/metadata_backend.hpp index a22392496..8ba98013c 100644 --- a/include/daemon/backend/metadata/metadata_backend.hpp +++ b/include/daemon/backend/metadata/metadata_backend.hpp @@ -60,8 +60,9 @@ public: update(const std::string& old_key, const std::string& new_key, const std::string& val) = 0; - virtual void - increase_size(const std::string& key, size_t size, bool append) = 0; + virtual off_t + increase_size(const std::string& key, size_t size, off_t offset, + bool append) = 0; virtual void decrease_size(const std::string& key, size_t size) = 0; @@ -114,9 +115,11 @@ public: static_cast(*this).update_impl(old_key, new_key, val); } - void - increase_size(const std::string& key, size_t size, bool append) { - static_cast(*this).increase_size_impl(key, size, append); + off_t + increase_size(const std::string& key, size_t size, off_t offset, + bool append) { + return static_cast(*this).increase_size_impl(key, size, offset, + append); } void diff --git a/include/daemon/backend/metadata/metadata_module.hpp b/include/daemon/backend/metadata/metadata_module.hpp index 4c0376c72..75b2ce6a9 100644 --- a/include/daemon/backend/metadata/metadata_module.hpp +++ b/include/daemon/backend/metadata/metadata_module.hpp @@ -30,8 +30,9 @@ #define GEKKOFS_DAEMON_METADATA_LOGGING_HPP #include +#include -namespace gkfs::data { +namespace gkfs::metadata { class MetadataModule { @@ -39,6 +40,8 @@ private: MetadataModule() = default; std::shared_ptr log_; + std::map append_offset_reserve_{}; + std::mutex append_offset_reserve_mutex_{}; public: static constexpr const char* LOGGER_NAME = "MetadataModule"; @@ -59,12 +62,21 @@ public: void log(const std::shared_ptr& log); + + const std::map& + append_offset_reserve() const; + + void + append_offset_reserve_put(uint16_t merge_id, size_t offset); + + size_t + append_offset_reserve_get_and_erase(uint16_t merge_id); }; 
#define GKFS_METADATA_MOD \ - (static_cast( \ - gkfs::data::MetadataModule::getInstance())) + (static_cast( \ + gkfs::metadata::MetadataModule::getInstance())) -} // namespace gkfs::data +} // namespace gkfs::metadata #endif // GEKKOFS_DAEMON_METADATA_LOGGING_HPP diff --git a/include/daemon/backend/metadata/rocksdb_backend.hpp b/include/daemon/backend/metadata/rocksdb_backend.hpp index d889c2263..c7b331e03 100644 --- a/include/daemon/backend/metadata/rocksdb_backend.hpp +++ b/include/daemon/backend/metadata/rocksdb_backend.hpp @@ -130,12 +130,13 @@ public: * Updates the size on the metadata * Operation. E.g., called before a write() call * @param key - * @param size + * @param io_size * @param append * @throws DBException on failure */ - void - increase_size_impl(const std::string& key, size_t size, bool append); + off_t + increase_size_impl(const std::string& key, size_t io_size, off_t offset, + bool append); /** * Decreases the size on the metadata diff --git a/include/daemon/ops/metadentry.hpp b/include/daemon/ops/metadentry.hpp index 2aec3cadf..909aa5a5a 100644 --- a/include/daemon/ops/metadentry.hpp +++ b/include/daemon/ops/metadentry.hpp @@ -55,7 +55,7 @@ create(const std::string& path, Metadata& md); void update(const std::string& path, Metadata& md); -void +off_t update_size(const std::string& path, size_t io_size, off_t offset, bool append); void diff --git a/src/common/metadata.cpp b/src/common/metadata.cpp index d5836216a..26f60ced5 100644 --- a/src/common/metadata.cpp +++ b/src/common/metadata.cpp @@ -38,11 +38,37 @@ extern "C" { #include #include +#include namespace gkfs::metadata { static const char MSP = '|'; // metadata separator +/** + * Generate a unique ID for a given path + * @param path + * @return unique ID + */ +uint16_t +gen_unique_id(const std::string& path) { + // Generate a random salt value using a pseudo-random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, + 
std::numeric_limits::max()); + auto salt = static_cast(dis(gen)); + + // Concatenate the identifier and salt values into a single string + auto input_str = fmt::format("{}{}", path, salt); + + // Use std::hash function to generate a hash value from the input string + std::hash const hasher; + auto hash_value = hasher(input_str); + + // Use the lower 16 bits of the hash value as the unique ID + return static_cast(hash_value & 0xFFFF); +} + Metadata::Metadata(const mode_t mode) : atime_(), mtime_(), ctime_(), mode_(mode), link_count_(0), size_(0), blocks_(0) { diff --git a/src/daemon/backend/metadata/db.cpp b/src/daemon/backend/metadata/db.cpp index fcf629fa3..341dd2cfb 100644 --- a/src/daemon/backend/metadata/db.cpp +++ b/src/daemon/backend/metadata/db.cpp @@ -33,7 +33,6 @@ #include #include -#include #include extern "C" { @@ -124,26 +123,22 @@ MetadataDB::put(const std::string& key, const std::string& val) { */ void MetadataDB::put_no_exist(const std::string& key, const std::string& val) { - backend_->put_no_exist(key, val); } void MetadataDB::remove(const std::string& key) { - backend_->remove(key); } bool MetadataDB::exists(const std::string& key) { - return backend_->exists(key); } void MetadataDB::update(const std::string& old_key, const std::string& new_key, const std::string& val) { - backend_->update(old_key, new_key, val); } @@ -152,10 +147,10 @@ MetadataDB::update(const std::string& old_key, const std::string& new_key, * E.g., called before a write() call * @endinternal */ -void -MetadataDB::increase_size(const std::string& key, size_t size, bool append) { - - backend_->increase_size(key, size, append); +off_t +MetadataDB::increase_size(const std::string& key, size_t io_size, off_t offset, + bool append) { + return backend_->increase_size(key, io_size, offset, append); } /** @@ -165,7 +160,6 @@ MetadataDB::increase_size(const std::string& key, size_t size, bool append) { */ void MetadataDB::decrease_size(const std::string& key, size_t size) { - 
backend_->decrease_size(key, size); } diff --git a/src/daemon/backend/metadata/merge.cpp b/src/daemon/backend/metadata/merge.cpp index 16cf44434..a35259b66 100644 --- a/src/daemon/backend/metadata/merge.cpp +++ b/src/daemon/backend/metadata/merge.cpp @@ -60,24 +60,29 @@ MergeOperand::get_params(const rdb::Slice& serialized_op) { return {serialized_op.data() + 2, serialized_op.size() - 2}; } -IncreaseSizeOperand::IncreaseSizeOperand(const size_t size, const bool append) - : size(size), append(append) {} +IncreaseSizeOperand::IncreaseSizeOperand(const size_t size) + : size_(size), merge_id_(0), append_(false) {} + +IncreaseSizeOperand::IncreaseSizeOperand(const size_t size, + const uint16_t merge_id, + const bool append) + : size_(size), merge_id_(merge_id), append_(append) {} IncreaseSizeOperand::IncreaseSizeOperand(const rdb::Slice& serialized_op) { - size_t chrs_parsed = 0; size_t read = 0; - // Parse size - size = ::stoul(serialized_op.data() + chrs_parsed, &read); - chrs_parsed += read + 1; - assert(serialized_op[chrs_parsed - 1] == separator); - - // Parse append flag - assert(serialized_op[chrs_parsed] == false_char || - serialized_op[chrs_parsed] == true_char); - append = serialized_op[chrs_parsed] != false_char; - // check that we consumed all the input string - assert(chrs_parsed + 1 == serialized_op.size()); + size_ = std::stoul(serialized_op.data(), &read); + if(read + 1 == serialized_op.size() || + serialized_op[read] == serialize_end) { + merge_id_ = 0; + append_ = false; + return; + } + assert(serialized_op[read] == serialize_sep); + // Parse merge id + merge_id_ = static_cast( + std::stoul(serialized_op.data() + read + 1, nullptr)); + append_ = true; } OperandID @@ -87,23 +92,24 @@ IncreaseSizeOperand::id() const { string IncreaseSizeOperand::serialize_params() const { - string s; - s.reserve(3); - s += ::to_string(size); - s += this->separator; - s += !append ? 
false_char : true_char; - return s; + // serialize_end avoids rogue characters in the serialized string + if(append_) + return fmt::format("{}{}{}{}", size_, serialize_sep, merge_id_, + serialize_end); + else { + return fmt::format("{}{}", size_, serialize_end); + } } -DecreaseSizeOperand::DecreaseSizeOperand(const size_t size) : size(size) {} +DecreaseSizeOperand::DecreaseSizeOperand(const size_t size) : size_(size) {} DecreaseSizeOperand::DecreaseSizeOperand(const rdb::Slice& serialized_op) { // Parse size size_t read = 0; // we need to convert serialized_op to a string because it doesn't contain // the leading slash needed by stoul - size = ::stoul(serialized_op.ToString(), &read); + size_ = ::stoul(serialized_op.ToString(), &read); // check that we consumed all the input string assert(read == serialized_op.size()); } @@ -115,7 +121,7 @@ DecreaseSizeOperand::id() const { string DecreaseSizeOperand::serialize_params() const { - return ::to_string(size); + return ::to_string(size_); } @@ -138,15 +144,11 @@ MetadataMergeOperator::FullMergeV2(const MergeOperationInput& merge_in, string prev_md_value; auto ops_it = merge_in.operand_list.cbegin(); - if(merge_in.existing_value == nullptr) { // The key to operate on doesn't exists in DB if(MergeOperand::get_id(ops_it[0]) != OperandID::create) { throw ::runtime_error( "Merge operation failed: key do not exists and first operand is not a creation"); - // TODO use logger to print err info; - // Log(logger, "Key %s do not exists", - // existing_value->ToString().c_str()); return false; } prev_md_value = MergeOperand::get_params(ops_it[0]).ToString(); ops_it++; @@ -166,16 +168,21 @@ MetadataMergeOperator::FullMergeV2(const MergeOperationInput& merge_in, if(operand_id == OperandID::increase_size) { auto op = IncreaseSizeOperand(parameters); - if(op.append) { - // append mode, just increment file - fsize += op.size; + if(op.append()) { + auto curr_offset = fsize; + // append mode, just increment file size + fsize += op.size(); 
+ // save the offset where this append operation should start + // it is retrieved later in RocksDBBackend::increase_size_impl() + GKFS_METADATA_MOD->append_offset_reserve_put(op.merge_id(), + curr_offset); } else { - fsize = ::max(op.size, fsize); + fsize = ::max(op.size(), fsize); } } else if(operand_id == OperandID::decrease_size) { auto op = DecreaseSizeOperand(parameters); - assert(op.size < fsize); // we assume no concurrency here - fsize = op.size; + assert(op.size() < fsize); // we assume no concurrency here + fsize = op.size(); } else if(operand_id == OperandID::create) { continue; } else { diff --git a/src/daemon/backend/metadata/metadata_module.cpp b/src/daemon/backend/metadata/metadata_module.cpp index ad7d51567..9c10698d2 100644 --- a/src/daemon/backend/metadata/metadata_module.cpp +++ b/src/daemon/backend/metadata/metadata_module.cpp @@ -28,7 +28,7 @@ #include -namespace gkfs::data { +namespace gkfs::metadata { const std::shared_ptr& MetadataModule::log() const { @@ -39,5 +39,21 @@ void MetadataModule::log(const std::shared_ptr& log) { MetadataModule::log_ = log; } +const std::map& +MetadataModule::append_offset_reserve() const { + return append_offset_reserve_; +} +void +MetadataModule::append_offset_reserve_put(uint16_t merge_id, size_t offset) { + std::lock_guard lock(append_offset_reserve_mutex_); + append_offset_reserve_[merge_id] = offset; +} +size_t +MetadataModule::append_offset_reserve_get_and_erase(uint16_t merge_id) { + std::lock_guard lock(append_offset_reserve_mutex_); + auto out = append_offset_reserve_.at(merge_id); + append_offset_reserve_.erase(merge_id); + return out; +} -} // namespace gkfs::data +} // namespace gkfs::metadata diff --git a/src/daemon/backend/metadata/rocksdb_backend.cpp b/src/daemon/backend/metadata/rocksdb_backend.cpp index 99a0263d1..17916ca6c 100644 --- a/src/daemon/backend/metadata/rocksdb_backend.cpp +++ b/src/daemon/backend/metadata/rocksdb_backend.cpp @@ -197,20 +197,59 @@ RocksDBBackend::update_impl(const 
std::string& old_key, /** * Updates the size on the metadata * Operation. E.g., called before a write() call + * + * A special case represents the append operation. Since multiple processes + * could want to append a file in parallel, the corresponding offsets where the + * write operation starts, needs to be reserved. This is an expensive operation + * as we need to force a RocksDB Merge operation to receive the starting offset + * for this write request. + * * @param key - * @param size + * @param io_size + * @param offset * @param append - * @throws DBException on failure + * @return offset where the write operation should start. This is only used when + * append is set */ -void -RocksDBBackend::increase_size_impl(const std::string& key, size_t size, - bool append) { - - auto uop = IncreaseSizeOperand(size, append); - auto s = db_->Merge(write_opts_, key, uop.serialize()); - if(!s.ok()) { - throw_status_excpt(s); +off_t +RocksDBBackend::increase_size_impl(const std::string& key, size_t io_size, + off_t offset, bool append) { + off_t res_offset = -1; + if(append) { + auto merge_id = gkfs::metadata::gen_unique_id(key); + // GKFS_METADATA_MOD->log()->info("{}() writing to file {} with + // merge_id {} io_size {} offset {} append {}", + // __func__, key, merge_id, + // io_size, offset, append); + // no offset needed because new size is current file size + io_size + auto uop = IncreaseSizeOperand(io_size, merge_id, append); + auto s = db_->Merge(write_opts_, key, uop.serialize()); + if(!s.ok()) { + throw_status_excpt(s); + } else { + // force merge operation to run + get_impl(key); + try { + // the offset was added during FullMergeV2() call + res_offset = + GKFS_METADATA_MOD->append_offset_reserve_get_and_erase( + merge_id); + } catch(std::out_of_range& e) { + GKFS_METADATA_MOD->log()->warn( + "{}() - out_of_range exception: {} when attempting to get offset for key {}", + __func__, e.what(), key); + } + } + } else { + // In the standard case we simply add the I/O 
request size to the + // offset. + auto uop = IncreaseSizeOperand(offset + io_size); + auto s = db_->Merge(write_opts_, key, uop.serialize()); + if(!s.ok()) { + throw_status_excpt(s); + } } + return res_offset; } /** diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index 7dcec929d..cc6fd466a 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -411,7 +411,6 @@ rpc_srv_update_metadentry_size(hg_handle_t handle) { rpc_update_metadentry_size_in_t in{}; rpc_update_metadentry_size_out_t out{}; - auto ret = margo_get_input(handle, &in); if(ret != HG_SUCCESS) GKFS_DATA->spdlogger()->error( @@ -422,13 +421,10 @@ rpc_srv_update_metadentry_size(hg_handle_t handle) { in.path, in.size, in.offset, in.append); try { - gkfs::metadata::update_size(in.path, in.size, in.offset, - (in.append == HG_TRUE)); + auto append = in.append == HG_TRUE; + out.ret_offset = gkfs::metadata::update_size(in.path, in.size, + in.offset, append); out.err = 0; - // TODO the actual size of the file could be different after the size - // update - // do to concurrency on size - out.ret_size = in.size + in.offset; } catch(const gkfs::metadata::NotFoundException& e) { GKFS_DATA->spdlogger()->debug("{}() Entry not found: '{}'", __func__, in.path); diff --git a/src/daemon/ops/metadentry.cpp b/src/daemon/ops/metadentry.cpp index e73ca0aae..e80655fb9 100644 --- a/src/daemon/ops/metadentry.cpp +++ b/src/daemon/ops/metadentry.cpp @@ -129,15 +129,20 @@ update(const string& path, Metadata& md) { } /** - * Updates a metadentry's size atomically and returns the corresponding size - * after update + * Updates a metadentry's size atomically and returns the starting offset for + * the I/O operation. This is primarily necessary for parallel write operations, + * e.g., with O_APPEND, where the EOF might have changed since opening the file. + * Therefore, we use update_size to assign a safe write interval to each + * parallel write operation. 
* @param path * @param io_size - * @return the updated size + * @param offset + * @param append + * @return starting offset for I/O operation */ -void +off_t update_size(const string& path, size_t io_size, off64_t offset, bool append) { - GKFS_DATA->mdb()->increase_size(path, io_size + offset, append); + return GKFS_DATA->mdb()->increase_size(path, io_size, offset, append); } /** -- GitLab From bfece2ab4401caab641c8cdee486363c46b93922 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 8 Mar 2023 14:27:55 +0100 Subject: [PATCH 3/7] Support append for Parallax --- .../backend/metadata/parallax_backend.hpp | 12 ++++++++---- .../daemon/backend/metadata/rocksdb_backend.hpp | 4 +++- .../backend/metadata/parallax_backend.cpp | 17 ++++++++++------- src/daemon/backend/metadata/rocksdb_backend.cpp | 10 +++------- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/include/daemon/backend/metadata/parallax_backend.hpp b/include/daemon/backend/metadata/parallax_backend.hpp index 948e15d48..be6154267 100644 --- a/include/daemon/backend/metadata/parallax_backend.hpp +++ b/include/daemon/backend/metadata/parallax_backend.hpp @@ -33,6 +33,7 @@ #include #include #include +#include extern "C" { #include } @@ -145,12 +146,15 @@ public: * Updates the size on the metadata * Operation. E.g., called before a write() call * @param key - * @param size + * @param io_size + * @param offset * @param append - * @throws DBException on failure + * @return offset where the write operation should start. 
This is only used + * when append is set */ - void - increase_size_impl(const std::string& key, size_t size, bool append); + off_t + increase_size_impl(const std::string& key, size_t io_size, off_t offset, + bool append); /** * Decreases the size on the metadata diff --git a/include/daemon/backend/metadata/rocksdb_backend.hpp b/include/daemon/backend/metadata/rocksdb_backend.hpp index c7b331e03..7a8257b7c 100644 --- a/include/daemon/backend/metadata/rocksdb_backend.hpp +++ b/include/daemon/backend/metadata/rocksdb_backend.hpp @@ -131,8 +131,10 @@ public: * Operation. E.g., called before a write() call * @param key * @param io_size + * @param offset * @param append - * @throws DBException on failure + * @return offset where the write operation should start. This is only used + * when append is set */ off_t increase_size_impl(const std::string& key, size_t io_size, off_t offset, diff --git a/src/daemon/backend/metadata/parallax_backend.cpp b/src/daemon/backend/metadata/parallax_backend.cpp index 484f58da4..d8282cce9 100644 --- a/src/daemon/backend/metadata/parallax_backend.cpp +++ b/src/daemon/backend/metadata/parallax_backend.cpp @@ -331,18 +331,21 @@ ParallaxBackend::update_impl(const std::string& old_key, * @param append * @throws DBException on failure */ -void -ParallaxBackend::increase_size_impl(const std::string& key, size_t size, - bool append) { +off_t +ParallaxBackend::increase_size_impl(const std::string& key, size_t io_size, + off_t offset, bool append) { lock_guard lock_guard(parallax_mutex_); - + off_t out_offset = -1; auto value = get(key); // Decompress string Metadata md(value); - if(append) - size += md.size(); - md.size(size); + if(append) { + out_offset = md.size(); + md.size(md.size() + io_size); + } else + md.size(offset + io_size); update(key, key, md.serialize()); + return out_offset; } /** diff --git a/src/daemon/backend/metadata/rocksdb_backend.cpp b/src/daemon/backend/metadata/rocksdb_backend.cpp index 17916ca6c..75eba14e7 100644 --- 
a/src/daemon/backend/metadata/rocksdb_backend.cpp +++ b/src/daemon/backend/metadata/rocksdb_backend.cpp @@ -214,13 +214,9 @@ RocksDBBackend::update_impl(const std::string& old_key, off_t RocksDBBackend::increase_size_impl(const std::string& key, size_t io_size, off_t offset, bool append) { - off_t res_offset = -1; + off_t out_offset = -1; if(append) { auto merge_id = gkfs::metadata::gen_unique_id(key); - // GKFS_METADATA_MOD->log()->info("{}() writing to file {} with - // merge_id {} io_size {} offset {} append {}", - // __func__, key, merge_id, - // io_size, offset, append); // no offset needed because new size is current file size + io_size auto uop = IncreaseSizeOperand(io_size, merge_id, append); auto s = db_->Merge(write_opts_, key, uop.serialize()); @@ -231,7 +227,7 @@ RocksDBBackend::increase_size_impl(const std::string& key, size_t io_size, get_impl(key); try { // the offset was added during FullMergeV2() call - res_offset = + out_offset = GKFS_METADATA_MOD->append_offset_reserve_get_and_erase( merge_id); } catch(std::out_of_range& e) { @@ -249,7 +245,7 @@ RocksDBBackend::increase_size_impl(const std::string& key, size_t io_size, throw_status_excpt(s); } } - return res_offset; + return out_offset; } /** -- GitLab From e290d905f6687e559f870d41430dff69bd17d3a9 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 8 Mar 2023 15:33:09 +0100 Subject: [PATCH 4/7] Cleanup and adding documentation --- include/daemon/backend/metadata/db.hpp | 4 +- include/daemon/backend/metadata/merge.hpp | 45 ++++++++++++-- .../backend/metadata/metadata_module.hpp | 26 ++++++++- include/daemon/ops/metadentry.hpp | 55 ++++++++++++++++++ src/daemon/backend/metadata/db.cpp | 16 ----- src/daemon/backend/metadata/merge.cpp | 17 +++++- .../backend/metadata/metadata_module.cpp | 3 + src/daemon/handler/srv_metadata.cpp | 5 +- src/daemon/ops/metadentry.cpp | 58 ++----------------- 9 files changed, 148 insertions(+), 81 deletions(-) diff --git a/include/daemon/backend/metadata/db.hpp 
b/include/daemon/backend/metadata/db.hpp index b7eeb1417..8753eb13e 100644 --- a/include/daemon/backend/metadata/db.hpp +++ b/include/daemon/backend/metadata/db.hpp @@ -118,7 +118,7 @@ public: /** * @brief Increases only the size part of the metadata entry via a RocksDB - * Operand. + * Operand as part of a write operation. * @param key KV store key * @param io_size new size for entry * @param append @@ -130,7 +130,7 @@ public: /** * @brief Decreases only the size part of the metadata entry via a RocksDB - * Operand/ + * Operand. This is used for truncate, e.g.. * @param key KV store key * @param size new size for entry * @throws DBException on failure, NotFoundException if entry doesn't exist diff --git a/include/daemon/backend/metadata/merge.hpp b/include/daemon/backend/metadata/merge.hpp index ba354057f..52fe30bcd 100644 --- a/include/daemon/backend/metadata/merge.hpp +++ b/include/daemon/backend/metadata/merge.hpp @@ -37,12 +37,18 @@ namespace rdb = rocksdb; namespace gkfs::metadata { +/** + * @brief Merge operator classifiers + */ enum class OperandID : char { increase_size = 'i', decrease_size = 'd', create = 'c' }; +/** + * @brief Base class for merge operands + */ class MergeOperand { public: constexpr static char operand_id_suffix = ':'; @@ -66,7 +72,9 @@ protected: virtual OperandID id() const = 0; }; - +/** + * @brief Increase size operand + */ class IncreaseSizeOperand : public MergeOperand { private: constexpr const static char serialize_sep = ','; @@ -104,7 +112,9 @@ public: return append_; } }; - +/** + * @brief Decrease size operand + */ class DecreaseSizeOperand : public MergeOperand { private: size_t size_; @@ -125,7 +135,9 @@ public: return size_; } }; - +/** + * @brief Create operand + */ class CreateOperand : public MergeOperand { public: std::string metadata; @@ -138,24 +150,49 @@ public: std::string serialize_params() const override; }; - +/** + * @brief Merge operator class passed to RocksDB, used during merge operations + */ class 
MetadataMergeOperator : public rocksdb::MergeOperator { public: ~MetadataMergeOperator() override = default; + /** + * @brief Merges all operands in chronological order for the same key + * @param merge_in Input operand + * @param merge_out Output operand + * @return Result of the merge operation + */ bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override; + /** + * @brief TODO functionality unclear. Currently unused. + * @param key + * @param operand_list + * @param new_value + * @param logger + * @return + */ bool PartialMergeMulti(const rdb::Slice& key, const std::deque& operand_list, std::string* new_value, rdb::Logger* logger) const override; + /** + * @brief Returns the name of this Merge operator + * @return + */ const char* Name() const override; + /** + * @brief Merge Operator configuration which allows merges with just a + * single operand. + * @return + */ bool AllowSingleOperand() const override; }; diff --git a/include/daemon/backend/metadata/metadata_module.hpp b/include/daemon/backend/metadata/metadata_module.hpp index 75b2ce6a9..c9cd569e5 100644 --- a/include/daemon/backend/metadata/metadata_module.hpp +++ b/include/daemon/backend/metadata/metadata_module.hpp @@ -34,18 +34,29 @@ namespace gkfs::metadata { +/** + * @brief MetadataModule is a singleton class that holds global data structures + * for all metadata operations.
+ */ class MetadataModule { private: MetadataModule() = default; - std::shared_ptr log_; + std::shared_ptr log_; ///< Metadata logger + ///< Map to remember and assign offsets to write append operations std::map append_offset_reserve_{}; - std::mutex append_offset_reserve_mutex_{}; + std::mutex append_offset_reserve_mutex_{}; ///< Mutex to protect + ///< append_offset_reserve_ public: + ///< Logger name static constexpr const char* LOGGER_NAME = "MetadataModule"; + /** + * @brief Get the MetadataModule singleton instance + * @return MetadataModule instance + */ static MetadataModule* getInstance() { static MetadataModule instance; @@ -66,9 +77,20 @@ public: const std::map& append_offset_reserve() const; + /** + * @brief Inserts entry into append_offset_reserve_ + * @param merge_id Merge ID + * @param offset Offset to reserve + */ void append_offset_reserve_put(uint16_t merge_id, size_t offset); + /** + * @brief Gets and erases entry from append_offset_reserve_ + * @param merge_id Merge ID + * @return Offset reserved for merge_id + * @throws std::out_of_range if merge_id is not in append_offset_reserve_ + */ size_t append_offset_reserve_get_and_erase(uint16_t merge_id); }; diff --git a/include/daemon/ops/metadentry.hpp b/include/daemon/ops/metadentry.hpp index 909aa5a5a..691aca399 100644 --- a/include/daemon/ops/metadentry.hpp +++ b/include/daemon/ops/metadentry.hpp @@ -34,30 +34,85 @@ namespace gkfs::metadata { +/** + * @brief Returns the metadata of an object at a specific path. 
The metadata can + * be of dummy values if configured + * @param path + * @return Metadata object of the entry + * @throws NotFoundException if the entry does not exist + */ Metadata get(const std::string& path); +/** + * @brief Get metadentry string only for path + * @param path + * @return + */ std::string get_str(const std::string& path); +/** + * @brief Gets the size of a metadentry + * @param path + * @return size of the metadentry + * @throws NotFoundException if the entry does not exist + */ size_t get_size(const std::string& path); +/** + * @brief Returns a vector of directory entries for given directory + * @param dir + * @return + */ std::vector> get_dirents(const std::string& dir); +/** + * @brief Returns a vector of directory entries for given directory (extended + * version) + * @param dir + * @return + */ std::vector> get_dirents_extended(const std::string& dir); +/** + * @brief Creates metadata (if required) and dentry at the same time + * @param path + * @param md + * @throws DBException + */ void create(const std::string& path, Metadata& md); +/** + * @brief Update metadentry by given Metadata object and path + * @param path + * @param md + */ void update(const std::string& path, Metadata& md); +/** + * @brief Updates a metadentry's size atomically and returns the starting offset + * for the I/O operation.
+ * @param path + * @param io_size + * @param offset + * @param append + * @return starting offset for I/O operation + */ off_t update_size(const std::string& path, size_t io_size, off_t offset, bool append); +/** + * @brief Remove metadentry if exists + * @param path + * @return + * @throws gkfs::metadata::DBException + */ void remove(const std::string& path); diff --git a/src/daemon/backend/metadata/db.cpp b/src/daemon/backend/metadata/db.cpp index 341dd2cfb..b15b3fca6 100644 --- a/src/daemon/backend/metadata/db.cpp +++ b/src/daemon/backend/metadata/db.cpp @@ -97,12 +97,6 @@ MetadataDB::~MetadataDB() { backend_.reset(); } -/** - * Gets a KV store value for a key - * @param key - * @return value - * @throws DBException on failure, NotFoundException if entry doesn't exist - */ std::string MetadataDB::get(const std::string& key) const { return backend_->get(key); @@ -142,22 +136,12 @@ MetadataDB::update(const std::string& old_key, const std::string& new_key, backend_->update(old_key, new_key, val); } -/** - * @internal - * E.g., called before a write() call - * @endinternal - */ off_t MetadataDB::increase_size(const std::string& key, size_t io_size, off_t offset, bool append) { return backend_->increase_size(key, io_size, offset, append); } -/** - * @internal - * E.g., called before a truncate() call - * @endinternal - */ void MetadataDB::decrease_size(const std::string& key, size_t size) { backend_->decrease_size(key, size); diff --git a/src/daemon/backend/metadata/merge.cpp b/src/daemon/backend/metadata/merge.cpp index a35259b66..4f5d134ef 100644 --- a/src/daemon/backend/metadata/merge.cpp +++ b/src/daemon/backend/metadata/merge.cpp @@ -137,7 +137,22 @@ CreateOperand::serialize_params() const { return metadata; } - +/** + * @internal + * Merges all operands in chronological order for the same key. + * + * This is called before each Get() operation, among others. Therefore, it is + * not possible to return a result for a specific merge operand. 
The return as + * well as merge_out->new_value is for RocksDB internals. The new value is the + * merged value of multiple values that is written to one key. + * + * Append operations receive special treatment here as the corresponding write + * function that triggered the size update needs the starting offset. In + * parallel append operations this is crucial. This is done by accessing a mutex + * protected std::map which may incur performance overheads for append + * operations. + * @endinternal + */ bool MetadataMergeOperator::FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const { diff --git a/src/daemon/backend/metadata/metadata_module.cpp b/src/daemon/backend/metadata/metadata_module.cpp index 9c10698d2..f77b36a72 100644 --- a/src/daemon/backend/metadata/metadata_module.cpp +++ b/src/daemon/backend/metadata/metadata_module.cpp @@ -39,15 +39,18 @@ void MetadataModule::log(const std::shared_ptr& log) { MetadataModule::log_ = log; } + const std::map& MetadataModule::append_offset_reserve() const { return append_offset_reserve_; } + void MetadataModule::append_offset_reserve_put(uint16_t merge_id, size_t offset) { std::lock_guard lock(append_offset_reserve_mutex_); append_offset_reserve_[merge_id] = offset; } + size_t MetadataModule::append_offset_reserve_get_and_erase(uint16_t merge_id) { std::lock_guard lock(append_offset_reserve_mutex_); diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index cc6fd466a..09dc9d318 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -421,9 +421,8 @@ rpc_srv_update_metadentry_size(hg_handle_t handle) { in.path, in.size, in.offset, in.append); try { - auto append = in.append == HG_TRUE; - out.ret_offset = gkfs::metadata::update_size(in.path, in.size, - in.offset, append); + out.ret_offset = gkfs::metadata::update_size( + in.path, in.size, in.offset, (in.append == HG_TRUE)); out.err = 0; } catch(const
gkfs::metadata::NotFoundException& e) { GKFS_DATA->spdlogger()->debug("{}() Entry not found: '{}'", __func__, diff --git a/src/daemon/ops/metadentry.cpp b/src/daemon/ops/metadentry.cpp index e80655fb9..c456ed4a9 100644 --- a/src/daemon/ops/metadentry.cpp +++ b/src/daemon/ops/metadentry.cpp @@ -35,66 +35,31 @@ using namespace std; namespace gkfs::metadata { -/** - * Returns the metadata of an object at a specific path. The metadata can be of - * dummy values if configured - * @param path - * @param attr - * @return - */ Metadata get(const std::string& path) { return Metadata(get_str(path)); } -/** - * Get metadentry string only for path - * @param path - * @return - */ std::string get_str(const std::string& path) { return GKFS_DATA->mdb()->get(path); } -/** - * Gets the size of a metadentry - * @param path - * @param ret_size (return val) - * @return err - */ size_t get_size(const string& path) { return get(path).size(); } -/** - * Returns a vector of directory entries for given directory - * @param dir - * @return - */ std::vector> get_dirents(const std::string& dir) { return GKFS_DATA->mdb()->get_dirents(dir); } -/** - * Returns a vector of directory entries for given directory (extended version) - * @param dir - * @return - */ std::vector> get_dirents_extended(const std::string& dir) { return GKFS_DATA->mdb()->get_dirents_extended(dir); } - -/** - * Creates metadata (if required) and dentry at the same time - * @param path - * @param mode - * @throws DBException - */ void create(const std::string& path, Metadata& md) { @@ -118,39 +83,26 @@ create(const std::string& path, Metadata& md) { } } -/** - * Update metadentry by given Metadata object and path - * @param path - * @param md - */ void update(const string& path, Metadata& md) { GKFS_DATA->mdb()->update(path, path, md.serialize()); } /** - * Updates a metadentry's size atomically and returns the starting offset for - * the I/O operation. 
This is primarily necessary for parallel write operations, + * @internal + * Updates a metadentry's size atomically and returns the starting offset + * for the I/O operation. + * This is primarily necessary for parallel write operations, * e.g., with O_APPEND, where the EOF might have changed since opening the file. * Therefore, we use update_size to assign a safe write interval to each * parallel write operation. - * @param path - * @param io_size - * @param offset - * @param append - * @return starting offset for I/O operation + * @endinternal */ off_t update_size(const string& path, size_t io_size, off64_t offset, bool append) { return GKFS_DATA->mdb()->increase_size(path, io_size, offset, append); } -/** - * Remove metadentry if exists - * @param path - * @return - * @throws gkfs::metadata::DBException - */ void remove(const string& path) { /* -- GitLab From 32efe80dda0f2440437a005ef219cd5ea47da2df Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 8 Mar 2023 18:19:35 +0100 Subject: [PATCH 5/7] Adding append tests --- .../harness/gkfs.io/syscall_coverage.cpp | 22 ++++++++-- tests/integration/harness/gkfs.io/write.cpp | 11 ++++- tests/integration/harness/gkfs.py | 12 ++--- .../operations/test_write_operations.py | 44 ++++++++++++++++--- .../syscalls/test_error_operations.py | 2 +- 5 files changed, 72 insertions(+), 19 deletions(-) diff --git a/tests/integration/harness/gkfs.io/syscall_coverage.cpp b/tests/integration/harness/gkfs.io/syscall_coverage.cpp index abc24f58f..5470b63b4 100644 --- a/tests/integration/harness/gkfs.io/syscall_coverage.cpp +++ b/tests/integration/harness/gkfs.io/syscall_coverage.cpp @@ -150,7 +150,7 @@ syscall_coverage_exec(const syscall_coverage_options& opts) { return; } - // lssek external + // lseek external rv = ::lseek(fdext, 0, SEEK_SET); if(rv < 0) { output("lseek", rv, opts); @@ -187,7 +187,7 @@ syscall_coverage_exec(const syscall_coverage_options& opts) { return; } - + // fchmod internal rv = ::fchmod(fd, 0777); if(errno != 
ENOTSUP) { @@ -381,12 +381,26 @@ syscall_coverage_exec(const syscall_coverage_options& opts) { return; } - rv = ::renameat(AT_FDCWD, path1.c_str(), AT_FDCWD, - path2.c_str()); + rv = ::renameat(AT_FDCWD, path1.c_str(), AT_FDCWD, path2.c_str()); if(rv < 0) { output("renameat_ext_to_ext", rv, opts); return; } + + // open with O_APPEND + std::string path_append = "/tmp/" + pid + "test_append"; + auto fd_append = ::open(path_append.c_str(), O_CREAT | O_WRONLY | O_APPEND, 0644); + if(fd_append < 0) { + output("open with O_APPEND", fd_append, opts); + return; + } + rv = ::write(fd_append, "testappend", 10); + if(rv < 0) { + output("open with O_APPEND", rv, opts); + return; + } + ::close(fd_append); + // sys_open rv = ::syscall(SYS_open, opts.pathname.c_str(), O_RDONLY, 0); if(rv < 0) { diff --git a/tests/integration/harness/gkfs.io/write.cpp b/tests/integration/harness/gkfs.io/write.cpp index e7509a9f7..e6f0059d2 100644 --- a/tests/integration/harness/gkfs.io/write.cpp +++ b/tests/integration/harness/gkfs.io/write.cpp @@ -49,6 +49,7 @@ struct write_options { std::string pathname; std::string data; ::size_t count; + bool append{false}; REFL_DECL_STRUCT(write_options, REFL_DECL_MEMBER(bool, verbose), REFL_DECL_MEMBER(std::string, pathname), @@ -71,8 +72,10 @@ to_json(json& record, const write_output& out) { void write_exec(const write_options& opts) { - - auto fd = ::open(opts.pathname.c_str(), O_WRONLY); + auto flags = O_WRONLY; + if(opts.append) + flags |= O_APPEND; + auto fd = ::open(opts.pathname.c_str(), flags); if(fd == -1) { if(opts.verbose) { @@ -125,5 +128,9 @@ write_init(CLI::App& app) { ->required() ->type_name(""); + cmd->add_option("append", opts->append, "Append file") + ->default_val(false) + ->type_name(""); + cmd->callback([opts]() { write_exec(*opts); }); } diff --git a/tests/integration/harness/gkfs.py b/tests/integration/harness/gkfs.py index 9830e9c3d..3425ed300 100644 --- a/tests/integration/harness/gkfs.py +++ b/tests/integration/harness/gkfs.py @@ -45,6
+45,7 @@ gkfs_daemon_log_file = 'gkfs_daemon.log' gkfs_daemon_log_level = '100' gkfs_client_log_file = 'gkfs_client.log' gkfs_client_log_level = 'all' +gkfs_client_log_syscall_filter = 'epoll_wait,epoll_create' gkfs_daemon_active_log_pattern = r'Startup successful. Daemon is ready.' gkfwd_daemon_cmd = 'gkfwd_daemon' @@ -414,11 +415,12 @@ class Client: self._preload_library = preloads[0] self._patched_env = { - 'LD_LIBRARY_PATH' : libdirs, - 'LD_PRELOAD' : self._preload_library, - 'LIBGKFS_HOSTS_FILE' : self.cwd / gkfs_hosts_file, - 'LIBGKFS_LOG' : gkfs_client_log_level, - 'LIBGKFS_LOG_OUTPUT' : self._workspace.logdir / gkfs_client_log_file + 'LD_LIBRARY_PATH': libdirs, + 'LD_PRELOAD': self._preload_library, + 'LIBGKFS_HOSTS_FILE': self.cwd / gkfs_hosts_file, + 'LIBGKFS_LOG': gkfs_client_log_level, + 'LIBGKFS_LOG_OUTPUT': self._workspace.logdir / gkfs_client_log_file, + 'LIBGKFS_LOG_SYSCALL_FILTER': gkfs_client_log_syscall_filter } self._env.update(self._patched_env) diff --git a/tests/integration/operations/test_write_operations.py b/tests/integration/operations/test_write_operations.py index 9094e1fa6..a49275649 100644 --- a/tests/integration/operations/test_write_operations.py +++ b/tests/integration/operations/test_write_operations.py @@ -53,10 +53,40 @@ def test_write(gkfs_daemon, gkfs_client): buf = b'42' ret = gkfs_client.write(file, buf, len(buf)) - assert ret.retval == len(buf) # Return the number of written bytes + assert ret.retval == len(buf) # Return the number of written bytes -def test_pwrite(gkfs_daemon, gkfs_client): + file_append = gkfs_daemon.mountdir / "file_append" + + ret = gkfs_client.open(file_append, + os.O_CREAT | os.O_WRONLY | os.O_APPEND, + stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) + + assert ret.retval == 10000 + + str1 = b'Hello' + str2 = b', World!' 
+ str3 = b' This is a test.\n' + + ret = gkfs_client.write(file_append, str1, len(str1), True) + assert ret.retval == len(str1) + ret = gkfs_client.stat(file_append) + assert ret.retval == 0 + assert ret.statbuf.st_size == len(str1) + + ret = gkfs_client.write(file_append, str2, len(str2), True) + assert ret.retval == len(str2) + ret = gkfs_client.stat(file_append) + assert ret.retval == 0 + assert ret.statbuf.st_size == (len(str1) + len(str2)) + ret = gkfs_client.write(file_append, str3, len(str3), True) + assert ret.retval == len(str3) + ret = gkfs_client.stat(file_append) + assert ret.retval == 0 + assert ret.statbuf.st_size == (len(str1) + len(str2) + len(str3)) + + +def test_pwrite(gkfs_daemon, gkfs_client): file = gkfs_daemon.mountdir / "file" ret = gkfs_client.open(file, @@ -69,10 +99,10 @@ def test_pwrite(gkfs_daemon, gkfs_client): # write at the offset 1024 ret = gkfs_client.pwrite(file, buf, len(buf), 1024) - assert ret.retval == len(buf) # Return the number of written bytes + assert ret.retval == len(buf) # Return the number of written bytes -def test_writev(gkfs_daemon, gkfs_client): +def test_writev(gkfs_daemon, gkfs_client): file = gkfs_daemon.mountdir / "file" ret = gkfs_client.open(file, @@ -85,10 +115,10 @@ def test_writev(gkfs_daemon, gkfs_client): buf_1 = b'24' ret = gkfs_client.writev(file, buf_0, buf_1, 2) - assert ret.retval == len(buf_0) + len(buf_1) # Return the number of written bytes + assert ret.retval == len(buf_0) + len(buf_1) # Return the number of written bytes -def test_pwritev(gkfs_daemon, gkfs_client): +def test_pwritev(gkfs_daemon, gkfs_client): file = gkfs_daemon.mountdir / "file" ret = gkfs_client.open(file, @@ -101,4 +131,4 @@ def test_pwritev(gkfs_daemon, gkfs_client): buf_1 = b'24' ret = gkfs_client.pwritev(file, buf_0, buf_1, 2, 1024) - assert ret.retval == len(buf_0) + len(buf_1) # Return the number of written bytes + assert ret.retval == len(buf_0) + len(buf_1) # Return the number of written bytes diff --git 
a/tests/integration/syscalls/test_error_operations.py b/tests/integration/syscalls/test_error_operations.py index a5ce6bd94..d2afd5c73 100644 --- a/tests/integration/syscalls/test_error_operations.py +++ b/tests/integration/syscalls/test_error_operations.py @@ -47,7 +47,7 @@ def test_open_error(gkfs_daemon, gkfs_client): file2 = gkfs_daemon.mountdir / "file2" file3 = gkfs_daemon.mountdir / "file3" - flags = [os.O_PATH, os.O_APPEND, os.O_CREAT | os.O_DIRECTORY] + flags = [os.O_PATH, os.O_CREAT | os.O_DIRECTORY] # create a file in gekkofs for flag in flags: -- GitLab From b59ecbb990f416a5e7b82b0b17f7a79e32f65108 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 8 Mar 2023 18:44:51 +0100 Subject: [PATCH 6/7] Added changelog entry --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9d31614c..29427af38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,8 +17,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Support for increasing file size via `truncate()` added ([!159](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/159) - Added PowerPC support ([!151](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/151)). -- GKFS_RENAME_SUPPORT adds support for renaming files. It includes the use case of renaming opened files using the fd -- FLOCK and fcntl functions for locks, are not supported, but they are available. +- GKFS_RENAME_SUPPORT added to support renaming files. This specifically targets the use case for opened files using an + existing file descriptor ([!133](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/133)). +- Added `FLOCK` and `fcntl` functions for locks to interception albeit not supported by GekkoFS and returning the + corresponding error code ([!133](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/133)). 
- Added support for [CMake presets](https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html) to simplify build configurations ([!163](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/163#note_8179)). - Several improvements to CMake scripts ([!143](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/143))): @@ -33,6 +35,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Adds the `gkfs_feature_summary()` to allow printing a summary of all GekkoFS configuration options and their values. This should help users when building to precisely see how a GekkoFS instance has been configured. +- Added (parallel) append support for consecutive writes with file descriptor opened + with `O_APPEND` ([!164](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/164)). ### Changed @@ -72,6 +76,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). version ([!162](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/162)) - Fixed an issue where nlohmann json failed to download in CMake ([!167](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/167)). +- Refactored update file size during write + operations ([!164](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/164)). ## [0.9.1] - 2022-04-29 -- GitLab From 6c264285a3ed2a55cd9ffadf1f69b83a2c62aeb1 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 19 Jun 2023 16:19:00 +0200 Subject: [PATCH 7/7] Added documentation on the merge id --- include/daemon/backend/metadata/merge.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/daemon/backend/metadata/merge.hpp b/include/daemon/backend/metadata/merge.hpp index 52fe30bcd..09648c642 100644 --- a/include/daemon/backend/metadata/merge.hpp +++ b/include/daemon/backend/metadata/merge.hpp @@ -81,6 +81,12 @@ private: constexpr const static char serialize_end = '\0'; size_t size_; + /* + * ID of the merge operation that this operand belongs to. 
+ * This ID is used only in append operations to communicate the starting + * write offset from the asynchronous Merge operation back to the caller in + * `increase_size_impl()`. + */ uint16_t merge_id_; bool append_; -- GitLab