LCOV - code coverage report
Current view: top level - src/daemon/backend/metadata - rocksdb_backend.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 116 138 84.1 %
Date: 2024-04-30 13:21:35 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :   Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain
       3             :   Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany
       4             : 
       5             :   This software was partially supported by the
       6             :   EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
       7             : 
       8             :   This software was partially supported by the
       9             :   ADA-FS project under the SPPEXA project funded by the DFG.
      10             : 
      11             :   This file is part of GekkoFS.
      12             : 
      13             :   GekkoFS is free software: you can redistribute it and/or modify
      14             :   it under the terms of the GNU General Public License as published by
      15             :   the Free Software Foundation, either version 3 of the License, or
      16             :   (at your option) any later version.
      17             : 
      18             :   GekkoFS is distributed in the hope that it will be useful,
      19             :   but WITHOUT ANY WARRANTY; without even the implied warranty of
      20             :   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      21             :   GNU General Public License for more details.
      22             : 
      23             :   You should have received a copy of the GNU General Public License
      24             :   along with GekkoFS.  If not, see <https://www.gnu.org/licenses/>.
      25             : 
      26             :   SPDX-License-Identifier: GPL-3.0-or-later
      27             : */
      28             : 
      29             : #include <daemon/backend/metadata/db.hpp>
      30             : #include <daemon/backend/metadata/merge.hpp>
      31             : #include <daemon/backend/exceptions.hpp>
      32             : #include <daemon/backend/metadata/metadata_module.hpp>
      33             : 
      34             : #include <common/metadata.hpp>
      35             : #include <common/path_util.hpp>
      36             : #include <iostream>
      37             : #include <daemon/backend/metadata/rocksdb_backend.hpp>
      38             : extern "C" {
      39             : #include <sys/stat.h>
      40             : }
      41             : 
      42             : namespace gkfs::metadata {
      43             : 
      44             : /**
      45             :  * Called when the daemon is started: Connects to the KV store
      46             :  * @param path where KV store data is stored
      47             :  */
      48          33 : RocksDBBackend::RocksDBBackend(const std::string& path) {
      49             : 
      50             :     // Optimize RocksDB. This is the easiest way to get RocksDB to perform well
      51          33 :     options_.IncreaseParallelism();
      52          33 :     options_.OptimizeLevelStyleCompaction();
      53             :     // create the DB if it's not already present
      54          33 :     options_.create_if_missing = true;
      55          33 :     options_.merge_operator.reset(new MetadataMergeOperator);
      56          33 :     optimize_database_impl();
      57          33 :     write_opts_.disableWAL = !(gkfs::config::rocksdb::use_write_ahead_log);
      58          33 :     rdb::DB* rdb_ptr = nullptr;
      59          33 :     auto s = rocksdb::DB::Open(options_, path, &rdb_ptr);
      60          33 :     if(!s.ok()) {
      61           0 :         throw std::runtime_error("Failed to open RocksDB: " + s.ToString());
      62             :     }
      63          33 :     this->db_.reset(rdb_ptr);
      64          33 : }
      65             : 
      66             : 
      67          66 : RocksDBBackend::~RocksDBBackend() {
      68          66 :     this->db_.reset();
      69          66 : }
      70             : 
      71             : /**
      72             :  * Exception wrapper on Status object. Throws NotFoundException if
      73             :  * s.IsNotFound(), general DBException otherwise
      74             :  * @param RocksDB status
      75             :  * @throws DBException
      76             :  */
      77             : void
      78          24 : RocksDBBackend::throw_status_excpt(const rdb::Status& s) {
      79          24 :     assert(!s.ok());
      80             : 
      81          24 :     if(s.IsNotFound()) {
      82          48 :         throw NotFoundException(s.ToString());
      83             :     } else {
      84           0 :         throw DBException(s.ToString());
      85             :     }
      86             : }
      87             : 
      88             : 
      89             : /**
      90             :  * Gets a KV store value for a key
      91             :  * @param key
      92             :  * @return value
      93             :  * @throws DBException on failure, NotFoundException if entry doesn't exist
      94             :  */
      95             : std::string
      96        1406 : RocksDBBackend::get_impl(const std::string& key) const {
      97        1406 :     std::string val;
      98             : 
      99        1430 :     auto s = db_->Get(rdb::ReadOptions(), key, &val);
     100        1406 :     if(!s.ok()) {
     101          24 :         throw_status_excpt(s);
     102             :     }
     103             : 
     104        1382 :     return val;
     105             : }
     106             : 
     107             : /**
     108             :  * Puts an entry into the KV store
     109             :  * @param key
     110             :  * @param val
     111             :  * @throws DBException on failure
     112             :  */
     113             : void
     114        1097 : RocksDBBackend::put_impl(const std::string& key, const std::string& val) {
     115             : 
     116        1097 :     auto cop = CreateOperand(val);
     117        2194 :     auto s = db_->Merge(write_opts_, key, cop.serialize());
     118        1097 :     if(!s.ok()) {
     119           0 :         throw_status_excpt(s);
     120             :     }
     121        1097 : }
     122             : 
     123             : /**
     124             :  * Puts an entry into the KV store if it doesn't exist. This function does not
     125             :  * use a mutex.
     126             :  * @param key
     127             :  * @param val
     128             :  * @throws DBException on failure, ExistException if entry already exists
     129             :  */
     130             : void
     131        1100 : RocksDBBackend::put_no_exist_impl(const std::string& key,
     132             :                                   const std::string& val) {
     133             : 
     134        1100 :     if(exists(key))
     135           3 :         throw ExistsException(key);
     136        1097 :     put(key, val);
     137        1097 : }
     138             : 
     139             : /**
     140             :  * Removes an entry from the KV store
     141             :  * @param key
     142             :  * @throws DBException on failure, NotFoundException if entry doesn't exist
     143             :  */
     144             : void
     145           8 : RocksDBBackend::remove_impl(const std::string& key) {
     146             : 
     147           8 :     auto s = db_->Delete(write_opts_, key);
     148           8 :     if(!s.ok()) {
     149           0 :         throw_status_excpt(s);
     150             :     }
     151           8 : }
     152             : 
     153             : /**
     154             :  * checks for existence of an entry
     155             :  * @param key
     156             :  * @return true if exists
     157             :  * @throws DBException on failure
     158             :  */
     159             : bool
     160        1100 : RocksDBBackend::exists_impl(const std::string& key) {
     161             : 
     162        2200 :     std::string val;
     163             : 
     164        2200 :     auto s = db_->Get(rdb::ReadOptions(), key, &val);
     165        1100 :     if(!s.ok()) {
     166        1097 :         if(s.IsNotFound()) {
     167             :             return false;
     168             :         } else {
     169           0 :             throw_status_excpt(s);
     170             :         }
     171             :     }
     172             :     return true;
     173             : }
     174             : 
     175             : /**
     176             :  * Updates a metadentry atomically and also allows to change keys
     177             :  * @param old_key
     178             :  * @param new_key
     179             :  * @param val
     180             :  * @throws DBException on failure, NotFoundException if entry doesn't exist
     181             :  */
     182             : void
     183          31 : RocksDBBackend::update_impl(const std::string& old_key,
     184             :                             const std::string& new_key,
     185             :                             const std::string& val) {
     186             : 
     187             :     // TODO use rdb::Put() method
     188          31 :     rdb::WriteBatch batch;
     189          31 :     batch.Delete(old_key);
     190          31 :     batch.Put(new_key, val);
     191          62 :     auto s = db_->Write(write_opts_, &batch);
     192          31 :     if(!s.ok()) {
     193           0 :         throw_status_excpt(s);
     194             :     }
     195          31 : }
     196             : 
     197             : /**
     198             :  * Updates the size on the metadata
     199             :  * Operation. E.g., called before a write() call
     200             :  *
     201             :  * A special case represents the append operation. Since multiple processes
     202             :  * could want to append a file in parallel, the corresponding offsets where the
     203             :  * write operation starts, needs to be reserved. This is an expensive operation
     204             :  * as we need to force a RocksDB Merge operation to receive the starting offset
     205             :  * for this write request.
     206             :  *
     207             :  * @param key
     208             :  * @param io_size
     209             :  * @param offset
     210             :  * @param append
     211             :  * @return offset where the write operation should start. This is only used when
     212             :  * append is set
     213             :  */
     214             : off_t
     215          41 : RocksDBBackend::increase_size_impl(const std::string& key, size_t io_size,
     216             :                                    off_t offset, bool append) {
     217          41 :     off_t out_offset = -1;
     218          41 :     if(append) {
     219           3 :         auto merge_id = gkfs::metadata::gen_unique_id(key);
     220             :         // no offset needed because new size is current file size + io_size
     221           3 :         auto uop = IncreaseSizeOperand(io_size, merge_id, append);
     222           6 :         auto s = db_->Merge(write_opts_, key, uop.serialize());
     223           3 :         if(!s.ok()) {
     224           0 :             throw_status_excpt(s);
     225             :         } else {
     226             :             // force merge operation to run
     227           3 :             get_impl(key);
     228           3 :             try {
     229             :                 // the offset was added during FullMergeV2() call
     230           3 :                 out_offset =
     231           3 :                         GKFS_METADATA_MOD->append_offset_reserve_get_and_erase(
     232             :                                 merge_id);
     233           0 :             } catch(std::out_of_range& e) {
     234           0 :                 GKFS_METADATA_MOD->log()->warn(
     235             :                         "{}() - out_of_range exception: {} when attempting to get offset for key {}",
     236           0 :                         __func__, e.what(), key);
     237             :             }
     238             :         }
     239             :     } else {
     240             :         // In the standard case we simply add the I/O request size to the
     241             :         // offset.
     242          38 :         auto uop = IncreaseSizeOperand(offset + io_size);
     243          76 :         auto s = db_->Merge(write_opts_, key, uop.serialize());
     244          38 :         if(!s.ok()) {
     245           0 :             throw_status_excpt(s);
     246             :         }
     247             :     }
     248          41 :     return out_offset;
     249             : }
     250             : 
     251             : /**
     252             :  * Decreases the size on the metadata
     253             :  * Operation E.g., called before a truncate() call
     254             :  * @param key
     255             :  * @param size
     256             :  * @throws DBException on failure
     257             :  */
     258             : void
     259           3 : RocksDBBackend::decrease_size_impl(const std::string& key, size_t size) {
     260             : 
     261           3 :     auto uop = DecreaseSizeOperand(size);
     262           3 :     auto s = db_->Merge(write_opts_, key, uop.serialize());
     263           3 :     if(!s.ok()) {
     264           0 :         throw_status_excpt(s);
     265             :     }
     266           3 : }
     267             : 
     268             : /**
     269             :  * Return all the first-level entries of the directory @dir
     270             :  *
     271             :  * @return vector of pair <std::string name, bool is_dir>,
     272             :  *         where name is the name of the entries and is_dir
     273             :  *         is true in the case the entry is a directory.
     274             :  */
     275             : std::vector<std::pair<std::string, bool>>
     276          25 : RocksDBBackend::get_dirents_impl(const std::string& dir) const {
     277          25 :     auto root_path = dir;
     278          50 :     rocksdb::ReadOptions ropts;
     279          25 :     auto it = db_->NewIterator(ropts);
     280             : 
     281          25 :     std::vector<std::pair<std::string, bool>> entries;
     282        1072 :     for(it->Seek(root_path); it->Valid() && it->key().starts_with(root_path);
     283        1047 :         it->Next()) {
     284             : 
     285        1047 :         if(it->key().size() == root_path.size()) {
     286             :             // we skip this path cause it is exactly the root_path
     287           6 :             continue;
     288             :         }
     289             : 
     290             :         /***** Get File name *****/
     291        2070 :         auto name = it->key().ToString();
     292        1041 :         if(name.find_first_of('/', root_path.size()) != std::string::npos) {
     293             :             // skip stuff deeper then one level depth
     294          29 :             continue;
     295             :         }
     296             :         // remove prefix
     297        1030 :         name = name.substr(root_path.size());
     298             : 
     299             :         // relative path of directory entries must not be empty
     300        1030 :         assert(!name.empty());
     301             : 
     302        2059 :         Metadata md(it->value().ToString());
     303             : #ifdef HAS_RENAME
     304             :         // Remove entries with negative blocks (rename)
     305        1030 :         if(md.blocks() == -1) {
     306          13 :             continue;
     307             :         }
     308             : #endif // HAS_RENAME
     309        1029 :         auto is_dir = S_ISDIR(md.mode());
     310             : 
     311        1029 :         entries.emplace_back(std::move(name), is_dir);
     312             :     }
     313          25 :     assert(it->status().ok());
     314          50 :     return entries;
     315             : }
     316             : 
     317             : /**
     318             :  * Return all the first-level entries of the directory @dir
     319             :  *
     320             :  * @return vector of pair <std::string name, bool is_dir - size - ctime>,
     321             :  *         where name is the name of the entries and is_dir
     322             :  *         is true in the case the entry is a directory.
     323             :  */
     324             : std::vector<std::tuple<std::string, bool, size_t, time_t>>
     325           4 : RocksDBBackend::get_dirents_extended_impl(const std::string& dir) const {
     326           4 :     auto root_path = dir;
     327           8 :     rocksdb::ReadOptions ropts;
     328           4 :     auto it = db_->NewIterator(ropts);
     329             : 
     330           4 :     std::vector<std::tuple<std::string, bool, size_t, time_t>> entries;
     331             : 
     332           9 :     for(it->Seek(root_path); it->Valid() && it->key().starts_with(root_path);
     333           5 :         it->Next()) {
     334             : 
     335           5 :         if(it->key().size() == root_path.size()) {
     336             :             // we skip this path cause it is exactly the root_path
     337           0 :             continue;
     338             :         }
     339             : 
     340             :         /***** Get File name *****/
     341           9 :         auto name = it->key().ToString();
     342           5 :         if(name.find_first_of('/', root_path.size()) != std::string::npos) {
     343             :             // skip stuff deeper then one level depth
     344           2 :             continue;
     345             :         }
     346             :         // remove prefix
     347           4 :         name = name.substr(root_path.size());
     348             : 
     349             :         // relative path of directory entries must not be empty
     350           4 :         assert(!name.empty());
     351             : 
     352           8 :         Metadata md(it->value().ToString());
     353             : #ifdef HAS_RENAME
     354             :         // Remove entries with negative blocks (rename)
     355           4 :         if(md.blocks() == -1) {
     356           1 :             continue;
     357             :         }
     358             : #endif // HAS_RENAME
     359           4 :         auto is_dir = S_ISDIR(md.mode());
     360             : 
     361           4 :         entries.emplace_back(std::forward_as_tuple(std::move(name), is_dir,
     362           4 :                                                    md.size(), md.ctime()));
     363             :     }
     364           4 :     assert(it->status().ok());
     365           8 :     return entries;
     366             : }
     367             : 
     368             : 
     369             : /**
     370             :  * Code example for iterating all entries in KV store. This is for debug only as
     371             :  * it is too expensive
     372             :  */
     373             : void
     374           0 : RocksDBBackend::iterate_all_impl() const {
     375           0 :     std::string key;
     376           0 :     std::string val;
     377             :     // Do RangeScan on parent inode
     378           0 :     auto iter = db_->NewIterator(rdb::ReadOptions());
     379           0 :     for(iter->SeekToFirst(); iter->Valid(); iter->Next()) {
     380           0 :         key = iter->key().ToString();
     381           0 :         val = iter->value().ToString();
     382           0 :         std::cout << key << std::endl;
     383             :     }
     384           0 : }
     385             : 
     386             : /**
     387             :  * Used for setting KV store settings
     388             :  */
     389             : void
     390          33 : RocksDBBackend::optimize_database_impl() {
     391          33 :     options_.max_successive_merges = 128;
     392          33 : }
     393             : 
     394             : 
     395             : } // namespace gkfs::metadata

Generated by: LCOV version 1.16