Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include <daemon/backend/metadata/db.hpp>
#include <daemon/backend/metadata/merge.hpp>

#include <memory>
#include <stdexcept>
/**
 * Opens (creating if necessary) the RocksDB instance at @p path.
 * Registers the MetadataMergeOperator so put()/update_size() can use Merge,
 * and applies the compile-time tuning from optimize_rocksdb_options().
 * @param path filesystem path of the RocksDB database directory
 * @throws std::runtime_error if RocksDB fails to open
 */
MetadataDB::MetadataDB(const std::string& path): path(path) {
    // Optimize RocksDB. This is the easiest way to get RocksDB to perform well
    options.IncreaseParallelism();
    options.OptimizeLevelStyleCompaction();
    // create the DB if it's not already present
    options.create_if_missing = true;
    options.merge_operator.reset(new MetadataMergeOperator);
    MetadataDB::optimize_rocksdb_options(options);
#if !defined(KV_WOL)
    // Skip the write-ahead log for all writes issued with write_opts
    // (trades crash durability for write throughput)
    write_opts.disableWAL = true;
#endif
    rdb::DB * rdb_ptr;
    auto s = rocksdb::DB::Open(options, path, &rdb_ptr);
    if (!s.ok()) {
        // BUG FIX: the runtime_error was previously constructed but never
        // thrown, so an open failure continued with a null `db` and crashed
        // on first use. (Also fixed the "opend" typo in the message.)
        throw std::runtime_error("Failed to open RocksDB: " + s.ToString());
    }
    this->db.reset(rdb_ptr);
}
/**
 * Reads the value stored under @p key into @p val.
 * @param key key to look up
 * @param val receives the stored value on success
 * @return true if the lookup succeeded
 */
bool MetadataDB::get(const std::string& key, std::string& val) {
    const auto status = db->Get(rdb::ReadOptions(), key, &val);
    // TODO check what happens if nothing could have been found. Will val be NULL, nullptr, ""?
    // It matters because the client RPC is checking for an empty string to see if get_attr was successful or not
    return status.ok();
}
/**
 * Stores @p val under @p key. Uses Merge (not Put) so the registered
 * MetadataMergeOperator processes the CreateOperand.
 * @param key key to store under
 * @param val serialized metadentry value
 * @return true if the merge succeeded
 */
bool MetadataDB::put(const std::string& key, const std::string& val) {
    const auto operand = CreateOperand(val);
    const auto status = db->Merge(write_opts, key, operand.serialize());
    if (!status.ok()) {
        //TODO ADAFS_DATA->spdlogger()->error("Failed to create metadentry size. RDB error: [{}]", status.ToString());
    }
    return status.ok();
}
/**
 * Deletes the entry stored under @p key.
 * @param key key to delete
 * @return true if the delete succeeded
 */
bool MetadataDB::remove(const std::string& key) {
    const auto status = db->Delete(write_opts, key);
    return status.ok();
}
/**
 * Checks whether an entry exists for @p key.
 * A successful Get (value is discarded) means the key is present.
 * @param key key to probe
 * @return true if the key exists
 */
bool MetadataDB::exists(const std::string& key) {
    std::string discarded;
    const auto status = db->Get(rdb::ReadOptions(), key, &discarded);
    return status.ok();
}
/**
 * Updates a metadentry atomically, optionally moving it to a new key.
 * The delete of @p old_key and the put of @p val under @p new_key are applied
 * in one WriteBatch, so no intermediate state is ever visible.
 * @param old_key key to remove
 * @param new_key key to store @p val under (may equal @p old_key)
 * @param val serialized metadentry value
 * @return true if the batched write succeeded
 */
bool MetadataDB::update(const std::string& old_key, const std::string& new_key, const std::string& val) {
    rdb::WriteBatch batch{};
    batch.Delete(old_key);
    batch.Put(new_key, val);
    const auto status = db->Write(write_opts, &batch);
    return status.ok();
}
bool MetadataDB::update_size(const std::string& key, size_t size, off64_t offset, bool append){
auto uop = IncreaseSizeOperand(offset + size, append);
auto s = db->Merge(write_opts, key, uop.serialize());
if(!s.ok()){
//TODO ADAFS_DATA->spdlogger()->error("Failed to update metadentry size. RDB error: [{}]", s.ToString());
}
return s.ok();
}
void MetadataDB::iterate_all() {
std::string key;
std::string val;
// Do RangeScan on parent inode
auto iter = db->NewIterator(rdb::ReadOptions());
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
key = iter->key().ToString();
val = iter->value().ToString();
//TODO ADAFS_DATA->spdlogger()->trace("key '{}' value '{}'", key, val);
}
}
/**
 * Applies performance tuning to a RocksDB options struct in place.
 * Which tweaks are applied is selected at compile time by the KV_* macros;
 * with none defined, only max_successive_merges is raised.
 * @param options RocksDB options to mutate
 */
void MetadataDB::optimize_rocksdb_options(rdb::Options& options) {
    // Cap the number of chained merge operands so reads do not have to
    // replay arbitrarily long merge lists before a compaction folds them.
    options.max_successive_merges = 128;
#if defined(KV_OPTIMIZE_RAMDISK)
    // as described at https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide
    // use mmap read
    options.allow_mmap_reads = true;
    // disable block cache, enable bloom filters and reduce the delta encoding restart interval
    rocksdb::BlockBasedTableOptions table_options{};
    table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
    table_options.no_block_cache = true;
    table_options.block_restart_interval = 4;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    // enable lightweight compression (snappy or lz4). We use lz4 for now
    options.compression = rocksdb::CompressionType::kLZ4Compression;
    // set up compression more aggressively and allocate more threads for flush and compaction
    options.level0_file_num_compaction_trigger = 1;
    options.max_background_flushes = 8;
    options.max_background_compactions = 8;
    options.max_subcompactions = 4;
    // keep all the files open
    options.max_open_files = -1;
#elif defined(KV_OPTIMIZE)
    // rocksdb::BlockBasedTableOptions block_options{};
    // block_options.block_size = 16384 * 2;
    // options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(block_options));
    // experimental settings
    // options.write_buffer_size = 512;
    // options.max_write_buffer_number = 16;
    // options.min_write_buffer_number_to_merge = 4;
    // These 4 below have the most impact
    options.max_bytes_for_level_base = 2048;
    options.max_bytes_for_level_multiplier = 10;
    options.target_file_size_base = 256;
    options.target_file_size_multiplier = 1;
    options.max_background_flushes = 1;
    options.max_background_compactions = 48;
    options.level0_file_num_compaction_trigger = 1;
    options.level0_slowdown_writes_trigger = 48;
    options.level0_stop_writes_trigger = 56;
    // options.arena_block_size = 1024 * 8;
    // options.compression = rocksdb::kNoCompression; // doesnt do anything
#endif
#if defined(KV_WRITE_BUFFER)
    // write_buffer_size is multiplied by the write_buffer_number to get the amount of data hold in memory.
    // at min_write_buffer_number_to_merge rocksdb starts to flush entries out to disk
    // KV_WRITE_BUFFER is interpreted as a size in MiB (shifted to bytes here)
    options.write_buffer_size = KV_WRITE_BUFFER << 20;
    // XXX experimental values. We only want one buffer, which is held in memory
    options.max_write_buffer_number = 1;
    options.min_write_buffer_number_to_merge = 1;
#endif
}