From c3e662be4dd942cd708d19ea7594eea2048e560d Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 11 Mar 2026 07:22:12 +0100 Subject: [PATCH] server-side -C option --- examples/gfind/sfind.cpp | 13 ++++-- include/client/rpc/forward_metadata.hpp | 5 ++- include/common/rpc/rpc_types_thallium.hpp | 8 ++-- include/daemon/backend/metadata/db.hpp | 4 +- .../backend/metadata/metadata_backend.hpp | 8 ++-- .../backend/metadata/rocksdb_backend.hpp | 3 +- src/client/gkfs_functions.cpp | 10 +++-- src/client/gkfs_metadata.cpp | 20 ++++++--- src/client/rpc/forward_metadata.cpp | 20 ++++++--- src/daemon/backend/metadata/db.cpp | 6 +-- .../backend/metadata/rocksdb_backend.cpp | 43 ++++++++++++------- src/daemon/handler/srv_metadata.cpp | 8 ++-- 12 files changed, 96 insertions(+), 52 deletions(-) diff --git a/examples/gfind/sfind.cpp b/examples/gfind/sfind.cpp index 9d0c69a96..eb15a3594 100644 --- a/examples/gfind/sfind.cpp +++ b/examples/gfind/sfind.cpp @@ -43,7 +43,9 @@ gkfs_getsingleserverdir_filtered(const char* path, struct dirent_extended** dirp, int server, const char* start_key, const char* filter_name, int64_t filter_size, int64_t filter_ctime, - char** last_key_out, uint64_t* total_checked_out) __attribute__((weak)); + int count_only, + char** last_key_out, uint64_t* matched_count_out, + uint64_t* total_checked_out) __attribute__((weak)); /* PFIND OPTIONS EXTENDED */ typedef struct { @@ -267,13 +269,15 @@ worker_routine(void* arg) { ssize_t n = -1; uint64_t server_checked_count = 0; + uint64_t server_matched_count = 0; for(size_t i = 0; i < max_retries; ++i) { n = gkfs_getsingleserverdir_filtered( data->workdir->c_str(), &entries, server_id, last_key ? last_key : "", data->opt->name_pattern.c_str(), f_size, f_ctime, - &new_last_key, &server_checked_count); + data->opt->just_count, + &new_last_key, &server_matched_count, &server_checked_count); if(n >= 0) break; // simple retry delay could be added here @@ -296,7 +300,7 @@ worker_routine(void* arg) { local_checked += server_checked_count; - if(n > 0 && entries) { + if(n > 0 && entries && !data->opt->just_count) { char* ptr = reinterpret_cast(entries); int bytes_processed = 0; while(bytes_processed < n) { @@ -321,6 +325,9 @@ worker_routine(void* arg) { } else { if(entries) free(entries); + if (data->opt->just_count) { + local_found += server_matched_count; + } } if(last_key) diff --git a/include/client/rpc/forward_metadata.hpp b/include/client/rpc/forward_metadata.hpp index e26b25c2d..52f728969 100644 --- a/include/client/rpc/forward_metadata.hpp +++ b/include/client/rpc/forward_metadata.hpp @@ -139,11 +139,12 @@ forward_read_inline(const std::string& path, void* buf, off64_t offset, std::tuple>, - uint64_t, std::string> + uint64_t, uint64_t, std::string> forward_get_dirents_filtered(const std::string& path, int server, const std::string& start_key, const std::string& filter_name, - int64_t filter_size, int64_t filter_ctime); + int64_t filter_size, int64_t filter_ctime, + bool count_only = false); } // namespace rpc } // namespace gkfs diff --git a/include/common/rpc/rpc_types_thallium.hpp b/include/common/rpc/rpc_types_thallium.hpp index 433d1dbaf..b78ea7765 100644 --- a/include/common/rpc/rpc_types_thallium.hpp +++ b/include/common/rpc/rpc_types_thallium.hpp @@ -314,12 +314,13 @@ struct rpc_get_dirents_filtered_in_t { std::string filter_name; int64_t filter_size; int64_t filter_ctime; + bool count_only; template void serialize(Archive& ar) { - ar(path, start_key, bulk_handle, filter_name, filter_size, - filter_ctime); + ar(path, start_key, bulk_handle, filter_name, filter_size, filter_ctime, + count_only); } }; @@ -328,11 +329,12 @@ struct rpc_get_dirents_filtered_out_t { size_t dirents_size; uint64_t total_checked; std::string last_scanned_key; + uint64_t matched_count; template void serialize(Archive& ar) { - ar(err, dirents_size, total_checked, last_scanned_key); + ar(err, dirents_size, total_checked, last_scanned_key, matched_count); } }; diff --git a/include/daemon/backend/metadata/db.hpp b/include/daemon/backend/metadata/db.hpp index bee2352ae..328296d59 100644 --- a/include/daemon/backend/metadata/db.hpp +++ b/include/daemon/backend/metadata/db.hpp @@ -210,11 +210,11 @@ public: */ [[nodiscard]] std::tuple< std::vector>, - uint64_t, std::string> + size_t, size_t, std::string> get_dirents_filtered(const std::string& dir, const std::string& start_key, const std::string& filter_name, const int64_t filter_size, const int64_t filter_ctime, - size_t max_entries = 0) const; + bool count_only = false, size_t max_entries = 0) const; /** * @brief Iterate over complete database, note ONLY used for debugging and diff --git a/include/daemon/backend/metadata/metadata_backend.hpp b/include/daemon/backend/metadata/metadata_backend.hpp index b7895947f..4d98bb9b6 100644 --- a/include/daemon/backend/metadata/metadata_backend.hpp +++ b/include/daemon/backend/metadata/metadata_backend.hpp @@ -95,10 +95,11 @@ public: virtual std::tuple< std::vector>, - uint64_t, std::string> + size_t, size_t, std::string> get_dirents_filtered(const std::string& dir, const std::string& start_key, const std::string& filter_name, const int64_t filter_size, const int64_t filter_ctime, + bool count_only = false, size_t max_entries = 0) const = 0; virtual void* @@ -186,14 +187,15 @@ public: std::tuple< std::vector>, - uint64_t, std::string> + size_t, size_t, std::string> get_dirents_filtered(const std::string& dir, const std::string& start_key, const std::string& filter_name, const int64_t filter_size, const int64_t filter_ctime, + bool count_only = false, size_t max_entries = 0) const { return static_cast(*this).get_dirents_filtered_impl( dir, start_key, filter_name, filter_size, filter_ctime, - max_entries); + count_only, max_entries); } void* diff --git a/include/daemon/backend/metadata/rocksdb_backend.hpp b/include/daemon/backend/metadata/rocksdb_backend.hpp index bbe1d211c..0a9adec32 100644 --- a/include/daemon/backend/metadata/rocksdb_backend.hpp +++ b/include/daemon/backend/metadata/rocksdb_backend.hpp @@ -194,12 +194,13 @@ public: std::tuple< std::vector>, - uint64_t, std::string> + size_t, size_t, std::string> get_dirents_filtered_impl(const std::string& root_path, const std::string& start_key, const std::string& filter_name, const int64_t filter_size, const int64_t filter_ctime, + bool count_only = false, size_t max_entries = 0) const; /** diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index c747f70af..3e569ce4b 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -311,13 +311,15 @@ gkfs_utimensat(const std::string& path, const struct timespec times[2]) { std::tuple>, - uint64_t, std::string> + uint64_t, uint64_t, std::string> gkfs_getsingleserverdir_filtered(const std::string& path, int server, const std::string& start_key, const std::string& filter_name, - int64_t filter_size, int64_t filter_ctime) { - return gkfs::rpc::forward_get_dirents_filtered( - path, server, start_key, filter_name, filter_size, filter_ctime); + int64_t filter_size, int64_t filter_ctime, + bool count_only) { + return gkfs::rpc::forward_get_dirents_filtered(path, server, start_key, + filter_name, filter_size, + filter_ctime, count_only); } } // namespace gkfs::syscall diff --git a/src/client/gkfs_metadata.cpp b/src/client/gkfs_metadata.cpp index ef0e1ed53..ce2fd576d 100644 --- a/src/client/gkfs_metadata.cpp +++ b/src/client/gkfs_metadata.cpp @@ -1686,12 +1686,14 @@ gkfs_getsingleserverdir_filtered(const char* path, struct dirent_extended** dirp, int server, const char* start_key, const char* filter_name, int64_t filter_size, int64_t filter_ctime, - char** last_key_out, + int count_only, char** last_key_out, + uint64_t* matched_count_out, uint64_t* total_checked_out) { auto ret = gkfs::rpc::forward_get_dirents_filtered( path, server, start_key ? start_key : "", - filter_name ? filter_name : "", filter_size, filter_ctime); + filter_name ? filter_name : "", filter_size, filter_ctime, + count_only != 0); int err = std::get<0>(ret); if(err) { @@ -1702,14 +1704,16 @@ gkfs_getsingleserverdir_filtered(const char* path, const auto& entries = std::get<1>(ret); if(entries.empty()) { if(last_key_out) { - auto last_key = std::get<3>(ret); + auto last_key = std::get<4>(ret); if(!last_key.empty()) *last_key_out = strdup(last_key.c_str()); else *last_key_out = nullptr; } + if(matched_count_out) + *matched_count_out = std::get<2>(ret); if(total_checked_out) - *total_checked_out = std::get<2>(ret); + *total_checked_out = std::get<3>(ret); return 0; } @@ -1759,15 +1763,19 @@ gkfs_getsingleserverdir_filtered(const char* path, } if(last_key_out) { - auto last_key = std::get<3>(ret); + auto last_key = std::get<4>(ret); if(!last_key.empty()) *last_key_out = strdup(last_key.c_str()); else *last_key_out = nullptr; } + if(matched_count_out) { + *matched_count_out = std::get<2>(ret); + } + if(total_checked_out) { - *total_checked_out = std::get<2>(ret); + *total_checked_out = std::get<3>(ret); } return total_required_size; diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index 5baf556ec..be074d23c 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -827,11 +827,12 @@ forward_get_dirents_single(const string& path, int server, std::tuple>, - uint64_t, std::string> + uint64_t, uint64_t, std::string> forward_get_dirents_filtered(const std::string& path, int server, const std::string& start_key, const std::string& filter_name, - int64_t filter_size, int64_t filter_ctime) { + int64_t filter_size, int64_t filter_ctime, + bool count_only) { auto endp = CTX->hosts().at(server); @@ -841,6 +842,7 @@ forward_get_dirents_filtered(const std::string& path, int server, in.filter_name = filter_name; in.filter_size = filter_size; in.filter_ctime = filter_ctime; + in.count_only = count_only; // TODO makes sure the buffer size is sufficient @@ -853,6 +855,7 @@ forward_get_dirents_filtered(const std::string& path, int server, int err = 0; std::string last_scanned_key; uint64_t total_checked = 0; + uint64_t matched_count = 0; // Retry loop in case of buffer being too small while(true) { @@ -885,12 +888,15 @@ forward_get_dirents_filtered(const std::string& path, int server, break; } - // reuse standard decompression for now as format is same - entries = gkfs::rpc::decompress_and_parse_entries( - out, large_buffer.get()); + if(!count_only) { + // reuse standard decompression for now as format is same + entries = gkfs::rpc::decompress_and_parse_entries( + out, large_buffer.get()); + } last_scanned_key = out.last_scanned_key; total_checked = out.total_checked; + matched_count = out.matched_count; break; } catch(const std::exception& e) { @@ -901,8 +907,8 @@ forward_get_dirents_filtered(const std::string& path, int server, } } - return std::make_tuple(err, std::move(entries), total_checked, - last_scanned_key); + return std::make_tuple(err, std::move(entries), matched_count, + total_checked, last_scanned_key); } } // namespace gkfs::rpc diff --git a/src/daemon/backend/metadata/db.cpp b/src/daemon/backend/metadata/db.cpp index ff3d23355..deed56623 100644 --- a/src/daemon/backend/metadata/db.cpp +++ b/src/daemon/backend/metadata/db.cpp @@ -223,12 +223,12 @@ MetadataDB::get_all_dirents_extended(const std::string& dir, } std::tuple>, - uint64_t, std::string> + size_t, size_t, std::string> MetadataDB::get_dirents_filtered(const std::string& dir, const std::string& start_key, const std::string& filter_name, const int64_t filter_size, - const int64_t filter_ctime, + const int64_t filter_ctime, bool count_only, size_t max_entries) const { auto root_path = dir; assert(gkfs::path::is_absolute(root_path)); @@ -236,7 +236,7 @@ MetadataDB::get_dirents_filtered(const std::string& dir, root_path.push_back('/'); } return backend_->get_dirents_filtered(root_path, start_key, filter_name, - filter_size, filter_ctime, + filter_size, filter_ctime, count_only, max_entries); } diff --git a/src/daemon/backend/metadata/rocksdb_backend.cpp b/src/daemon/backend/metadata/rocksdb_backend.cpp index 6a5f93e3d..0790ae284 100644 --- a/src/daemon/backend/metadata/rocksdb_backend.cpp +++ b/src/daemon/backend/metadata/rocksdb_backend.cpp @@ -554,13 +554,11 @@ RocksDBBackend::get_all_dirents_extended_impl(const std::string& dir, // Return all the filtered entries with root in the path specified std::tuple>, - uint64_t, std::string> -RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, - const std::string& start_key, - const std::string& filter_name, - const int64_t filter_size, - const int64_t filter_ctime, - size_t max_entries) const { + size_t, size_t, std::string> +RocksDBBackend::get_dirents_filtered_impl( + const std::string& dir, const std::string& start_key, + const std::string& filter_name, const int64_t filter_size, + const int64_t filter_ctime, bool count_only, size_t max_entries) const { auto root_path = dir; rocksdb::ReadOptions ropts; auto it = db_->NewIterator(ropts); @@ -579,7 +577,7 @@ RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, name_regex = std::regex(filter_name); } catch(const std::regex_error& e) { // fallback if invalid regex - return {entries, 0, ""}; + return {entries, 0, 0, ""}; } } @@ -656,13 +654,23 @@ RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, if(matched) { unsigned char type = 0; - if(S_ISDIR(md.mode())) { - type = 1; - } else if(S_ISLNK(md.mode())) { - type = 2; + if(count_only) { + // If counting only, we don't store the metadata + // We just record a match implicitly (matched is true here) + // However, we still need to respect max_entries for pagination + // The actual count of matched items is what matters. + entries.emplace_back(std::forward_as_tuple( + "", type, 0, + 0)); // Dummy entry to keep track of size/pagination + } else { + if(S_ISDIR(md.mode())) { + type = 1; + } else if(S_ISLNK(md.mode())) { + type = 2; + } + entries.emplace_back(std::forward_as_tuple( + std::move(relative_name), type, md.size(), md.ctime())); } - entries.emplace_back(std::forward_as_tuple( - std::move(relative_name), type, md.size(), md.ctime())); if(max_entries > 0 && entries.size() >= max_entries) { eof = false; break; @@ -674,8 +682,13 @@ RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, last_scanned_key = ""; } + size_t matched_count = entries.size(); + if(count_only) { + entries.clear(); + } + // assert(it->status().ok()); // only if eof check? - return {entries, scanned_count, last_scanned_key}; + return {entries, matched_count, scanned_count, last_scanned_key}; } diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index f98798aae..37457227b 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -752,6 +752,7 @@ rpc_srv_get_dirents_filtered( out.err = EIO; out.dirents_size = 0; out.total_checked = 0; + out.matched_count = 0; out.last_scanned_key = ""; size_t client_bulk_size = in.bulk_handle.size(); @@ -768,10 +769,11 @@ rpc_srv_get_dirents_filtered( try { auto ret = GKFS_DATA->mdb()->get_dirents_filtered( in.path, in.start_key, in.filter_name, in.filter_size, - in.filter_ctime, 0); + in.filter_ctime, in.count_only, 0); entries = std::get<0>(ret); - out.total_checked = std::get<1>(ret); - last_scanned_key = std::get<2>(ret); + out.matched_count = std::get<1>(ret); + out.total_checked = std::get<2>(ret); + last_scanned_key = std::get<3>(ret); } catch(const std::exception& e) { GKFS_DATA->spdlogger()->error( "{}() Error during get_dirents_filtered(): '{}'", __func__, -- GitLab