diff --git a/CHANGELOG.md b/CHANGELOG.md index 3795f466690f8b2304a6f6579db04791664cea2d..b55389c2b59d8ba47ee190304db17504ced4e48a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Fixed - SYS_lstat does not exists on some architectures, change to newfstatat ([!269](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/269)) - We cannot use lstat directly as may cause a recursion call on libc interception. + - Un/Packing order of directory entries in compressed format was incorrect ([!281](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/281)) + ## [0.9.5] - 2025-08 ### New diff --git a/include/client/fuse/fuse_client.hpp b/include/client/fuse/fuse_client.hpp index a512d5a2fb5e3c7fcc57a57c3eb5e623603265ad..cbc0a6feda3b203db21074fb0c6e178cb0a7ab73 100644 --- a/include/client/fuse/fuse_client.hpp +++ b/include/client/fuse/fuse_client.hpp @@ -36,7 +36,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later */ - +// clang-format off #ifndef GKFS_CLIENT_FUSE_CONTEXT_HPP #define GKFS_CLIENT_FUSE_CONTEXT_HPP @@ -101,10 +101,10 @@ _Static_assert(!gkfs::config::fuse::pointertrick || #else struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { unsigned _uintptr_to_must_hold_fuse_ino_t - : (!gkfs::config::fuse::pointertrick || - (sizeof(fuse_ino_t) >= sizeof(uintptr_t)) - ? 1 - : -1); + : (!gkfs::config::fuse::pointertrick || + (sizeof(fuse_ino_t) >= sizeof(uintptr_t)) + ? 1 + : -1); }; #endif diff --git a/include/client/rpc/utils.hpp b/include/client/rpc/utils.hpp index 0d75693b9e79ba7be9728b873723c09bdb9592fd..ca537e0384ca3ad0c513ac7010c029d2ad579509 100644 --- a/include/client/rpc/utils.hpp +++ b/include/client/rpc/utils.hpp @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include namespace gkfs::rpc { @@ -104,16 +106,40 @@ decompress_and_parse_entries(const OutputOrErr& out, std::vector> entries; while(p < end) { - bool is_dir = *reinterpret_cast(p); - p += sizeof(bool); + if(p + sizeof(uint8_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", + __func__); + break; + } + bool is_dir = (*reinterpret_cast(p) != 0); + p += sizeof(uint8_t); + + if(p + sizeof(size_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing file_size", + __func__); + break; + } size_t file_size = *reinterpret_cast(p); p += sizeof(size_t); + + if(p + sizeof(time_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing ctime", + __func__); + break; + } time_t ctime = *reinterpret_cast(p); p += sizeof(time_t); - std::string name(p); - p += name.length() + 1; + // Name is null-terminated. We must check we don't go past 'end'. + size_t name_len = strnlen(p, end - p); + if(p + name_len >= end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", + __func__); + break; + } + std::string name(p, name_len); + p += name_len + 1; if(!name.empty()) { entries.emplace_back(name, is_dir, file_size, ctime); diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index cfe72f37644d211da50bec33d07624e3e0208287..88abb497c355fe55d4a178941cefda46d9903040 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -36,6 +36,8 @@ #include #include #include +#include +#include using namespace std; @@ -246,11 +248,24 @@ decompress_and_parse_entries_standard( entries.reserve(out.dirents_size); // Approx while(p < end) { - bool is_dir = (*p != 0); - p += 1; + if(p + sizeof(uint8_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", + __func__); + break; + } + bool is_dir = (*reinterpret_cast(p) != 0); + p += sizeof(uint8_t); + + // Name is null-terminated. We must check we don't go past 'end'. + size_t name_len = strnlen(p, end - p); + if(p + name_len >= end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", + __func__); + break; + } - std::string name(p); - p += name.length() + 1; + std::string name(p, name_len); + p += name_len + 1; if(!name.empty()) { entries.emplace_back(name, is_dir, 0, 0); @@ -311,24 +326,46 @@ decompress_and_parse_entries_filtered( p = static_cast(compressed_buffer); end = p + out.dirents_size; } - std::vector> entries; // We don't know exact count, but can optimize reserve? // entries.reserve(out.dirents_size / 32); while(p < end) { - bool is_dir = (*p != 0); - p += 1; - - std::string name(p); - p += name.length() + 1; + if(p + sizeof(uint8_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", + __func__); + break; + } + bool is_dir = (*reinterpret_cast(p) != 0); + p += sizeof(uint8_t); + if(p + sizeof(size_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing size", + __func__); + break; + } size_t size = *reinterpret_cast(p); p += sizeof(size_t); + if(p + sizeof(time_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing ctime", + __func__); + break; + } time_t ctime = *reinterpret_cast(p); p += sizeof(time_t); + // Name is null-terminated. We must check we don't go past 'end'. + size_t name_len = strnlen(p, end - p); + if(p + name_len >= end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", + __func__); + break; + } + + std::string name(p, name_len); + p += name_len + 1; + if(!name.empty()) { entries.emplace_back(name, is_dir, size, ctime); } diff --git a/src/daemon/backend/metadata/rocksdb_backend.cpp b/src/daemon/backend/metadata/rocksdb_backend.cpp index 2c6434c1c91873fd284db3e8478136b4a6999299..cc5829bc11590297694612614d79e776479453c6 100644 --- a/src/daemon/backend/metadata/rocksdb_backend.cpp +++ b/src/daemon/backend/metadata/rocksdb_backend.cpp @@ -580,8 +580,6 @@ RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, eof = false; break; } - scanned_count++; - // Get File name auto name = it->key().ToString(); // save as potential last key (full path for now, stripped later if @@ -609,6 +607,8 @@ RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, continue; } + scanned_count++; + Metadata md(it->value().ToString()); #ifdef HAS_RENAME // Remove entries with negative blocks (rename) diff --git a/tests/integration/directories/test_packing_order.py b/tests/integration/directories/test_packing_order.py new file mode 100644 index 0000000000000000000000000000000000000000..d650f330f366b41132132bed58b719eb5a1d340f --- /dev/null +++ b/tests/integration/directories/test_packing_order.py @@ -0,0 +1,119 @@ + +import pytest +import logging +import time +from harness.gkfs import Daemon, ShellClient, Client, find_command + +log = logging.getLogger(__name__) + +def test_packing_order_all_fields(test_workspace, request): + """ + Comprehensive test to verify the packing order of compressed directory entries. + It checks filtering by name, size, and ctime (newer). + """ + # Start Daemon with compression ON + daemon_env = { + "GKFS_DAEMON_LOG_LEVEL": "info", + "GKFS_DAEMON_USE_DIRENTS_COMPRESSION": "ON" + } + daemon = Daemon(request.config.getoption('--interface'), "rocksdb", test_workspace, env=daemon_env) + daemon.run() + + try: + client = ShellClient(test_workspace) + mount_dir = test_workspace.mountdir + client_env = {"LIBGKFS_USE_DIRENTS_COMPRESSION": "ON"} + + # 1. Create files with specific properties + # file_a: size 100 + # file_b: size 200 + # (wait) + # file_c: size 100 + + file_a = mount_dir / "file_a" + file_b = mount_dir / "file_b" + file_c = mount_dir / "file_c" + + # Use dd to create files with specific sizes + client.run("dd", "if=/dev/zero", f"of={file_a}", "bs=100", "count=1", env=client_env) + client.run("dd", "if=/dev/zero", f"of={file_b}", "bs=200", "count=1", env=client_env) + + log.info("Waiting 2 seconds to distinguish ctimes...") + time.sleep(2) + log.info("Resuming test after sleep.") + + # Create a timestamp file for 'newer' check + timestamp_file = test_workspace.twd / "timestamp" + timestamp_file.touch() + + time.sleep(1) + client.run("dd", "if=/dev/zero", f"of={file_c}", "bs=100", "count=1", env=client_env) + + sfind_bin = find_command("sfind", test_workspace.bindirs) + assert sfind_bin, "sfind binary not found" + + common_args = ["-M", str(mount_dir), "-S", "1", "--server-side"] + + import re + def check_match(ret, expected_matches, expected_total=3): + output = ret.stdout.decode() + log.info(f"SFIND OUTPUT:\n{output}") + match = re.search(r"MATCHED (\d+)/(\d+)", output) + if not match: + assert False, f"Could not find MATCHED line in output:\n{output}" + + actual_matches = int(match.group(1)) + total_checked = int(match.group(2)) + + log.info(f"Actual matches: {actual_matches}, Total checked: {total_checked}") + assert actual_matches == expected_matches, f"Expected {expected_matches} matches, but found {actual_matches}. Full output:\n{output}" + assert total_checked == expected_total, f"Expected {expected_total} total checked, but found {total_checked}. Full output:\n{output}" + + # Scenario 1: Filter by Name + log.info("Testing -name filter...") + ret = client.run(str(sfind_bin), str(mount_dir), "-name", "file_a", *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, 1) + + # Scenario 2: Filter by Size (100c) + log.info("Testing -size filter...") + # Should match file_a and file_c (both 100 bytes) + ret = client.run(str(sfind_bin), str(mount_dir), "-size", "100c", *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, 2) + + # Scenario 3: Filter by Size (200c) + log.info("Testing -size filter (200c)...") + # Should match file_b + ret = client.run(str(sfind_bin), str(mount_dir), "-size", "200c", *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, 1) + + # Scenario 4: Filter by Newer (ctime) + log.info("Testing -newer filter...") + # Should match file_c (created after timestamp_file) + ret = client.run(str(sfind_bin), str(mount_dir), "-newer", str(timestamp_file), *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, 1) + + # Scenario 5: Combined Filter (Name *file* AND Size 100c AND Newer) + log.info("Testing combined filter...") + # Should match only file_c + ret = client.run(str(sfind_bin), str(mount_dir), "-name", "*file*", "-size", "100c", "-newer", str(timestamp_file), *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, 1) + + log.info("Packing order verification successful!") + + finally: + # Dump daemon log to inspect scanned keys and ctime + # daemon_log = test_workspace.logdir / "daemon.log" + # if daemon_log.exists(): + ## log.info("DUMPING DAEMON LOG DEBUG ENTRIES:") + # with open(daemon_log, 'r') as f: + # for line in f: + # if "DEBUG" in line: + # print(line.strip()) + #else: + # log.warning(f"Daemon log not found at {daemon_log}") + daemon.shutdown()