From 8792c05f86cfde8c180458b87f32f0250ea9775c Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Fri, 6 Feb 2026 14:16:00 +0100 Subject: [PATCH 1/2] fix compressed order --- include/client/rpc/utils.hpp | 30 ++++- src/client/rpc/forward_metadata.cpp | 53 ++++++-- .../directories/test_packing_order.py | 118 ++++++++++++++++++ 3 files changed, 187 insertions(+), 14 deletions(-) create mode 100644 tests/integration/directories/test_packing_order.py diff --git a/include/client/rpc/utils.hpp b/include/client/rpc/utils.hpp index 0d75693b..55718c4f 100644 --- a/include/client/rpc/utils.hpp +++ b/include/client/rpc/utils.hpp @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include namespace gkfs::rpc { @@ -104,16 +106,36 @@ decompress_and_parse_entries(const OutputOrErr& out, std::vector> entries; while(p < end) { - bool is_dir = *reinterpret_cast(p); - p += sizeof(bool); + if(p + sizeof(uint8_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", __func__); + break; + } + bool is_dir = (*reinterpret_cast(p) != 0); + p += sizeof(uint8_t); + + if(p + sizeof(size_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing file_size", __func__); + break; + } size_t file_size = *reinterpret_cast(p); p += sizeof(size_t); + + if(p + sizeof(time_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing ctime", __func__); + break; + } time_t ctime = *reinterpret_cast(p); p += sizeof(time_t); - std::string name(p); - p += name.length() + 1; + // Name is null-terminated. We must check we don't go past 'end'. + size_t name_len = strnlen(p, end - p); + if(p + name_len >= end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", __func__); + break; + } + std::string name(p, name_len); + p += name_len + 1; if(!name.empty()) { entries.emplace_back(name, is_dir, file_size, ctime); diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index cfe72f37..9338bfeb 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -36,6 +36,8 @@ #include #include #include +#include +#include using namespace std; @@ -246,11 +248,22 @@ decompress_and_parse_entries_standard( entries.reserve(out.dirents_size); // Approx while(p < end) { - bool is_dir = (*p != 0); - p += 1; + if(p + sizeof(uint8_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", __func__); + break; + } + bool is_dir = (*reinterpret_cast(p) != 0); + p += sizeof(uint8_t); + + // Name is null-terminated. We must check we don't go past 'end'. + size_t name_len = strnlen(p, end - p); + if(p + name_len >= end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", __func__); + break; + } - std::string name(p); - p += name.length() + 1; + std::string name(p, name_len); + p += name_len + 1; if(!name.empty()) { entries.emplace_back(name, is_dir, 0, 0); @@ -311,24 +324,44 @@ decompress_and_parse_entries_filtered( p = static_cast(compressed_buffer); end = p + out.dirents_size; } - std::vector> entries; // We don't know exact count, but can optimize reserve? // entries.reserve(out.dirents_size / 32); while(p < end) { - bool is_dir = (*p != 0); - p += 1; - - std::string name(p); - p += name.length() + 1; + if(p + sizeof(uint8_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", __func__); + break; + } + bool is_dir = (*reinterpret_cast(p) != 0); + p += sizeof(uint8_t); + if(p + sizeof(size_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing size", + __func__); + break; + } size_t size = *reinterpret_cast(p); p += sizeof(size_t); + if(p + sizeof(time_t) > end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing ctime", + __func__); + break; + } time_t ctime = *reinterpret_cast(p); p += sizeof(time_t); + // Name is null-terminated. We must check we don't go past 'end'. + size_t name_len = strnlen(p, end - p); + if(p + name_len >= end) { + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", __func__); + break; + } + + std::string name(p, name_len); + p += name_len + 1; + if(!name.empty()) { entries.emplace_back(name, is_dir, size, ctime); } diff --git a/tests/integration/directories/test_packing_order.py b/tests/integration/directories/test_packing_order.py new file mode 100644 index 00000000..1b59745d --- /dev/null +++ b/tests/integration/directories/test_packing_order.py @@ -0,0 +1,118 @@ + +import pytest +import logging +import time +from harness.gkfs import Daemon, ShellClient, Client, find_command + +log = logging.getLogger(__name__) + +def test_packing_order_all_fields(test_workspace, request): + """ + Comprehensive test to verify the packing order of compressed directory entries. + It checks filtering by name, size, and ctime (newer). + """ + + # Start Daemon with compression ON + daemon_env = { + "GKFS_DAEMON_LOG_LEVEL": "info", + "GKFS_DAEMON_USE_DIRENTS_COMPRESSION": "ON" + } + daemon = Daemon(request.config.getoption('--interface'), "rocksdb", test_workspace, env=daemon_env) + daemon.run() + + try: + client = ShellClient(test_workspace) + mount_dir = test_workspace.mountdir + client_env = {"LIBGKFS_USE_DIRENTS_COMPRESSION": "ON"} + + # 1. Create files with specific properties + # file_a: size 100 + # file_b: size 200 + # (wait) + # file_c: size 100 + + file_a = mount_dir / "file_a" + file_b = mount_dir / "file_b" + file_c = mount_dir / "file_c" + + # Use dd to create files with specific sizes + client.run("dd", "if=/dev/zero", f"of={file_a}", "bs=100", "count=1", env=client_env) + client.run("dd", "if=/dev/zero", f"of={file_b}", "bs=200", "count=1", env=client_env) + + log.info("Waiting 2 seconds to distinguish ctimes...") + time.sleep(2) + + # Create a timestamp file for 'newer' check + timestamp_file = test_workspace.twd / "timestamp" + timestamp_file.touch() + + time.sleep(1) + client.run("dd", "if=/dev/zero", f"of={file_c}", "bs=100", "count=1", env=client_env) + + sfind_bin = find_command("sfind", test_workspace.bindirs) + assert sfind_bin, "sfind binary not found" + + common_args = ["-M", str(mount_dir), "-S", "1", "--server-side"] + + import re + def check_match(ret, expected): + output = ret.stdout.decode() + log.info(f"SFIND OUTPUT:\n{output}") + # Search for the MATCHED line, ignoring any prepended SLURM warnings + match = re.search(rf"MATCHED {expected}", output) + if not match: + # Capture the actual matched line if it exists to aid debugging + actual_match = re.search(r"MATCHED (\d+/\d+)", output) + actual_val = actual_match.group(1) if actual_match else "NOT FOUND" + assert match, f"Expected 'MATCHED {expected}', but found '{actual_val}'. Full output:\n{output}" + + # Scenario 1: Filter by Name + log.info("Testing -name filter...") + ret = client.run(str(sfind_bin), str(mount_dir), "-name", "file_a", *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, "1/3") + + # Scenario 2: Filter by Size (100c) + log.info("Testing -size filter...") + # Should match file_a and file_c (both 100 bytes) + ret = client.run(str(sfind_bin), str(mount_dir), "-size", "100c", *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, "2/3") + + # Scenario 3: Filter by Size (200c) + log.info("Testing -size filter (200c)...") + # Should match file_b + ret = client.run(str(sfind_bin), str(mount_dir), "-size", "200c", *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, "1/3") + + # Scenario 4: Filter by Newer (ctime) + log.info("Testing -newer filter...") + # Should match file_c (created after timestamp_file) + ret = client.run(str(sfind_bin), str(mount_dir), "-newer", str(timestamp_file), *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, "1/3") + + # Scenario 5: Combined Filter (Name *file* AND Size 100c AND Newer) + log.info("Testing combined filter...") + # Should match only file_c + ret = client.run(str(sfind_bin), str(mount_dir), "-name", "*file*", "-size", "100c", "-newer", str(timestamp_file), *common_args, env=client_env) + assert ret.exit_code == 0 + check_match(ret, "1/3") + + log.info("Packing order verification successful!") + + except Exception: + # Dump logs + client_log = test_workspace.logdir / 'gkfs_client.log' + if client_log.exists(): + print("\n=== CLIENT LOG ===") + print(client_log.read_text()) + + daemon_log = test_workspace.logdir / 'gkfs_daemon.log' + if daemon_log.exists(): + print("\n=== DAEMON LOG ===") + print(daemon_log.read_text()) + raise + finally: + daemon.shutdown() -- GitLab From d66bd0f8543055727078e30bda73aa184a968fe2 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Fri, 6 Feb 2026 16:42:32 +0100 Subject: [PATCH 2/2] packing order test --- CHANGELOG.md | 2 + include/client/fuse/fuse_client.hpp | 10 ++-- include/client/rpc/utils.hpp | 12 +++-- src/client/rpc/forward_metadata.cpp | 12 +++-- .../backend/metadata/rocksdb_backend.cpp | 4 +- .../directories/test_packing_order.py | 51 ++++++++++--------- 6 files changed, 51 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3795f466..b55389c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Fixed - SYS_lstat does not exists on some architectures, change to newfstatat ([!269](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/269)) - We cannot use lstat directly as may cause a recursion call on libc interception. + - Un/Packing order of directory entries in compressed format was incorrect ([!281](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/281)) + ## [0.9.5] - 2025-08 ### New diff --git a/include/client/fuse/fuse_client.hpp b/include/client/fuse/fuse_client.hpp index a512d5a2..cbc0a6fe 100644 --- a/include/client/fuse/fuse_client.hpp +++ b/include/client/fuse/fuse_client.hpp @@ -36,7 +36,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later */ - +// clang-format off #ifndef GKFS_CLIENT_FUSE_CONTEXT_HPP #define GKFS_CLIENT_FUSE_CONTEXT_HPP @@ -101,10 +101,10 @@ _Static_assert(!gkfs::config::fuse::pointertrick || #else struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { unsigned _uintptr_to_must_hold_fuse_ino_t - : (!gkfs::config::fuse::pointertrick || - (sizeof(fuse_ino_t) >= sizeof(uintptr_t)) - ? 1 - : -1); + : (!gkfs::config::fuse::pointertrick || + (sizeof(fuse_ino_t) >= sizeof(uintptr_t)) + ? 1 + : -1); }; #endif diff --git a/include/client/rpc/utils.hpp b/include/client/rpc/utils.hpp index 55718c4f..ca537e03 100644 --- a/include/client/rpc/utils.hpp +++ b/include/client/rpc/utils.hpp @@ -107,21 +107,24 @@ decompress_and_parse_entries(const OutputOrErr& out, while(p < end) { if(p + sizeof(uint8_t) > end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", + __func__); break; } bool is_dir = (*reinterpret_cast(p) != 0); p += sizeof(uint8_t); if(p + sizeof(size_t) > end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing file_size", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing file_size", + __func__); break; } size_t file_size = *reinterpret_cast(p); p += sizeof(size_t); if(p + sizeof(time_t) > end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing ctime", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing ctime", + __func__); break; } time_t ctime = *reinterpret_cast(p); @@ -130,7 +133,8 @@ decompress_and_parse_entries(const OutputOrErr& out, // Name is null-terminated. We must check we don't go past 'end'. size_t name_len = strnlen(p, end - p); if(p + name_len >= end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing name", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", + __func__); break; } diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index 9338bfeb..88abb497 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -249,7 +249,8 @@ decompress_and_parse_entries_standard( while(p < end) { if(p + sizeof(uint8_t) > end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", + __func__); break; } bool is_dir = (*reinterpret_cast(p) != 0); @@ -258,7 +259,8 @@ decompress_and_parse_entries_standard( // Name is null-terminated. We must check we don't go past 'end'. size_t name_len = strnlen(p, end - p); if(p + name_len >= end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing name", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", + __func__); break; } @@ -330,7 +332,8 @@ decompress_and_parse_entries_filtered( while(p < end) { if(p + sizeof(uint8_t) > end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing is_dir", + __func__); break; } bool is_dir = (*reinterpret_cast(p) != 0); @@ -355,7 +358,8 @@ decompress_and_parse_entries_filtered( // Name is null-terminated. We must check we don't go past 'end'. size_t name_len = strnlen(p, end - p); if(p + name_len >= end) { - LOG(ERROR, "{}() Unexpected end of buffer while parsing name", __func__); + LOG(ERROR, "{}() Unexpected end of buffer while parsing name", + __func__); break; } diff --git a/src/daemon/backend/metadata/rocksdb_backend.cpp b/src/daemon/backend/metadata/rocksdb_backend.cpp index 2c6434c1..cc5829bc 100644 --- a/src/daemon/backend/metadata/rocksdb_backend.cpp +++ b/src/daemon/backend/metadata/rocksdb_backend.cpp @@ -580,8 +580,6 @@ RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, eof = false; break; } - scanned_count++; - // Get File name auto name = it->key().ToString(); // save as potential last key (full path for now, stripped later if @@ -609,6 +607,8 @@ RocksDBBackend::get_dirents_filtered_impl(const std::string& dir, continue; } + scanned_count++; + Metadata md(it->value().ToString()); #ifdef HAS_RENAME // Remove entries with negative blocks (rename) diff --git a/tests/integration/directories/test_packing_order.py b/tests/integration/directories/test_packing_order.py index 1b59745d..d650f330 100644 --- a/tests/integration/directories/test_packing_order.py +++ b/tests/integration/directories/test_packing_order.py @@ -11,7 +11,6 @@ def test_packing_order_all_fields(test_workspace, request): Comprehensive test to verify the packing order of compressed directory entries. It checks filtering by name, size, and ctime (newer). """ - # Start Daemon with compression ON daemon_env = { "GKFS_DAEMON_LOG_LEVEL": "info", @@ -41,6 +40,7 @@ def test_packing_order_all_fields(test_workspace, request): log.info("Waiting 2 seconds to distinguish ctimes...") time.sleep(2) + log.info("Resuming test after sleep.") # Create a timestamp file for 'newer' check timestamp_file = test_workspace.twd / "timestamp" @@ -55,64 +55,65 @@ def test_packing_order_all_fields(test_workspace, request): common_args = ["-M", str(mount_dir), "-S", "1", "--server-side"] import re - def check_match(ret, expected): + def check_match(ret, expected_matches, expected_total=3): output = ret.stdout.decode() log.info(f"SFIND OUTPUT:\n{output}") - # Search for the MATCHED line, ignoring any prepended SLURM warnings - match = re.search(rf"MATCHED {expected}", output) + match = re.search(r"MATCHED (\d+)/(\d+)", output) if not match: - # Capture the actual matched line if it exists to aid debugging - actual_match = re.search(r"MATCHED (\d+/\d+)", output) - actual_val = actual_match.group(1) if actual_match else "NOT FOUND" - assert match, f"Expected 'MATCHED {expected}', but found '{actual_val}'. Full output:\n{output}" + assert False, f"Could not find MATCHED line in output:\n{output}" + + actual_matches = int(match.group(1)) + total_checked = int(match.group(2)) + + log.info(f"Actual matches: {actual_matches}, Total checked: {total_checked}") + assert actual_matches == expected_matches, f"Expected {expected_matches} matches, but found {actual_matches}. Full output:\n{output}" + assert total_checked == expected_total, f"Expected {expected_total} total checked, but found {total_checked}. Full output:\n{output}" # Scenario 1: Filter by Name log.info("Testing -name filter...") ret = client.run(str(sfind_bin), str(mount_dir), "-name", "file_a", *common_args, env=client_env) assert ret.exit_code == 0 - check_match(ret, "1/3") + check_match(ret, 1) # Scenario 2: Filter by Size (100c) log.info("Testing -size filter...") # Should match file_a and file_c (both 100 bytes) ret = client.run(str(sfind_bin), str(mount_dir), "-size", "100c", *common_args, env=client_env) assert ret.exit_code == 0 - check_match(ret, "2/3") + check_match(ret, 2) # Scenario 3: Filter by Size (200c) log.info("Testing -size filter (200c)...") # Should match file_b ret = client.run(str(sfind_bin), str(mount_dir), "-size", "200c", *common_args, env=client_env) assert ret.exit_code == 0 - check_match(ret, "1/3") + check_match(ret, 1) # Scenario 4: Filter by Newer (ctime) log.info("Testing -newer filter...") # Should match file_c (created after timestamp_file) ret = client.run(str(sfind_bin), str(mount_dir), "-newer", str(timestamp_file), *common_args, env=client_env) assert ret.exit_code == 0 - check_match(ret, "1/3") + check_match(ret, 1) # Scenario 5: Combined Filter (Name *file* AND Size 100c AND Newer) log.info("Testing combined filter...") # Should match only file_c ret = client.run(str(sfind_bin), str(mount_dir), "-name", "*file*", "-size", "100c", "-newer", str(timestamp_file), *common_args, env=client_env) assert ret.exit_code == 0 - check_match(ret, "1/3") + check_match(ret, 1) log.info("Packing order verification successful!") - except Exception: - # Dump logs - client_log = test_workspace.logdir / 'gkfs_client.log' - if client_log.exists(): - print("\n=== CLIENT LOG ===") - print(client_log.read_text()) - - daemon_log = test_workspace.logdir / 'gkfs_daemon.log' - if daemon_log.exists(): - print("\n=== DAEMON LOG ===") - print(daemon_log.read_text()) - raise finally: + # Dump daemon log to inspect scanned keys and ctime + # daemon_log = test_workspace.logdir / "daemon.log" + # if daemon_log.exists(): + ## log.info("DUMPING DAEMON LOG DEBUG ENTRIES:") + # with open(daemon_log, 'r') as f: + # for line in f: + # if "DEBUG" in line: + # print(line.strip()) + #else: + # log.warning(f"Daemon log not found at {daemon_log}") daemon.shutdown() -- GitLab