From 6b7aa13d58a1c820de10967cef300f301fd48293 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 25 Feb 2026 19:56:53 +0100 Subject: [PATCH 1/4] added test, and CREATE_EXIST_CHECK env --- README.md | 1 + include/config.hpp | 2 +- include/daemon/env.hpp | 2 + src/daemon/daemon.cpp | 11 ++- src/daemon/ops/metadentry.cpp | 15 +++- .../directories/test_backend_cleanup.py | 70 +++++++++++++++++++ 6 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 tests/integration/directories/test_backend_cleanup.py diff --git a/README.md b/README.md index c48f77cea..3898baf13 100644 --- a/README.md +++ b/README.md @@ -662,6 +662,7 @@ Using two environment variables ### Daemon #### Core - `GKFS_DAEMON_CREATE_CHECK_PARENTS` - Enable checking parent directory for existence before creating children. +- `GKFS_DAEMON_CREATE_EXIST_CHECK` - Check for existence of file metadata before create in RocksDB. - `GKFS_DAEMON_SYMLINK_SUPPORT` - Enable support for symbolic links. - `GKFS_DAEMON_RENAME_SUPPORT` - Enable support for rename. #### Logging diff --git a/include/config.hpp b/include/config.hpp index 1eac94040..47f7b6b8b 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -129,7 +129,7 @@ constexpr auto implicit_data_removal = true; // metadata logic // Check for existence of file metadata before create. This done on RocksDB // level -constexpr auto create_exist_check = true; +inline bool create_exist_check = true; // Verify that a parent directory exists before creating new files or // directories inline bool create_check_parents = true; diff --git a/include/daemon/env.hpp b/include/daemon/env.hpp index 4e141a528..42fbbf6a7 100644 --- a/include/daemon/env.hpp +++ b/include/daemon/env.hpp @@ -56,6 +56,8 @@ static constexpr auto USE_DIRENTS_COMPRESSION = static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); static constexpr auto DAEMON_CREATE_CHECK_PARENTS = ADD_PREFIX("DAEMON_CREATE_CHECK_PARENTS"); +static constexpr auto DAEMON_CREATE_EXIST_CHECK = + ADD_PREFIX("DAEMON_CREATE_EXIST_CHECK"); static constexpr auto DAEMON_SYMLINK_SUPPORT = ADD_PREFIX("DAEMON_SYMLINK_SUPPORT"); static constexpr auto DAEMON_RENAME_SUPPORT = diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 6d10c6305..2fc5e21ef 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -381,6 +381,11 @@ init_environment() { gkfs::config::metadata::create_check_parents ? "ON" : "OFF") == "ON"; + gkfs::config::metadata::create_exist_check = + gkfs::env::get_var(gkfs::env::DAEMON_CREATE_EXIST_CHECK, + gkfs::config::metadata::create_exist_check + ? "ON" + : "OFF") == "ON"; gkfs::config::metadata::symlink_support = gkfs::env::get_var(gkfs::env::DAEMON_SYMLINK_SUPPORT, gkfs::config::metadata::symlink_support @@ -393,10 +398,11 @@ init_environment() { : "OFF") == "ON"; GKFS_DATA->spdlogger()->info( - "{}() Inline data: {} / Dirents compression: {} / Create check parents: {} / Symlink support: {} / Rename support: {}", + "{}() Inline data: {} / Dirents compression: {} / Create check parents: {} / Create exist check: {} / Symlink support: {} / Rename support: {}", __func__, gkfs::config::metadata::use_inline_data, gkfs::config::rpc::use_dirents_compression, gkfs::config::metadata::create_check_parents, + gkfs::config::metadata::create_exist_check, gkfs::config::metadata::symlink_support, gkfs::config::metadata::rename_support); @@ -1066,6 +1072,9 @@ main(int argc, const char* argv[]) { #else cout << "Create check parents: OFF" << endl; #endif + cout << "Create exist check: " + << (gkfs::config::metadata::create_exist_check ? "ON" : "OFF") + << endl; cout << "Chunk size: " << gkfs::config::rpc::chunksize << " bytes" << endl; return EXIT_SUCCESS; diff --git a/src/daemon/ops/metadentry.cpp b/src/daemon/ops/metadentry.cpp index 5f7b504b6..d256fedab 100644 --- a/src/daemon/ops/metadentry.cpp +++ b/src/daemon/ops/metadentry.cpp @@ -82,8 +82,19 @@ void create(const std::string& path, Metadata& md) { // update metadata object based on what metadata is needed - if constexpr(gkfs::config::metadata::create_exist_check) { - GKFS_DATA->mdb()->put_no_exist(path, md.serialize()); + if(gkfs::config::metadata::create_exist_check) { + bool exists = GKFS_DATA->mdb()->exists(path); + if(exists) { + // If it exists, put_no_exist would typically throw an error. + // For now, we'll assume the intent is to do nothing or let the + // backend handle the error. The original `put_no_exist` implies an + // error if it exists. To maintain similar behavior, we should + // probably throw an error here or rely on the backend. For now, + // we'll just call put_no_exist as it was, but without constexpr. + GKFS_DATA->mdb()->put_no_exist(path, md.serialize()); + } else { + GKFS_DATA->mdb()->put(path, md.serialize()); + } } else { GKFS_DATA->mdb()->put(path, md.serialize()); } diff --git a/tests/integration/directories/test_backend_cleanup.py b/tests/integration/directories/test_backend_cleanup.py new file mode 100644 index 000000000..9b7227b12 --- /dev/null +++ b/tests/integration/directories/test_backend_cleanup.py @@ -0,0 +1,70 @@ +import harness +from pathlib import Path +import errno +import stat +import os +import pytest +from harness.logger import logger + +@pytest.mark.parametrize("client_fixture", ["gkfs_client", "gkfs_clientLibc"]) +def test_backend_cleanup(client_fixture, request, gkfs_daemon): + gkfs_client = request.getfixturevalue(client_fixture) + mountdir = gkfs_daemon.mountdir + rootdir = gkfs_daemon.rootdir + + file_path = mountdir / "cleanup_file" + dir_path = mountdir / "cleanup_dir" + + # 1. Create a directory + ret = gkfs_client.mkdir(dir_path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) + assert ret.retval == 0 + + # 2. Create a file + ret = gkfs_client.open(file_path, os.O_CREAT | os.O_WRONLY, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) + assert ret.retval != -1 + + # 3. Write some data to generate chunks + buf = b'backend_cleanup_test_data' * 1024 + ret = gkfs_client.write(file_path, buf, len(buf)) + assert ret.retval == len(buf) + + # Let's see what's in the backend rootdir + # Usually chunks are in `chunks` directory + # We will just walk the rootdir and count files/directories to see what exists + def get_backend_contents(directory): + contents = [] + for root, dirs, files in os.walk(directory): + for d in dirs: + contents.append(os.path.join(root, d)) + for f in files: + contents.append(os.path.join(root, f)) + return contents + + contents_before = get_backend_contents(rootdir) + logger.info(f"Backend contents before deletion: {contents_before}") + + # Check that chunks exist somewhere in rootdir + # GekkoFS stores chunks typically in `/chunks/` + chunk_dir = rootdir / "chunks" + + # Ensure properties exist + assert chunk_dir.exists() + + # Count chunks in chunk_dir + chunks_before = get_backend_contents(chunk_dir) + assert len(chunks_before) > 0, f"Expected chunks in {chunk_dir}, but found none. Total backend: {contents_before}" + + # 4. Remove file and directory + ret = gkfs_client.unlink(file_path) + assert ret.retval == 0 + + ret = gkfs_client.rmdir(dir_path) + assert ret.retval == 0 + + # 5. Check backend again + contents_after = get_backend_contents(rootdir) + logger.info(f"Backend contents after deletion: {contents_after}") + + chunks_after = get_backend_contents(chunk_dir) + # The chunk directory corresponding to the file should be deleted or empty + assert len(chunks_after) == 0, f"Expected data directory to be empty, but found: {chunks_after}" -- GitLab From f3ca272d9fb0a8b9d6f6ae201b5c1040fd8f18f9 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 26 Feb 2026 06:46:37 +0100 Subject: [PATCH 2/4] reduced write --- tests/integration/directories/test_backend_cleanup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/directories/test_backend_cleanup.py b/tests/integration/directories/test_backend_cleanup.py index 9b7227b12..e1499a1fe 100644 --- a/tests/integration/directories/test_backend_cleanup.py +++ b/tests/integration/directories/test_backend_cleanup.py @@ -24,7 +24,7 @@ def test_backend_cleanup(client_fixture, request, gkfs_daemon): assert ret.retval != -1 # 3. Write some data to generate chunks - buf = b'backend_cleanup_test_data' * 1024 + buf = b'backend_cleanup_test_data' * 10 ret = gkfs_client.write(file_path, buf, len(buf)) assert ret.retval == len(buf) -- GitLab From 8701c6733c72be823c7a6da0f451fdb99b65ede6 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 26 Feb 2026 07:50:35 +0100 Subject: [PATCH 3/4] fix --- .gitmodules | 3 --- src/daemon/ops/metadentry.cpp | 13 +------------ .../integration/directories/test_backend_cleanup.py | 5 ++--- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/.gitmodules b/.gitmodules index 1fe5c04e2..3487bb38d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "external/hermes"] - path = external/hermes - url = https://github.com/gekkofs/hermes.git [submodule "tests/scripts/bats"] path = tests/scripts/bats url = https://github.com/bats-core/bats-core.git diff --git a/src/daemon/ops/metadentry.cpp b/src/daemon/ops/metadentry.cpp index d256fedab..47c82c379 100644 --- a/src/daemon/ops/metadentry.cpp +++ b/src/daemon/ops/metadentry.cpp @@ -83,18 +83,7 @@ create(const std::string& path, Metadata& md) { // update metadata object based on what metadata is needed if(gkfs::config::metadata::create_exist_check) { - bool exists = GKFS_DATA->mdb()->exists(path); - if(exists) { - // If it exists, put_no_exist would typically throw an error. - // For now, we'll assume the intent is to do nothing or let the - // backend handle the error. The original `put_no_exist` implies an - // error if it exists. To maintain similar behavior, we should - // probably throw an error here or rely on the backend. For now, - // we'll just call put_no_exist as it was, but without constexpr. - GKFS_DATA->mdb()->put_no_exist(path, md.serialize()); - } else { - GKFS_DATA->mdb()->put(path, md.serialize()); - } + GKFS_DATA->mdb()->put_no_exist(path, md.serialize()); } else { GKFS_DATA->mdb()->put(path, md.serialize()); } diff --git a/tests/integration/directories/test_backend_cleanup.py b/tests/integration/directories/test_backend_cleanup.py index e1499a1fe..dcb293794 100644 --- a/tests/integration/directories/test_backend_cleanup.py +++ b/tests/integration/directories/test_backend_cleanup.py @@ -24,9 +24,8 @@ def test_backend_cleanup(client_fixture, request, gkfs_daemon): assert ret.retval != -1 # 3. Write some data to generate chunks - buf = b'backend_cleanup_test_data' * 10 - ret = gkfs_client.write(file_path, buf, len(buf)) - assert ret.retval == len(buf) + ret = gkfs_client.write_validate(file_path, 600000) + assert ret.retval == 0 # Let's see what's in the backend rootdir # Usually chunks are in `chunks` directory -- GitLab From 242354fcd3118500d308501c37a37b048fa8252c Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 26 Feb 2026 07:57:26 +0100 Subject: [PATCH 4/4] remove hermes --- CMakeLists.txt | 11 +---------- CMakePresets.json | 2 +- docs/sphinx/devs/coverage.md | 2 +- external/hermes | 1 - include/client/logging.hpp | 19 +++---------------- src/client/preload_util.cpp | 16 +++++----------- 6 files changed, 11 insertions(+), 40 deletions(-) delete mode 160000 external/hermes diff --git a/CMakeLists.txt b/CMakeLists.txt index ebd19b8d7..76a5607d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ message(STATUS "[${PROJECT_NAME}] Build type: ${CMAKE_BUILD_TYPE}") # Compiler flags for various cmake build types set(WARNINGS_FLAGS "-Wall -Wextra --pedantic -Wno-unused-parameter -Wno-missing-field-initializers") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -O3") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0 -DGKFS_DEBUG_BUILD -DHERMES_DEBUG_BUILD") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0 -DGKFS_DEBUG_BUILD") set(CMAKE_CXX_FLAGS_MEMCHECK "${WARNINGS_FLAGS} -g -O0 -fsanitize=address -fno-omit-frame-pointer") set(CMAKE_CXX_FLAGS_MAINTAINER "${WARNINGS_FLAGS} -g -O0 -pg -no-pie") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -DNDEBUG -O3") @@ -334,15 +334,6 @@ endif () configure_file(include/common/cmake_configure.hpp.in include/common/cmake_configure.hpp) -if (GKFS_ENABLE_CLIENT_LOG) - option(HERMES_LOGGING "" ON) - option(HERMES_LOGGING_FMT_USE_BUNDLED "" OFF) - option(HERMES_LOGGING_FMT_HEADER_ONLY "" OFF) -endif () - -option(HERMES_MARGO_COMPATIBLE_RPCS "" ON) -add_subdirectory(external/hermes) -target_include_directories(hermes INTERFACE external/hermes/include) set(INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include") diff --git a/CMakePresets.json b/CMakePresets.json index 117758343..ae1a7f3b0 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -39,7 +39,7 @@ "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", - "CMAKE_CXX_FLAGS": "-Wall -Wextra -fdiagnostics-color=always --pedantic -Wno-unused-parameter -Wno-missing-field-initializers -DGKFS_DEBUG_BUILD -DHERMES_DEBUG_BUILD" + "CMAKE_CXX_FLAGS": "-Wall -Wextra -fdiagnostics-color=always --pedantic -Wno-unused-parameter -Wno-missing-field-initializers -DGKFS_DEBUG_BUILD" } }, { diff --git a/docs/sphinx/devs/coverage.md b/docs/sphinx/devs/coverage.md index 29070570d..b4c5657fc 100644 --- a/docs/sphinx/devs/coverage.md +++ b/docs/sphinx/devs/coverage.md @@ -51,7 +51,7 @@ Preset CMake variables: CMAKE_BUILD_TYPE="Coverage" CMAKE_CXX_COMPILER="/usr/bin/g++" - CMAKE_CXX_FLAGS="-Wall -Wextra -fdiagnostics-color=always --pedantic -Wno-unused-parameter -Wno-missing-field-initializers -DGKFS_DEBUG_BUILD -DHERMES_DEBUG_BUILD" + CMAKE_CXX_FLAGS="-Wall -Wextra -fdiagnostics-color=always --pedantic -Wno-unused-parameter -Wno-missing-field-initializers -DGKFS_DEBUG_BUILD" CMAKE_CXX_FLAGS_COVERAGE="-Og -g --coverage -fkeep-static-functions" CMAKE_C_COMPILER="/usr/bin/gcc" CMAKE_C_FLAGS_COVERAGE="-Og -g --coverage -fkeep-static-functions" diff --git a/external/hermes b/external/hermes deleted file mode 160000 index 072c03322..000000000 --- a/external/hermes +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 072c033225057aefdbdcc852ba0a360706668800 diff --git a/include/client/logging.hpp b/include/client/logging.hpp index 7d9c7f9c9..692643a64 100644 --- a/include/client/logging.hpp +++ b/include/client/logging.hpp @@ -65,7 +65,6 @@ enum class log_level : unsigned int { print_critical = 1 << 3, print_errors = 1 << 4, print_warnings = 1 << 5, - print_hermes = 1 << 6, print_mercury = 1 << 7, print_debug = 1 << 8, print_trace_reads = 1 << 9, @@ -73,8 +72,8 @@ enum class log_level : unsigned int { // for internal use print_none = 0, print_all = print_syscalls | print_syscalls_entry | print_info | - print_critical | print_errors | print_warnings | print_hermes | - print_mercury | print_debug, + print_critical | print_errors | print_warnings | print_mercury | + print_debug, print_most = print_all & ~print_syscalls_entry, print_help = 1 << 10 }; @@ -126,7 +125,6 @@ static const auto constexpr info = log_level::print_info; static const auto constexpr critical = log_level::print_critical; static const auto constexpr error = log_level::print_errors; static const auto constexpr warning = log_level::print_warnings; -static const auto constexpr hermes = log_level::print_hermes; static const auto constexpr mercury = log_level::print_mercury; static const auto constexpr debug = log_level::print_debug; static const auto constexpr trace_reads = log_level::print_trace_reads; @@ -138,7 +136,7 @@ static const auto constexpr help = log_level::print_help; static const auto constexpr level_names = utils::make_array( "syscall", "syscall", // sycall_entry uses the same name as syscall - "info", "critical", "error", "warning", "hermes", "mercury", "debug", + "info", "critical", "error", "warning", "mercury", "debug", "trace_reads"); inline constexpr auto @@ -582,9 +580,6 @@ destroy_global_logger() { #define LOG_CRITICAL(...) \ do { \ } while(0); -#define LOG_HERMES(...) \ - do { \ - } while(0); #define LOG_MERCURY(...) \ do { \ } while(0); @@ -632,14 +627,6 @@ destroy_global_logger() { } \ } while(0); -#define LOG_HERMES(...) \ - do { \ - if(gkfs::log::get_global_logger()) { \ - gkfs::log::get_global_logger()->log(gkfs::log::hermes, __func__, \ - __LINE__, __VA_ARGS__); \ - } \ - } while(0); - #define LOG_MERCURY(...) \ do { \ if(gkfs::log::get_global_logger()) { \ diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index a0a728837..a9aeba4df 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -69,18 +69,12 @@ using namespace std; namespace { + /** - * Looks up a host endpoint via Hermes - * @param uri - * @param max_retries - * @return hermes endpoint, if successful - * @throws std::runtime_error - */ -/** - * Looks up a host endpoint via Hermes + * Looks up a host endpoint via thallium * @param uri * @param max_retries - * @return hermes endpoint, if successful + * @return thallium endpoint, if successful * @throws std::runtime_error */ thallium::endpoint @@ -486,7 +480,7 @@ read_hosts_file() { } /** - * Connects to daemons and lookup Mercury URI addresses via Hermes + * Connects to daemons and lookup Mercury URI addresses via Thallium * @param hosts vector> * @throws std::runtime_error through lookup_endpoint() */ @@ -590,7 +584,7 @@ check_for_proxy() { } /** - * Lookup proxy address via hermes RPC client + * Lookup proxy address via Thallium RPC client * @throws runtime_error */ void -- GitLab