Loading include/config.hpp +40 −29 Original line number Diff line number Diff line /* Copyright 2018-2025, Barcelona Supercomputing Center (BSC), Spain Copyright 2015-2025, Johannes Gutenberg Universitaet Mainz, Germany Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). Loading @@ -8,16 +8,6 @@ This software was partially supported by the ADA-FS project under the SPPEXA project funded by the DFG. This software was partially supported by the European Union’s Horizon 2020 JTI-EuroHPC research and innovation programme, by the project ADMIRE (Project ID: 956748, admire-eurohpc.eu) This project was partially promoted by the Ministry for Digital Transformation and the Civil Service, within the framework of the Recovery, Transformation and Resilience Plan - Funded by the European Union -NextGenerationEU. This file is part of GekkoFS. GekkoFS is free software: you can redistribute it and/or modify Loading Loading @@ -58,14 +48,14 @@ constexpr auto forwarding_file_path = ""; namespace cache { // Optimization for readdir which avoids consecutive stat calls constexpr bool use_dentry_cache = false; constexpr bool use_dentry_cache = false; // TODO true // When enabled, the dentry cache is cleared when a directory is closed. // Disabling this may cause semantic issues. constexpr bool clear_dentry_cache_on_close = true; // When enabled, write operations no longer update the file size on each write. // Instead, the size is updated every `write_size_flush_threshold` writes per // file. fsync/close flushes the size to the server immediately. constexpr bool use_write_size_cache = false; // Instead, the size is updated every `max_write_size_cache` writes per file. // fsync/close flushes the size to the server immediately. 
constexpr bool use_write_size_cache = false; // TODO true constexpr auto write_size_flush_threshold = 1000; } // namespace cache Loading Loading @@ -94,7 +84,7 @@ constexpr auto zero_buffer_before_read = false; * regardless of their order minimizing the gap between bulk transfers. * Due to spinning in a loop this increases CPU utilization */ constexpr auto spin_lock_read = false; constexpr auto spin_lock_read = true; } // namespace io namespace log { Loading Loading @@ -133,7 +123,26 @@ constexpr auto implicit_data_removal = true; // metadata logic // Check for existence of file metadata before create. This is done on RocksDB // level constexpr auto create_exist_check = true; // Verify that a parent directory exists before creating new files or // directories inline bool create_check_parents = true; // Enable support for symbolic links in paths inline bool symlink_support = true; inline bool rename_support = true; // Use inline data for small files // Use inline data for small files inline bool use_inline_data = true; constexpr auto inline_data_size = 4096; // in bytes // Next options may break consistency, and definitely break tests. Use with // care. 
Optimize write operations for small files (files are not created until // a write appears) inline bool create_write_optimization = false; // Prefetch inline data on read operations inline bool read_inline_prefetch = false; constexpr auto create_exist_check = false; } // namespace metadata namespace data { // directory name below rootdir where chunks are placed Loading @@ -142,14 +151,14 @@ constexpr auto chunk_dir = "chunks"; namespace proxy { constexpr auto pid_path = "/tmp/gkfs_proxy.pid"; constexpr auto fwd_create = true; constexpr auto fwd_stat = true; constexpr auto fwd_remove = true; constexpr auto fwd_get_size = true; constexpr auto fwd_update_size = true; constexpr auto fwd_io = true; constexpr auto fwd_truncate = true; constexpr auto fwd_chunk_stat = true; constexpr auto fwd_create = false; constexpr auto fwd_stat = false; constexpr auto fwd_remove = false; constexpr auto fwd_get_size = false; constexpr auto fwd_update_size = false; constexpr auto fwd_io = false; constexpr auto fwd_truncate = false; constexpr auto fwd_chunk_stat = false; constexpr auto fwd_get_dirents_single = true; // Only use proxy for io if write/read size is higher than set value constexpr auto fwd_io_count_threshold = 0; Loading @@ -157,18 +166,19 @@ constexpr auto fwd_io_count_threshold = 0; } // namespace proxy namespace rpc { constexpr auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB) constexpr auto chunksize = 524288; //1048576; // in bytes (e.g., 524288 == 512KB) // size of preallocated buffer to hold directory entries in rpc call constexpr auto dirents_buff_size = (8 * 1024 * 1024); // 8 mega constexpr auto dirents_buff_size_proxy = (128 * 1024 * 1024); // 128 mega constexpr auto async_opendir = true;//TODO maybe no? 
/* * Indicates the number of concurrent progress to drive I/O operations of chunk * files to and from local file systems. The value is directly mapped to created * Argobots xstreams, controlled in a single pool with ABT_snoozer scheduler */ constexpr auto daemon_io_xstreams = 8; constexpr auto daemon_io_xstreams = 16; // Number of threads used for RPC handlers at the daemon constexpr auto daemon_handler_xstreams = 4; constexpr auto daemon_handler_xstreams = 8; // Number of threads used for RPC handlers at the proxy constexpr auto proxy_handler_xstreams = 3; } // namespace rpc Loading Loading @@ -196,3 +206,4 @@ constexpr auto check_inode = true; } // namespace gkfs::config #endif // GEKKOFS_CONFIG_HPP src/client/fuse/fuse_client.cpp +16 −2 Original line number Diff line number Diff line Loading @@ -378,6 +378,8 @@ setattr_handler(fuse_req_t req, fuse_ino_t ino, struct stat* attr, int to_set, return; } static int o_success = 0; static int o_failure = 0; static void open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { auto* ud = udata(req); Loading @@ -385,6 +387,7 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { auto* inode = get_inode(ino); if(!inode) { fuse_reply_err(req, ENOENT); o_failure++; return; } const int mode = 0644; // -rw-r--r-- I think that doesnt matter anyway Loading @@ -392,11 +395,13 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { fi->flags); // TODO mode! 
if(fd < 0) { fuse_reply_err(req, ENOENT); o_failure++; return; } fi->fh = fd; fi->direct_io = ud->direct_io; fuse_reply_open(req, fi); o_success++; } static void Loading Loading @@ -455,6 +460,8 @@ write_handler(fuse_req_t req, fuse_ino_t ino, const char* buf, size_t size, fuse_reply_write(req, rc); } static int c_success = 0; static int c_failure = 0; static void create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode, struct fuse_file_info* fi) { Loading @@ -481,8 +488,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode, DEBUG_INFO(ud, "create handler %s", path.c_str()); int fd = gkfs::syscall::gkfs_open(path, mode, fi->flags | O_CREAT); if(fd < 0) { DEBUG_INFO(ud, "create -> open failed errno %i", errno); fuse_reply_err(req, errno); // DEBUG_INFO(ud, "create -> open failed errno %i", errno); // errno = 0 causes EIO //fuse_reply_err(req, errno); fuse_reply_err(req, ENOENT); c_failure++; return; } fi->fh = fd; Loading @@ -492,9 +502,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode, int sc = fill_fuse_entry_param(ud, path, e, false); if(sc < 0) { fuse_reply_err(req, ENOENT); c_failure++; return; } fuse_reply_create(req, &e, fi); c_success++; } /// TODO normally, the file should only be removed if the lookup count is zero, Loading Loading @@ -1313,5 +1325,7 @@ main(int argc, char* argv[]) { } fuse_session_unmount(se); std::cout << "fuse open with success " << o_success << " failure " << o_failure << std::endl; std::cout << "fuse create with success " << c_success << " failure " << c_failure << std::endl; return ret < 0 ? 
1 : 0; } src/client/hooks.cpp +4 −3 Original line number Diff line number Diff line Loading @@ -44,6 +44,7 @@ #include <client/gkfs_functions.hpp> #include <client/path.hpp> #include <client/open_dir.hpp> #include <iostream> #include <common/path_util.hpp> Loading Loading @@ -92,9 +93,9 @@ hook_openat(int dirfd, const char* cpath, int flags, mode_t mode) { return -ENOTDIR; case gkfs::preload::RelativizeStatus::internal: { struct stat st; syscall_no_intercept_wrapper( SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW); //struct stat st; //syscall_no_intercept_wrapper( //SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW); //auto md_ = gkfs::utils::get_metadata(resolved); // pass open to fuse to sync fd in file_map int fd = gsl::narrow_cast<int>(syscall_no_intercept_wrapper( Loading src/client/open_file_map.cpp +3 −36 Original line number Diff line number Diff line Loading @@ -70,7 +70,7 @@ OpenFile::OpenFile(const string& path, const int flags, FileType type) pos_ = 0; // If O_APPEND flag is used, it will be used before each write. } OpenFileMap::OpenFileMap() : fd_idx(10000), fd_validation_needed(false) {} OpenFileMap::OpenFileMap() : fd_idx(0), fd_validation_needed(false) {} string OpenFile::path() const { Loading Loading @@ -142,41 +142,8 @@ OpenFileMap::exist(const int fd) { int OpenFileMap::safe_generate_fd_idx_() { int fd = 0; if(CTX->protect_fds()) { fd = generate_fd_idx(); /* * Check if fd is still in use and generate another if yes * Note that this can only happen once the all fd indices within the int * has been used to the int::max Once this limit is exceeded, we set * fd_idx back to 3 and begin anew. Only then, if a file was open for a * long time will we have to generate another index. * * This situation can only occur when all fd indices have been given * away once and we start again, in which case the fd_validation_needed * flag is set. 
fd_validation is set to false, if */ if(fd_validation_needed) { while(exist(fd)) { fd = generate_fd_idx(); } } } else { // Return a virtual fd from 10000, but avoid doing all the FD movements if(CTX->range_fd()) { fd = generate_fd_idx(); if(fd_validation_needed) { while(exist(fd)) { fd = generate_fd_idx(); } } return fd; } fd = syscall_no_intercept(SYS_openat, AT_FDCWD, "/dev/null", O_RDWR, S_IRUSR | S_IWUSR); } return fd; std::lock_guard<std::mutex> inode_lock(fd_idx_mutex); return fd_idx++; } int Loading Loading
include/config.hpp +40 −29 Original line number Diff line number Diff line /* Copyright 2018-2025, Barcelona Supercomputing Center (BSC), Spain Copyright 2015-2025, Johannes Gutenberg Universitaet Mainz, Germany Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). Loading @@ -8,16 +8,6 @@ This software was partially supported by the ADA-FS project under the SPPEXA project funded by the DFG. This software was partially supported by the European Union’s Horizon 2020 JTI-EuroHPC research and innovation programme, by the project ADMIRE (Project ID: 956748, admire-eurohpc.eu) This project was partially promoted by the Ministry for Digital Transformation and the Civil Service, within the framework of the Recovery, Transformation and Resilience Plan - Funded by the European Union -NextGenerationEU. This file is part of GekkoFS. GekkoFS is free software: you can redistribute it and/or modify Loading Loading @@ -58,14 +48,14 @@ constexpr auto forwarding_file_path = ""; namespace cache { // Optimization for readdir which avoids consecutive stat calls constexpr bool use_dentry_cache = false; constexpr bool use_dentry_cache = false; // TODO true // When enabled, the dentry cache is cleared when a directory is closed. // Disabling this may cause semantic issues. constexpr bool clear_dentry_cache_on_close = true; // When enabled, write operations no longer update the file size on each write. // Instead, the size is updated every `write_size_flush_threshold` writes per // file. fsync/close flushes the size to the server immediately. constexpr bool use_write_size_cache = false; // Instead, the size is updated every `max_write_size_cache` writes per file. // fsync/close flushes the size to the server immediately. 
constexpr bool use_write_size_cache = false; // TODO true constexpr auto write_size_flush_threshold = 1000; } // namespace cache Loading Loading @@ -94,7 +84,7 @@ constexpr auto zero_buffer_before_read = false; * regardless of their order minimizing the gap between bulk transfers. * Due to spinning in a loop this increases CPU utilization */ constexpr auto spin_lock_read = false; constexpr auto spin_lock_read = true; } // namespace io namespace log { Loading Loading @@ -133,7 +123,26 @@ constexpr auto implicit_data_removal = true; // metadata logic // Check for existence of file metadata before create. This is done on RocksDB // level constexpr auto create_exist_check = true; // Verify that a parent directory exists before creating new files or // directories inline bool create_check_parents = true; // Enable support for symbolic links in paths inline bool symlink_support = true; inline bool rename_support = true; // Use inline data for small files // Use inline data for small files inline bool use_inline_data = true; constexpr auto inline_data_size = 4096; // in bytes // Next options may break consistency, and definitely break tests. Use with // care. 
Optimize write operations for small files (files are not created until // a write appears) inline bool create_write_optimization = false; // Prefetch inline data on read operations inline bool read_inline_prefetch = false; constexpr auto create_exist_check = false; } // namespace metadata namespace data { // directory name below rootdir where chunks are placed Loading @@ -142,14 +151,14 @@ constexpr auto chunk_dir = "chunks"; namespace proxy { constexpr auto pid_path = "/tmp/gkfs_proxy.pid"; constexpr auto fwd_create = true; constexpr auto fwd_stat = true; constexpr auto fwd_remove = true; constexpr auto fwd_get_size = true; constexpr auto fwd_update_size = true; constexpr auto fwd_io = true; constexpr auto fwd_truncate = true; constexpr auto fwd_chunk_stat = true; constexpr auto fwd_create = false; constexpr auto fwd_stat = false; constexpr auto fwd_remove = false; constexpr auto fwd_get_size = false; constexpr auto fwd_update_size = false; constexpr auto fwd_io = false; constexpr auto fwd_truncate = false; constexpr auto fwd_chunk_stat = false; constexpr auto fwd_get_dirents_single = true; // Only use proxy for io if write/read size is higher than set value constexpr auto fwd_io_count_threshold = 0; Loading @@ -157,18 +166,19 @@ constexpr auto fwd_io_count_threshold = 0; } // namespace proxy namespace rpc { constexpr auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB) constexpr auto chunksize = 524288; //1048576; // in bytes (e.g., 524288 == 512KB) // size of preallocated buffer to hold directory entries in rpc call constexpr auto dirents_buff_size = (8 * 1024 * 1024); // 8 mega constexpr auto dirents_buff_size_proxy = (128 * 1024 * 1024); // 128 mega constexpr auto async_opendir = true;//TODO maybe no? 
/* * Indicates the number of concurrent progress to drive I/O operations of chunk * files to and from local file systems. The value is directly mapped to created * Argobots xstreams, controlled in a single pool with ABT_snoozer scheduler */ constexpr auto daemon_io_xstreams = 8; constexpr auto daemon_io_xstreams = 16; // Number of threads used for RPC handlers at the daemon constexpr auto daemon_handler_xstreams = 4; constexpr auto daemon_handler_xstreams = 8; // Number of threads used for RPC handlers at the proxy constexpr auto proxy_handler_xstreams = 3; } // namespace rpc Loading Loading @@ -196,3 +206,4 @@ constexpr auto check_inode = true; } // namespace gkfs::config #endif // GEKKOFS_CONFIG_HPP
src/client/fuse/fuse_client.cpp +16 −2 Original line number Diff line number Diff line Loading @@ -378,6 +378,8 @@ setattr_handler(fuse_req_t req, fuse_ino_t ino, struct stat* attr, int to_set, return; } static int o_success = 0; static int o_failure = 0; static void open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { auto* ud = udata(req); Loading @@ -385,6 +387,7 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { auto* inode = get_inode(ino); if(!inode) { fuse_reply_err(req, ENOENT); o_failure++; return; } const int mode = 0644; // -rw-r--r-- I think that doesnt matter anyway Loading @@ -392,11 +395,13 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { fi->flags); // TODO mode! if(fd < 0) { fuse_reply_err(req, ENOENT); o_failure++; return; } fi->fh = fd; fi->direct_io = ud->direct_io; fuse_reply_open(req, fi); o_success++; } static void Loading Loading @@ -455,6 +460,8 @@ write_handler(fuse_req_t req, fuse_ino_t ino, const char* buf, size_t size, fuse_reply_write(req, rc); } static int c_success = 0; static int c_failure = 0; static void create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode, struct fuse_file_info* fi) { Loading @@ -481,8 +488,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode, DEBUG_INFO(ud, "create handler %s", path.c_str()); int fd = gkfs::syscall::gkfs_open(path, mode, fi->flags | O_CREAT); if(fd < 0) { DEBUG_INFO(ud, "create -> open failed errno %i", errno); fuse_reply_err(req, errno); // DEBUG_INFO(ud, "create -> open failed errno %i", errno); // errno = 0 causes EIO //fuse_reply_err(req, errno); fuse_reply_err(req, ENOENT); c_failure++; return; } fi->fh = fd; Loading @@ -492,9 +502,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode, int sc = fill_fuse_entry_param(ud, path, e, false); if(sc < 0) { fuse_reply_err(req, ENOENT); c_failure++; return; } fuse_reply_create(req, &e, 
fi); c_success++; } /// TODO normally, the file should only be removed if the lookup count is zero, Loading Loading @@ -1313,5 +1325,7 @@ main(int argc, char* argv[]) { } fuse_session_unmount(se); std::cout << "fuse open with success " << o_success << " failure " << o_failure << std::endl; std::cout << "fuse create with success " << c_success << " failure " << c_failure << std::endl; return ret < 0 ? 1 : 0; }
src/client/hooks.cpp +4 −3 Original line number Diff line number Diff line Loading @@ -44,6 +44,7 @@ #include <client/gkfs_functions.hpp> #include <client/path.hpp> #include <client/open_dir.hpp> #include <iostream> #include <common/path_util.hpp> Loading Loading @@ -92,9 +93,9 @@ hook_openat(int dirfd, const char* cpath, int flags, mode_t mode) { return -ENOTDIR; case gkfs::preload::RelativizeStatus::internal: { struct stat st; syscall_no_intercept_wrapper( SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW); //struct stat st; //syscall_no_intercept_wrapper( //SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW); //auto md_ = gkfs::utils::get_metadata(resolved); // pass open to fuse to sync fd in file_map int fd = gsl::narrow_cast<int>(syscall_no_intercept_wrapper( Loading
src/client/open_file_map.cpp +3 −36 Original line number Diff line number Diff line Loading @@ -70,7 +70,7 @@ OpenFile::OpenFile(const string& path, const int flags, FileType type) pos_ = 0; // If O_APPEND flag is used, it will be used before each write. } OpenFileMap::OpenFileMap() : fd_idx(10000), fd_validation_needed(false) {} OpenFileMap::OpenFileMap() : fd_idx(0), fd_validation_needed(false) {} string OpenFile::path() const { Loading Loading @@ -142,41 +142,8 @@ OpenFileMap::exist(const int fd) { int OpenFileMap::safe_generate_fd_idx_() { int fd = 0; if(CTX->protect_fds()) { fd = generate_fd_idx(); /* * Check if fd is still in use and generate another if yes * Note that this can only happen once the all fd indices within the int * has been used to the int::max Once this limit is exceeded, we set * fd_idx back to 3 and begin anew. Only then, if a file was open for a * long time will we have to generate another index. * * This situation can only occur when all fd indices have been given * away once and we start again, in which case the fd_validation_needed * flag is set. fd_validation is set to false, if */ if(fd_validation_needed) { while(exist(fd)) { fd = generate_fd_idx(); } } } else { // Return a virtual fd from 10000, but avoid doing all the FD movements if(CTX->range_fd()) { fd = generate_fd_idx(); if(fd_validation_needed) { while(exist(fd)) { fd = generate_fd_idx(); } } return fd; } fd = syscall_no_intercept(SYS_openat, AT_FDCWD, "/dev/null", O_RDWR, S_IRUSR | S_IWUSR); } return fd; std::lock_guard<std::mutex> inode_lock(fd_idx_mutex); return fd_idx++; } int Loading