fix open64 EIO, it was caused by the fd generator which returned somehow... (add983f3) · Commits · hpc / gekkofs

include/config.hpp

+40 −29

Original line number	Diff line number	Diff line
		/*
		Copyright 2018-2025, Barcelona Supercomputing Center (BSC), Spain
		Copyright 2015-2025, Johannes Gutenberg Universitaet Mainz, Germany
		Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain
		Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany

		This software was partially supported by the
		EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
		@@ -8,16 +8,6 @@
		This software was partially supported by the
		ADA-FS project under the SPPEXA project funded by the DFG.

		This software was partially supported by the
		the European Union’s Horizon 2020 JTI-EuroHPC research and
		innovation programme, by the project ADMIRE (Project ID: 956748,
		admire-eurohpc.eu)

		This project was partially promoted by the Ministry for Digital Transformation
		and the Civil Service, within the framework of the Recovery,
		Transformation and Resilience Plan - Funded by the European Union
		-NextGenerationEU.

		This file is part of GekkoFS.

		GekkoFS is free software: you can redistribute it and/or modify
		@@ -58,14 +48,14 @@ constexpr auto forwarding_file_path = "";

		// Compile-time switches for the dentry cache and the write-size cache.
		namespace cache {
		// Optimization for readdir which avoids consecutive stat calls
		constexpr bool use_dentry_cache = false;
		constexpr bool use_dentry_cache = false; // TODO: set to true
		// When enabled, the dentry cache is cleared when a directory is closed.
		// Disabling this may cause semantic issues.
		constexpr bool clear_dentry_cache_on_close = true;
		// When enabled, write operations no longer update the file size on each write.
		// Instead, the size is updated every `write_size_flush_threshold` writes per
		// file. fsync/close flushes the size to the server immediately.
		constexpr bool use_write_size_cache = false;
		// Instead, the size is updated every `write_size_flush_threshold` writes per
		// file. fsync/close flushes the size to the server immediately.
		constexpr bool use_write_size_cache = false; // TODO: set to true
		// Number of writes per file between size updates when the write-size cache
		// is enabled.
		constexpr auto write_size_flush_threshold = 1000;
		} // namespace cache

		@@ -94,7 +84,7 @@ constexpr auto zero_buffer_before_read = false;
		* regardless of their order minimizing the gap between bulk transfers.
		* Due to spinning in a loop this increases CPU utilization
		*/
		constexpr auto spin_lock_read = false;
		constexpr auto spin_lock_read = true;
		} // namespace io

		namespace log {
		@@ -133,7 +123,26 @@ constexpr auto implicit_data_removal = true;
		// metadata logic
		// Check for the existence of file metadata before create. This is done at
		// the RocksDB level
		constexpr auto create_exist_check = true;
		// Verify that a parent directory exists before creating new files or
		// directories
		inline bool create_check_parents = true;
		// Enable support for symbolic links in paths
		inline bool symlink_support = true;
		inline bool rename_support = true;

		// Use inline data for small files
		inline bool use_inline_data = true;
		constexpr auto inline_data_size = 4096; // in bytes
		// The following options may break consistency and will definitely break the
		// tests; use with care.
		// Optimize write operations for small files (files are not created until
		// a write appears)
		inline bool create_write_optimization = false;
		// Prefetch inline data on read operations
		inline bool read_inline_prefetch = false;


		constexpr auto create_exist_check = false;
		} // namespace metadata
		namespace data {
		// directory name below rootdir where chunks are placed
		@@ -142,14 +151,14 @@ constexpr auto chunk_dir = "chunks";

		namespace proxy {
		constexpr auto pid_path = "/tmp/gkfs_proxy.pid";
		constexpr auto fwd_create = true;
		constexpr auto fwd_stat = true;
		constexpr auto fwd_remove = true;
		constexpr auto fwd_get_size = true;
		constexpr auto fwd_update_size = true;
		constexpr auto fwd_io = true;
		constexpr auto fwd_truncate = true;
		constexpr auto fwd_chunk_stat = true;
		constexpr auto fwd_create = false;
		constexpr auto fwd_stat = false;
		constexpr auto fwd_remove = false;
		constexpr auto fwd_get_size = false;
		constexpr auto fwd_update_size = false;
		constexpr auto fwd_io = false;
		constexpr auto fwd_truncate = false;
		constexpr auto fwd_chunk_stat = false;
		constexpr auto fwd_get_dirents_single = true;
		// Only use proxy for io if write/read size is higher than set value
		constexpr auto fwd_io_count_threshold = 0;
		@@ -157,18 +166,19 @@ constexpr auto fwd_io_count_threshold = 0;
		} // namespace proxy

		// Compile-time settings for RPC buffers and handler/I-O thread counts.
		namespace rpc {
		constexpr auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB)
		constexpr auto chunksize = 524288; //1048576; // in bytes (e.g., 524288 == 512KB)
		// size of preallocated buffer to hold directory entries in rpc call
		constexpr auto dirents_buff_size = (8 * 1024 * 1024); // 8 mega
		constexpr auto dirents_buff_size_proxy = (128 * 1024 * 1024); // 128 mega
		constexpr auto async_opendir = true;//TODO maybe no?
		/*
		* Indicates the amount of concurrency used to drive I/O operations on chunk
		* files to and from local file systems. The value is directly mapped to the
		* created Argobots xstreams, controlled in a single pool with the ABT_snoozer
		* scheduler.
		*/
		constexpr auto daemon_io_xstreams = 8;
		constexpr auto daemon_io_xstreams = 16;
		// Number of threads used for RPC handlers at the daemon
		constexpr auto daemon_handler_xstreams = 4;
		constexpr auto daemon_handler_xstreams = 8;
		// Number of threads used for RPC handlers at the proxy
		constexpr auto proxy_handler_xstreams = 3;
		} // namespace rpc
		@@ -196,3 +206,4 @@ constexpr auto check_inode = true;
		} // namespace gkfs::config

		#endif // GEKKOFS_CONFIG_HPP

src/client/fuse/fuse_client.cpp

+16 −2

Original line number	Diff line number	Diff line
		@@ -378,6 +378,8 @@ setattr_handler(fuse_req_t req, fuse_ino_t ino, struct stat* attr, int to_set,
		return;
		}

		static int o_success = 0;
		static int o_failure = 0;
		static void
		open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
		auto* ud = udata(req);
		@@ -385,6 +387,7 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
		auto* inode = get_inode(ino);
		if(!inode) {
		fuse_reply_err(req, ENOENT);
		o_failure++;
		return;
		}
		const int mode = 0644; // -rw-r--r-- I think that doesnt matter anyway
		@@ -392,11 +395,13 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
		fi->flags); // TODO mode!
		if(fd < 0) {
		fuse_reply_err(req, ENOENT);
		o_failure++;
		return;
		}
		fi->fh = fd;
		fi->direct_io = ud->direct_io;
		fuse_reply_open(req, fi);
		o_success++;
		}

		static void
		@@ -455,6 +460,8 @@ write_handler(fuse_req_t req, fuse_ino_t ino, const char* buf, size_t size,
		fuse_reply_write(req, rc);
		}

		static int c_success = 0;
		static int c_failure = 0;
		static void
		create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode,
		struct fuse_file_info* fi) {
		@@ -481,8 +488,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode,
		DEBUG_INFO(ud, "create handler %s", path.c_str());
		int fd = gkfs::syscall::gkfs_open(path, mode, fi->flags \| O_CREAT);
		if(fd < 0) {
		DEBUG_INFO(ud, "create -> open failed errno %i", errno);
		fuse_reply_err(req, errno);
		// DEBUG_INFO(ud, "create -> open failed errno %i", errno);
		// errno = 0 causes EIO
		//fuse_reply_err(req, errno);
		fuse_reply_err(req, ENOENT);
		c_failure++;
		return;
		}
		fi->fh = fd;
		@@ -492,9 +502,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode,
		int sc = fill_fuse_entry_param(ud, path, e, false);
		if(sc < 0) {
		fuse_reply_err(req, ENOENT);
		c_failure++;
		return;
		}
		fuse_reply_create(req, &e, fi);
		c_success++;
		}

		/// TODO normally, the file should only be removed if the lookup count is zero,
		@@ -1313,5 +1325,7 @@ main(int argc, char* argv[]) {
		}

		fuse_session_unmount(se);
		std::cout << "fuse open with success " << o_success << " failure " << o_failure << std::endl;
		std::cout << "fuse create with success " << c_success << " failure " << c_failure << std::endl;
		return ret < 0 ? 1 : 0;
		}

src/client/hooks.cpp

+4 −3

Original line number	Diff line number	Diff line
		@@ -44,6 +44,7 @@
		#include <client/gkfs_functions.hpp>
		#include <client/path.hpp>
		#include <client/open_dir.hpp>
		#include <iostream>

		#include <common/path_util.hpp>

		@@ -92,9 +93,9 @@ hook_openat(int dirfd, const char* cpath, int flags, mode_t mode) {
		return -ENOTDIR;

		case gkfs::preload::RelativizeStatus::internal: {
		struct stat st;
		syscall_no_intercept_wrapper(
		SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW);
		//struct stat st;
		//syscall_no_intercept_wrapper(
		//SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW);
		//auto md_ = gkfs::utils::get_metadata(resolved);
		// pass open to fuse to sync fd in file_map
		int fd = gsl::narrow_cast<int>(syscall_no_intercept_wrapper(

src/client/open_file_map.cpp

+3 −36

Original line number	Diff line number	Diff line
		@@ -70,7 +70,7 @@ OpenFile::OpenFile(const string& path, const int flags, FileType type)
		pos_ = 0; // If O_APPEND flag is used, it will be used before each write.
		}

		OpenFileMap::OpenFileMap() : fd_idx(10000), fd_validation_needed(false) {}
		OpenFileMap::OpenFileMap() : fd_idx(0), fd_validation_needed(false) {}

		string
		OpenFile::path() const {
		@@ -142,41 +142,8 @@ OpenFileMap::exist(const int fd) {

		// Generates and returns a file descriptor index.
		int
		OpenFileMap::safe_generate_fd_idx_() {
		int fd = 0;
		if(CTX->protect_fds()) {
		fd = generate_fd_idx();
		/*
		* Check whether fd is still in use and, if so, generate another one.
		* Note that this can only happen once all fd indices within the int
		* range have been used up to int::max. Once this limit is exceeded, we
		* set fd_idx back to 3 and begin anew. Only then, if a file was open for
		* a long time, will we have to generate another index.
		*
		* This situation can only occur when all fd indices have been given
		* away once and we start again, in which case the fd_validation_needed
		* flag is set.
		* NOTE(review): the original comment ended mid-sentence ("fd_validation
		* is set to false, if"); the condition that clears the flag is not
		* visible here -- confirm and complete.
		*/
		if(fd_validation_needed) {
		// Keep drawing indices until one is found that is not currently in use.
		while(exist(fd)) {
		fd = generate_fd_idx();
		}
		}
		} else {
		// Return a virtual fd from 10000, but avoid doing all the FD movements
		if(CTX->range_fd()) {
		fd = generate_fd_idx();
		if(fd_validation_needed) {
		while(exist(fd)) {
		fd = generate_fd_idx();
		}
		}
		return fd;
		}

		// Otherwise open /dev/null through syscall_no_intercept and use the
		// value it returns as the fd.
		fd = syscall_no_intercept(SYS_openat, AT_FDCWD, "/dev/null", O_RDWR,
		S_IRUSR \| S_IWUSR);
		}
		return fd;
		// Take fd_idx_mutex, then return the current fd_idx and post-increment it.
		std::lock_guard<std::mutex> inode_lock(fd_idx_mutex);
		return fd_idx++;
		}

		int