Commit add983f3 authored by Julius Athenstaedt's avatar Julius Athenstaedt
Browse files

fix open64 EIO, it was caused by the fd generator which returned somehow...

Fix open64 EIO: it was caused by the fd generator, which somehow returned negative fds, probably due to the /dev/null mechanism.
parent d8910e9b
Loading
Loading
Loading
Loading
Loading
+40 −29
Original line number Diff line number Diff line
/*
  Copyright 2018-2025, Barcelona Supercomputing Center (BSC), Spain
  Copyright 2015-2025, Johannes Gutenberg Universitaet Mainz, Germany
  Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain
  Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany

  This software was partially supported by the
  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
@@ -8,16 +8,6 @@
  This software was partially supported by the
  ADA-FS project under the SPPEXA project funded by the DFG.

  This software was partially supported by
  the European Union’s Horizon 2020 JTI-EuroHPC research and
  innovation programme, by the project ADMIRE (Project ID: 956748,
  admire-eurohpc.eu)

  This project was partially promoted by the Ministry for Digital Transformation
  and the Civil Service, within the framework of the Recovery,
  Transformation and Resilience Plan - Funded by the European Union
  -NextGenerationEU.

  This file is part of GekkoFS.

  GekkoFS is free software: you can redistribute it and/or modify
@@ -58,14 +48,14 @@ constexpr auto forwarding_file_path = "";

// Client-side caching switches.
// NOTE(review): this span is a rendered diff, so some constants appear twice
// (the removed line followed by the added line).
namespace cache {
// Optimization for readdir which avoids consecutive stat calls
constexpr bool use_dentry_cache = false;
constexpr bool use_dentry_cache = false; // TODO: set to true
// When enabled, the dentry cache is cleared when a directory is closed.
// Disabling this may cause semantic issues.
constexpr bool clear_dentry_cache_on_close = true;
// When enabled, write operations no longer update the file size on each write.
// Instead, the size is updated every `write_size_flush_threshold` writes per
// file. fsync/close flushes the size to the server immediately.
constexpr bool use_write_size_cache = false;
// Instead, the size is updated every `write_size_flush_threshold` writes per
// file. fsync/close flushes the size to the server immediately.
constexpr bool use_write_size_cache = false; // TODO: set to true
// Number of writes per file between size updates when `use_write_size_cache`
// is enabled.
constexpr auto write_size_flush_threshold = 1000;
} // namespace cache

@@ -94,7 +84,7 @@ constexpr auto zero_buffer_before_read = false;
 * regardless of their order minimizing the gap between bulk transfers.
 * Due to spinning in a loop this increases CPU utilization
 */
constexpr auto spin_lock_read = false;
constexpr auto spin_lock_read = true;
} // namespace io

namespace log {
@@ -133,7 +123,26 @@ constexpr auto implicit_data_removal = true;
// metadata logic
// Check for existence of file metadata before create. This is done at the
// RocksDB level
constexpr auto create_exist_check = true;
// Verify that a parent directory exists before creating new files or
// directories
inline bool create_check_parents = true;
// Enable support for symbolic links in paths
inline bool symlink_support = true;
inline bool rename_support = true;

// Use inline data for small files
inline bool use_inline_data = true;
constexpr auto inline_data_size = 4096; // in bytes
// The next options may break consistency and will definitely break tests. Use
// with care.
// Optimize write operations for small files (files are not created until a
// write occurs)
inline bool create_write_optimization = false;
// Prefetch inline data on read operations
inline bool read_inline_prefetch = false;


constexpr auto create_exist_check = false;
} // namespace metadata
namespace data {
// directory name below rootdir where chunks are placed
@@ -142,14 +151,14 @@ constexpr auto chunk_dir = "chunks";

namespace proxy {
constexpr auto pid_path = "/tmp/gkfs_proxy.pid";
constexpr auto fwd_create = true;
constexpr auto fwd_stat = true;
constexpr auto fwd_remove = true;
constexpr auto fwd_get_size = true;
constexpr auto fwd_update_size = true;
constexpr auto fwd_io = true;
constexpr auto fwd_truncate = true;
constexpr auto fwd_chunk_stat = true;
constexpr auto fwd_create = false;
constexpr auto fwd_stat = false;
constexpr auto fwd_remove = false;
constexpr auto fwd_get_size = false;
constexpr auto fwd_update_size = false;
constexpr auto fwd_io = false;
constexpr auto fwd_truncate = false;
constexpr auto fwd_chunk_stat = false;
constexpr auto fwd_get_dirents_single = true;
// Only use proxy for io if write/read size is higher than set value
constexpr auto fwd_io_count_threshold = 0;
@@ -157,18 +166,19 @@ constexpr auto fwd_io_count_threshold = 0;
} // namespace proxy

// RPC-related tunables: chunk size, directory-entry buffer sizes and the
// number of execution streams.
// NOTE(review): this span is a rendered diff, so some constants appear twice
// (the removed line followed by the added line).
namespace rpc {
constexpr auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB)
constexpr auto chunksize = 524288; // alternative value: 1048576; in bytes (e.g., 524288 == 512KB)
// size of preallocated buffer to hold directory entries in rpc call
constexpr auto dirents_buff_size = (8 * 1024 * 1024);         // 8 MiB
constexpr auto dirents_buff_size_proxy = (128 * 1024 * 1024); // 128 MiB
constexpr auto async_opendir = true;// TODO: reconsider whether this should stay enabled
/*
 * Indicates the degree of concurrency used to drive I/O operations on chunk
 * files to and from local file systems. The value is directly mapped to
 * created Argobots xstreams, controlled in a single pool with the ABT_snoozer
 * scheduler.
 */
constexpr auto daemon_io_xstreams = 8;
constexpr auto daemon_io_xstreams = 16;
// Number of threads used for RPC handlers at the daemon
constexpr auto daemon_handler_xstreams = 4;
constexpr auto daemon_handler_xstreams = 8;
// Number of threads used for RPC handlers at the proxy
constexpr auto proxy_handler_xstreams = 3;
} // namespace rpc
@@ -196,3 +206,4 @@ constexpr auto check_inode = true;
} // namespace gkfs::config

#endif // GEKKOFS_CONFIG_HPP
+16 −2
Original line number Diff line number Diff line
@@ -378,6 +378,8 @@ setattr_handler(fuse_req_t req, fuse_ino_t ino, struct stat* attr, int to_set,
    return;
}

// Debug counters for open_handler: incremented on each successful / failed
// reply and printed from main() after the session is unmounted.
// NOTE(review): plain ints with no synchronization — confirm the handlers are
// never invoked concurrently.
static int o_success = 0;
static int o_failure = 0;
static void
open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
    auto* ud = udata(req);
@@ -385,6 +387,7 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
    auto* inode = get_inode(ino);
    if(!inode) {
        fuse_reply_err(req, ENOENT);
				o_failure++;
        return;
    }
    const int mode = 0644; // -rw-r--r--    I think that doesnt matter anyway
@@ -392,11 +395,13 @@ open_handler(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
                                      fi->flags); // TODO mode!
    if(fd < 0) {
        fuse_reply_err(req, ENOENT);
				o_failure++;
        return;
    }
    fi->fh = fd;
    fi->direct_io = ud->direct_io;
    fuse_reply_open(req, fi);
		o_success++;
}

static void
@@ -455,6 +460,8 @@ write_handler(fuse_req_t req, fuse_ino_t ino, const char* buf, size_t size,
    fuse_reply_write(req, rc);
}

// Debug counters for create_handler: incremented on each successful / failed
// reply and printed from main() after the session is unmounted.
// NOTE(review): plain ints with no synchronization — confirm the handlers are
// never invoked concurrently.
static int c_success = 0;
static int c_failure = 0;
static void
create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode,
               struct fuse_file_info* fi) {
@@ -481,8 +488,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode,
    DEBUG_INFO(ud, "create handler %s", path.c_str());
    int fd = gkfs::syscall::gkfs_open(path, mode, fi->flags | O_CREAT);
    if(fd < 0) {
        DEBUG_INFO(ud, "create -> open failed errno %i", errno);
        fuse_reply_err(req, errno);
        // DEBUG_INFO(ud, "create -> open failed errno %i", errno);
				// errno = 0 causes EIO
        //fuse_reply_err(req, errno);
        fuse_reply_err(req, ENOENT);
				c_failure++;
        return;
    }
    fi->fh = fd;
@@ -492,9 +502,11 @@ create_handler(fuse_req_t req, fuse_ino_t parent, const char* name, mode_t mode,
    int sc = fill_fuse_entry_param(ud, path, e, false);
    if(sc < 0) {
        fuse_reply_err(req, ENOENT);
				c_failure++;
        return;
    }
    fuse_reply_create(req, &e, fi);
		c_success++;
}

/// TODO normally, the file should only be removed if the lookup count is zero,
@@ -1313,5 +1325,7 @@ main(int argc, char* argv[]) {
    }

    fuse_session_unmount(se);
		std::cout << "fuse open with success " << o_success << " failure " << o_failure << std::endl;
		std::cout << "fuse create with success " << c_success << " failure " << c_failure << std::endl;
    return ret < 0 ? 1 : 0;
}
+4 −3
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@
#include <client/gkfs_functions.hpp>
#include <client/path.hpp>
#include <client/open_dir.hpp>
#include <iostream>

#include <common/path_util.hpp>

@@ -92,9 +93,9 @@ hook_openat(int dirfd, const char* cpath, int flags, mode_t mode) {
            return -ENOTDIR;

        case gkfs::preload::RelativizeStatus::internal: {
            struct stat st;
            syscall_no_intercept_wrapper(
                SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW);
            //struct stat st;
            //syscall_no_intercept_wrapper(
                //SYS_newfstatat, dirfd, ".", &st, AT_SYMLINK_NOFOLLOW);
            //auto md_ = gkfs::utils::get_metadata(resolved);
            // pass open to fuse to sync fd in file_map
            int fd = gsl::narrow_cast<int>(syscall_no_intercept_wrapper(
+3 −36
Original line number Diff line number Diff line
@@ -70,7 +70,7 @@ OpenFile::OpenFile(const string& path, const int flags, FileType type)
    pos_ = 0; // If O_APPEND flag is used, it will be used before each write.
}

// Constructor: sets the initial fd_idx and clears fd_validation_needed.
// NOTE(review): rendered diff — the first line is the removed version
// (fd_idx starts at 10000), the second is the added version (fd_idx starts
// at 0).
OpenFileMap::OpenFileMap() : fd_idx(10000), fd_validation_needed(false) {}
OpenFileMap::OpenFileMap() : fd_idx(0), fd_validation_needed(false) {}

string
OpenFile::path() const {
@@ -142,41 +142,8 @@ OpenFileMap::exist(const int fd) {

/*
 * Returns a file descriptor value.
 *
 * NOTE(review): this span is a rendered diff hunk. It shows the removed
 * implementation (everything up to and including the top-level `return fd;`)
 * directly followed by the two-line replacement (the lock_guard and
 * `return fd_idx++;`). Read the two parts as alternatives, not as one body.
 */
int
OpenFileMap::safe_generate_fd_idx_() {
    // --- removed implementation ---
    int fd = 0;
    if(CTX->protect_fds()) {
        fd = generate_fd_idx();
        /*
         * Check if fd is still in use and generate another one if so.
         * Note that this can only happen once all fd indices within the int
         * range have been used up to int::max. Once this limit is exceeded,
         * we set fd_idx back to 3 and begin anew. Only then, if a file was
         * open for a long time, will we have to generate another index.
         *
         * This situation can only occur when all fd indices have been given
         * away once and we start again, in which case the fd_validation_needed
         * flag is set.
         * NOTE(review): the original comment ends mid-sentence here
         * ("fd_validation is set to false, if") — the reset condition is not
         * documented.
         */
        if(fd_validation_needed) {
            while(exist(fd)) {
                fd = generate_fd_idx();
            }
        }
    } else {
        // Return a virtual fd from 10000, but avoid doing all the FD movements
        if(CTX->range_fd()) {
            fd = generate_fd_idx();
            if(fd_validation_needed) {
                while(exist(fd)) {
                    fd = generate_fd_idx();
                }
            }
            return fd;
        }

        // Otherwise open /dev/null through a non-intercepted openat and use
        // the result as the fd.
        // NOTE(review): the syscall result is stored without an error check;
        // the commit message attributes the negative fds to this mechanism —
        // confirm.
        fd = syscall_no_intercept(SYS_openat, AT_FDCWD, "/dev/null", O_RDWR,
                                  S_IRUSR | S_IWUSR);
    }
    return fd;
    // --- replacement implementation ---
    // Hold fd_idx_mutex while reading and post-incrementing fd_idx.
    // NOTE(review): no wrap-around handling is visible here — confirm fd_idx
    // cannot overflow in practice.
    std::lock_guard<std::mutex> inode_lock(fd_idx_mutex);
    return fd_idx++;
}

int