Commit 4d66cd27 authored by Ramon Nou's avatar Ramon Nou
Browse files

Merge branch 'rnou/fixcuda' into 'master'

fix cuda issues


See merge request !291
parents 7a4146b9 7066f317
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
  - SYS_lstat does not exist on some architectures; use newfstatat instead ([!269](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/269))
    - We cannot use lstat directly, as it may cause a recursive call through the libc interception.
  - Un/Packing order of directory entries in compressed format was incorrect ([!281](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/281))
  - Fix pytorch mmap ([!291](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/291))
    

## [0.9.5] - 2025-08
+2 −0
Original line number Diff line number Diff line
@@ -145,6 +145,7 @@ gkfs_read_ws(const gkfs::filemap::OpenFile& file, char* buf, size_t count,
/**
 * Positional read on a GekkoFS file descriptor.
 *
 * The implementation resolves `fd` in the client file map and forwards the
 * request to gkfs_read_ws() using the caller-supplied `offset`.
 *
 * @param fd     descriptor looked up in the client file map
 * @param buf    destination buffer handed to gkfs_read_ws()
 * @param count  byte count handed to gkfs_read_ws()
 * @param offset file offset handed to gkfs_read_ws()
 * @return the value produced by gkfs_read_ws() -- presumably the number of
 *         bytes read, or -1 with errno set (TODO confirm in gkfs_read_ws)
 */
ssize_t
gkfs_pread(int fd, void* buf, size_t count, off64_t offset);


/**
 * Sequential read on a GekkoFS file descriptor.
 *
 * The implementation resolves `fd` in the client file map and reads through
 * gkfs_read_ws(), starting at the position reported by the open-file entry's
 * pos().
 * NOTE(review): the stored position is presumably advanced after the read --
 * confirm in the definition.
 *
 * @param fd    descriptor looked up in the client file map
 * @param buf   destination buffer handed to gkfs_read_ws()
 * @param count byte count handed to gkfs_read_ws()
 */
ssize_t
gkfs_read(int fd, void* buf, size_t count);

@@ -192,6 +193,7 @@ gkfs_munmap(void* addr, size_t length);
// NOTE(review): the signature matches msync(2) (addr, length, flags); this is
// presumably the GekkoFS counterpart for regions created via the client's
// mmap support -- confirm against the definition.
int
gkfs_msync(void* addr, size_t length, int flags);


} // namespace gkfs::syscall

// gkfs_getsingleserverdir is declared extern "C" to suppress C++ name
// mangling so that it can be called from C code
+9 −0
Original line number Diff line number Diff line
@@ -135,6 +135,15 @@ ssize_t
hook_pwritev(unsigned long fd, const struct iovec* iov, unsigned long iovcnt,
             unsigned long pos_l, unsigned long pos_h);

// Interception hook whose parameter list matches sendfile(2).
// NOTE(review): presumably copies up to `count` bytes from `in_fd` to
// `out_fd` when at least one of them is a GekkoFS descriptor -- confirm
// against the definition.
ssize_t
hook_sendfile(int out_fd, int in_fd, off_t* offset, size_t count);

// Interception hook whose parameter list matches copy_file_range(2).
// Only declared when the build environment defines the
// SYS_copy_file_range syscall number.
// NOTE(review): semantics presumably follow copy_file_range(2) -- confirm
// against the definition.
#ifdef SYS_copy_file_range
ssize_t
hook_copy_file_range(int fd_in, loff_t* off_in, int fd_out, loff_t* off_out,
                     size_t len, unsigned int flags);
#endif

// Interception hook whose parameter list matches unlinkat(2)
// (directory fd, path, flags).
// NOTE(review): exact handling of `dirfd` and `flags` is not visible here --
// confirm against the definition.
int
hook_unlinkat(int dirfd, const char* cpath, int flags);

+1 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@ enum class OpenFile_flags {
    cloexec,
    created,          // indicates if the file was created during open
    creation_pending, // indicates if the file creation is delayed
    trunc_pending,    // indicates O_TRUNC has been deferred
    flag_count // this is purely used as a size variable of this enum class
};

+68 −38
Original line number Diff line number Diff line
@@ -133,7 +133,6 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count,
        }
        file.set_flag(gkfs::filemap::OpenFile_flags::creation_pending, false);
    }

    // clear inline data cache as it is stale
    if(!file.inline_data().empty())
        file.inline_data("");
@@ -331,14 +330,19 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count,
ssize_t
gkfs_write_ws(gkfs::filemap::OpenFile& file, const char* buf, size_t count,
              off64_t offset, bool update_pos) {
    // Wrapper around gkfs_do_write(): performs the write, records a client
    // write metric when GKFS_ENABLE_CLIENT_METRICS is defined, and emits a
    // debug trace with the file position before and after the call.
    const auto pos_before = file.pos();
#ifdef GKFS_ENABLE_CLIENT_METRICS
    auto start_t = std::chrono::high_resolution_clock::now();
#endif
    auto written = gkfs_do_write(file, buf, count, offset, update_pos);
#ifdef GKFS_ENABLE_CLIENT_METRICS
    CTX->write_metrics()->add_event(written, start_t);
#endif
    // Single exit path: both build configurations reach the trace below.
    // Previously an early return in each branch left it unreachable.
    LOG(DEBUG,
        "{}() path '{}' count {} offset {} update_pos {} written {} pos_before {} pos_after {}",
        __func__, file.path(), count, offset, update_pos, written, pos_before,
        file.pos());
    return written;
}

/**
@@ -353,8 +357,10 @@ gkfs_write_ws(gkfs::filemap::OpenFile& file, const char* buf, size_t count,
ssize_t
gkfs_pwrite(int fd, const void* buf, size_t count, off64_t offset) {
    // Positional write: resolve fd in the client file map and forward the
    // request to gkfs_write_ws() at the caller-supplied offset.
    auto file = CTX->file_map()->get(fd);
    if(!file) {
        // Unknown descriptor: fail with EBADF rather than reporting a
        // successful zero-byte write, which callers cannot tell apart from
        // a legitimate result.
        errno = EBADF;
        return -1;
    }
    return gkfs_write_ws(*file, reinterpret_cast<const char*>(buf), count,
                         offset);
}
@@ -370,8 +376,10 @@ gkfs_pwrite(int fd, const void* buf, size_t count, off64_t offset) {
ssize_t
gkfs_write(int fd, const void* buf, size_t count) {
    auto gkfs_fd = CTX->file_map()->get(fd);
    if(!gkfs_fd)
        return 0;
    if(!gkfs_fd) {
        errno = EBADF;
        return -1;
    }
    // call pwrite and update pos
    auto ret = gkfs_write_ws(*gkfs_fd, reinterpret_cast<const char*>(buf),
                             count, gkfs_fd->pos(), true);
@@ -391,8 +399,10 @@ ssize_t
gkfs_pwritev(int fd, const struct iovec* iov, int iovcnt, off_t offset) {

    auto file = CTX->file_map()->get(fd);
    if(!file)
        return 0;
    if(!file) {
        errno = EBADF;
        return -1;
    }
    auto pos = offset; // keep track of current position
    ssize_t written = 0;
    ssize_t ret;
@@ -432,8 +442,10 @@ ssize_t
gkfs_writev(int fd, const struct iovec* iov, int iovcnt) {

    auto gkfs_fd = CTX->file_map()->get(fd);
    if(!gkfs_fd)
        return 0;
    if(!gkfs_fd) {
        errno = EBADF;
        return -1;
    }
    auto pos = gkfs_fd->pos(); // retrieve the current offset
    auto ret = gkfs_pwritev(fd, iov, iovcnt, pos);
    assert(ret != 0);
@@ -529,10 +541,12 @@ gkfs_do_read(const gkfs::filemap::OpenFile& file, char* buf, size_t count,
            __func__);
    }

    auto do_chunk_read = [&]() -> std::pair<int, long> {
        pair<int, long> ret;
        if(gkfs::config::proxy::fwd_io && CTX->use_proxy() &&
           count > gkfs::config::proxy::fwd_io_count_threshold) {
        ret = gkfs::rpc::forward_read_proxy(file.path(), buf, offset, count);
            ret = gkfs::rpc::forward_read_proxy(file.path(), buf, offset,
                                                count);
        } else {
            std::set<int8_t> failed; // set with failed targets.
            if(CTX->get_replicas() != 0) {
@@ -540,17 +554,23 @@ gkfs_do_read(const gkfs::filemap::OpenFile& file, char* buf, size_t count,
                ret = gkfs::rpc::forward_read(file.path(), buf, offset, count,
                                              CTX->get_replicas(), failed);
                while(ret.first == EIO) {
                ret = gkfs::rpc::forward_read(file.path(), buf, offset, count,
                                              CTX->get_replicas(), failed);
                LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'",
                    ret = gkfs::rpc::forward_read(file.path(), buf, offset,
                                                  count, CTX->get_replicas(),
                                                  failed);
                    LOG(WARNING,
                        "gkfs::rpc::forward_read() failed with ret '{}'",
                        ret.first);
                }

            } else {
            ret = gkfs::rpc::forward_read(file.path(), buf, offset, count, 0,
                                          failed);
                ret = gkfs::rpc::forward_read(file.path(), buf, offset, count,
                                              0, failed);
            }
        }
        return ret;
    };

    auto ret = do_chunk_read();
    auto err = ret.first;
    if(err) {
        LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'", err);
@@ -594,8 +614,10 @@ gkfs_read_ws(const gkfs::filemap::OpenFile& file, char* buf, size_t count,
ssize_t
gkfs_pread(int fd, void* buf, size_t count, off64_t offset) {
    // Positional read: resolve fd in the client file map and forward the
    // request to gkfs_read_ws() at the caller-supplied offset.
    auto gkfs_fd = CTX->file_map()->get(fd);
    if(!gkfs_fd) {
        // Unknown descriptor: fail with EBADF. Returning 0 here would be
        // indistinguishable from end-of-file for the caller.
        errno = EBADF;
        return -1;
    }
    return gkfs_read_ws(*gkfs_fd, reinterpret_cast<char*>(buf), count, offset);
}

@@ -610,8 +632,12 @@ gkfs_pread(int fd, void* buf, size_t count, off64_t offset) {
ssize_t
gkfs_read(int fd, void* buf, size_t count) {
    auto gkfs_fd = CTX->file_map()->get(fd);
    if(!gkfs_fd)
        return 0;
    if(!gkfs_fd) {
        errno = EBADF;
        return -1;
    }
    LOG(DEBUG, "{}() reading path '{}' count {} pos {}", __func__,
        gkfs_fd->path(), count, gkfs_fd->pos());
    auto pos = gkfs_fd->pos(); // retrieve the current offset
    auto ret = gkfs_read_ws(*gkfs_fd, reinterpret_cast<char*>(buf), count, pos);
    // Update offset in file descriptor in the file map
@@ -634,8 +660,10 @@ ssize_t
gkfs_preadv(int fd, const struct iovec* iov, int iovcnt, off_t offset) {

    auto file = CTX->file_map()->get(fd);
    if(!file)
        return 0;
    if(!file) {
        errno = EBADF;
        return -1;
    }
    auto pos = offset; // keep track of current position
    ssize_t read = 0;
    ssize_t ret;
@@ -675,8 +703,10 @@ ssize_t
gkfs_readv(int fd, const struct iovec* iov, int iovcnt) {

    auto gkfs_fd = CTX->file_map()->get(fd);
    if(!gkfs_fd)
        return 0;
    if(!gkfs_fd) {
        errno = EBADF;
        return -1;
    }
    auto pos = gkfs_fd->pos(); // retrieve the current offset
    auto ret = gkfs_preadv(fd, iov, iovcnt, pos);
    assert(ret != 0);
@@ -691,8 +721,8 @@ int
gkfs_fsync(unsigned int fd) {
    auto file = CTX->file_map()->get(fd);
    if(!file) {
        errno = 0;
        return 0;
        errno = EBADF;
        return -1;
    }
    // flush write size cache to be server consistent
    if(CTX->use_write_size_cache()) {
Loading