Newer
Older
/*
Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain
Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany
This software was partially supported by the
EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
This software was partially supported by the
ADA-FS project under the SPPEXA project funded by the DFG.
SPDX-License-Identifier: MIT
*/
#include <config.hpp>
#include <client/preload_util.hpp>
#include <client/logging.hpp>
#include <client/gkfs_functions.hpp>
#include <client/rpc/ld_rpc_metadentry.hpp>
#include <client/rpc/ld_rpc_data_ws.hpp>
#include <client/open_dir.hpp>
#include <global/path_util.hpp>
extern "C" {
#include <sys/statfs.h>
#include <sys/statvfs.h>
}
// Adds `mask` to x and clears the bits set in `mask`; for a mask of the form
// 2^k - 1 this rounds x up to a multiple of (mask + 1).
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
// Builds the mask (a - 1) in the type of x (uses the GNU `typeof` extension)
// and applies __ALIGN_KERNEL_MASK. Presumably `a` must be a power of two -- TODO confirm.
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
std::shared_ptr<Metadata> gkfs::func::metadata(const string& path, bool follow_links) {
std::string attr;
auto err = rpc_send::stat(path, attr);
if (err) {
return nullptr;
}
#ifdef HAS_SYMLINKS
if (follow_links) {
Metadata md{attr};
while (md.is_link()) {
err = rpc_send::stat(md.target_path(), attr);
if (err) {
return nullptr;
}
md = Metadata{attr};
}
}
#endif
return make_shared<Metadata>(attr);
}
/**
 * Verifies that the parent component of `path` exists and is a directory.
 * The check is compiled in only when CREATE_CHECK_PARENTS is enabled;
 * otherwise the function always succeeds.
 *
 * @param path path whose parent component is checked
 * @return 0 on success; -1 when the parent's metadata cannot be retrieved
 *         (errno is left as found after the lookup) or when the parent is
 *         not a directory (errno = ENOTDIR)
 */
int gkfs::func::check_parent_dir(const std::string& path) {
#if CREATE_CHECK_PARENTS
    auto p_comp = dirname(path);
    auto md = gkfs::func::metadata(p_comp);
    if (!md) {
        if (errno == ENOENT) {
            LOG(DEBUG, "Parent component does not exist: '{}'", p_comp);
        } else {
            // Report the component that was actually looked up, not the full path.
            LOG(ERROR, "Failed to get metadata for parent component '{}': {}", p_comp, strerror(errno));
        }
        return -1;
    }
    if (!S_ISDIR(md->mode())) {
        LOG(DEBUG, "Parent component is not a directory: '{}'", p_comp);
        errno = ENOTDIR;
        return -1;
    }
#endif // CREATE_CHECK_PARENTS
    return 0;
}
/**
 * Opens `path` (creating it when O_CREAT is given) and registers the open
 * file in the client file map.
 *
 * As far as the visible lines show: O_PATH and O_APPEND are rejected with
 * ENOTSUP; a missing file without O_CREAT yields ENOENT; an existing file
 * opened with O_EXCL yields EEXIST; a symlink is re-opened through its target;
 * a directory is delegated to opendir(); O_TRUNC together with O_RDWR or
 * O_WRONLY truncates the file to length 0.
 *
 * NOTE(review): this listing is incomplete -- `exists` is used without a
 * visible declaration and the braces do not balance. Restore the missing
 * lines from the upstream file before changing any logic here.
 *
 * @param path  path to open
 * @param mode  mode bits passed to mk_node() when the file is created
 * @param flags open(2)-style flags
 * @return the value returned by the file map's add(), or -1 on the visible error paths
 */
int gkfs::func::open(const std::string& path, mode_t mode, int flags) {
if (flags & O_PATH) {
LOG(ERROR, "`O_PATH` flag is not supported");
errno = ENOTSUP;
return -1;
}
if (flags & O_APPEND) {
LOG(ERROR, "`O_APPEND` flag is not supported");
errno = ENOTSUP;
return -1;
}
auto md = gkfs::func::metadata(path);
if (errno == ENOENT) {
LOG(ERROR, "Error while retriving stat to file");
return -1;
}
}
if (!exists) {
if (!(flags & O_CREAT)) {
// file doesn't exists and O_CREAT was not set
errno = ENOENT;
return -1;
}
/*** CREATION ***/
assert(flags & O_CREAT);
if (flags & O_DIRECTORY) {
LOG(ERROR, "O_DIRECTORY use with O_CREAT. NOT SUPPORTED");
// no access check required here. If one is using our FS they have the permissions.
if (gkfs::func::mk_node(path, mode | S_IFREG)) {
LOG(ERROR, "Error creating non-existent file: '{}'", strerror(errno));
if (flags & O_EXCL) {
// File exists and O_EXCL was set
errno = EEXIST;
return -1;
#ifdef HAS_SYMLINKS
if (md->is_link()) {
if (flags & O_NOFOLLOW) {
LOG(WARNING, "Symlink found and O_NOFOLLOW flag was specified");
// Re-run open() on the link target.
return gkfs::func::open(md->target_path(), mode, flags);
if (S_ISDIR(md->mode())) {
return gkfs::func::opendir(path);
if ((flags & O_TRUNC) && ((flags & O_RDWR) || (flags & O_WRONLY))) {
// Shrink from the current size down to 0.
if (gkfs::func::truncate(path, md->size(), 0)) {
LOG(ERROR, "Error truncating file");
return CTX->file_map()->add(std::make_shared<OpenFile>(path, flags));
/**
 * Creates a node for `path` by forwarding to rpc_send::mk_node().
 *
 * The file-type bits of `mode` are inspected first: no type bits default to a
 * regular file, regular files and directories pass through unchanged, and
 * character/block devices, FIFOs and sockets are rejected with ENOTSUP.
 *
 * NOTE(review): the listing is incomplete -- the `default` branch has no
 * visible return and the closing braces of the switch and the function are
 * missing. Confirm against the upstream file.
 *
 * @param path path of the node to create
 * @param mode mode bits, including the file type
 * @return the result of rpc_send::mk_node(), or -1 for unsupported types
 */
int gkfs::func::mk_node(const std::string& path, mode_t mode) {
switch (mode & S_IFMT) {
case 0:
mode |= S_IFREG;
break;
case S_IFREG: // intentionally fall-through
case S_IFDIR:
break;
case S_IFCHR: // intentionally fall-through
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
LOG(WARNING, "Unsupported node type");
errno = ENOTSUP;
return -1;
default:
LOG(WARNING, "Unrecognized node type");
return rpc_send::mk_node(path, mode);
/**
 * Removes the node at `path`.
 * Internally this sends a broadcast (i.e. n RPCs) so that the chunk folders
 * for that path are cleaned.
 *
 * NOTE(review): the `return -1;` below appears without its guarding condition
 * and the closing brace is missing; lines seem to have been lost from this listing.
 *
 * @param path path of the node to remove
 * @return the result of rpc_send::rm_node(), or -1 on the (partially visible) error path
 */
int gkfs::func::rm_node(const std::string& path) {
auto md = gkfs::func::metadata(path);
return -1;
// Only a regular file with a non-zero size counts as having data.
bool has_data = S_ISREG(md->mode()) && (md->size() != 0);
return rpc_send::rm_node(path, !has_data, md->size());
/**
 * Access check for `path`; the visible part only looks up the metadata,
 * optionally following symlinks.
 *
 * NOTE(review): the rest of the body (result handling, closing brace) is
 * missing from this listing.
 */
int gkfs::func::access(const std::string& path, const int mask, bool follow_links) {
auto md = gkfs::func::metadata(path, follow_links);
/**
 * Fills `buf` with the attributes of `path` by converting its metadata with
 * gkfs::client::metadata_to_stat().
 *
 * NOTE(review): no null check on `md`, no return statement and no closing
 * brace are visible here; lines appear to be missing from this listing.
 */
int gkfs::func::stat(const string& path, struct stat* buf, bool follow_links) {
auto md = gkfs::func::metadata(path, follow_links);
gkfs::client::metadata_to_stat(path, *md, *buf);
/**
 * Fills a statfs structure from the chunk statistics returned by
 * rpc_send::chunk_stat(). Block size and block counts come from that reply;
 * the remaining fields are set to fixed values.
 *
 * NOTE(review): the return statement and closing brace are not visible in this listing.
 */
int gkfs::func::statfs(sys_statfs* buf) {
auto blk_stat = rpc_send::chunk_stat();
buf->f_type = 0;
buf->f_bsize = blk_stat.chunk_size;
buf->f_blocks = blk_stat.chunk_total;
buf->f_bfree = blk_stat.chunk_free;
// The same free-chunk count is reported for f_bfree and f_bavail.
buf->f_bavail = blk_stat.chunk_free;
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_fsid = {0, 0};
buf->f_namelen = PATH_MAX_LEN;
buf->f_frsize = 0;
buf->f_flags =
ST_NOATIME | ST_NODIRATIME | ST_NOSUID | ST_NODEV | ST_SYNCHRONOUS;
/**
 * Fills a statvfs structure from the chunk statistics returned by
 * rpc_send::chunk_stat(), after calling init_ld_env_if_needed().
 * Block size and block counts come from the RPC reply; the remaining fields
 * are set to fixed values.
 *
 * NOTE(review): the return statement and closing brace are not visible in this listing.
 */
int gkfs::func::statvfs(sys_statvfs* buf) {
init_ld_env_if_needed();
auto blk_stat = rpc_send::chunk_stat();
buf->f_bsize = blk_stat.chunk_size;
buf->f_blocks = blk_stat.chunk_total;
buf->f_bfree = blk_stat.chunk_free;
// The same free-chunk count is reported for f_bfree and f_bavail.
buf->f_bavail = blk_stat.chunk_free;
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_favail = 0;
buf->f_fsid = 0;
buf->f_namemax = PATH_MAX_LEN;
buf->f_frsize = 0;
buf->f_flag =
ST_NOATIME | ST_NODIRATIME | ST_NOSUID | ST_NODEV | ST_SYNCHRONOUS;
/**
 * Descriptor-based lseek: fetches the open file for `fd` from the file map
 * and forwards to the overload that takes the open-file object.
 *
 * NOTE(review): the closing brace is missing from this listing.
 */
off_t gkfs::func::lseek(unsigned int fd, off_t offset, unsigned int whence) {
return gkfs::func::lseek(CTX->file_map()->get(fd), offset, whence);
/**
 * Repositions the offset stored in an open-file object.
 *
 * SEEK_SET and SEEK_CUR are computed locally; SEEK_END asks for the current
 * file size via rpc_send::get_metadentry_size(). SEEK_DATA, SEEK_HOLE and any
 * unknown `whence` fail with EINVAL.
 *
 * NOTE(review): `file_size` is used without a visible declaration and the
 * function's closing brace is missing; lines appear to have been lost.
 *
 * @param gkfs_fd open-file object whose position is updated
 * @param offset  offset interpreted according to `whence`
 * @param whence  one of the SEEK_* constants
 * @return the new position, or -1 on error
 */
off_t gkfs::func::lseek(shared_ptr<OpenFile> gkfs_fd, off_t offset, unsigned int whence) {
switch (whence) {
case SEEK_SET:
gkfs_fd->pos(offset);
break;
case SEEK_CUR:
gkfs_fd->pos(gkfs_fd->pos() + offset);
break;
case SEEK_END: {
auto err = rpc_send::get_metadentry_size(gkfs_fd->path(), file_size);
if (err < 0) {
// NOTE(review): `err` is negative on this path, so errno receives a
// negative value here -- confirm whether `-err` was intended.
errno = err; // Negative numbers are explicitly for error codes
return -1;
}
gkfs_fd->pos(file_size + offset);
break;
}
case SEEK_DATA:
LOG(WARNING, "SEEK_DATA whence is not supported");
// We do not support this whence yet
errno = EINVAL;
return -1;
case SEEK_HOLE:
LOG(WARNING, "SEEK_HOLE whence is not supported");
// We do not support this whence yet
errno = EINVAL;
return -1;
default:
LOG(WARNING, "Unknown whence value {:#x}", whence);
errno = EINVAL;
return -1;
}
return gkfs_fd->pos();
/**
 * Shrinks `path` from `old_size` to `new_size`: first the recorded size is
 * decreased (rpc_send::decr_size), then the data is truncated
 * (rpc_send::trunc_data). `new_size` must not exceed `old_size` (asserted).
 *
 * NOTE(review): the bodies of the `if` statements, the return statements and
 * the closing braces are missing from this listing.
 */
int gkfs::func::truncate(const std::string& path, off_t old_size, off_t new_size) {
assert(new_size <= old_size);
if (new_size == old_size) {
if (rpc_send::decr_size(path, new_size)) {
LOG(DEBUG, "Failed to decrease size");
if (rpc_send::trunc_data(path, old_size, new_size)) {
LOG(DEBUG, "Failed to truncate data");
// Truncates `path` to `length`. The visible lines reject a negative length,
// look up the current metadata (following links) and forward to the
// three-argument overload with the current size as the old size.
//
// NOTE(review): this listing is incomplete -- the TODO comment below is never
// terminated, `size` is used without a visible declaration, and the error
// returns and closing braces are missing. Restore from the upstream file.
int gkfs::func::truncate(const std::string& path, off_t length) {
/* TODO CONCURRENCY:
* At the moment we first ask the length to the metadata-server in order to
* know which data-server have data to be deleted.
*
* From the moment we issue the gkfs_stat and the moment we issue the
* gkfs_trunc_data, some more data could have been added to the file and the
if (length < 0) {
LOG(DEBUG, "Length is negative: {}", length);
auto md = gkfs::func::metadata(path, true);
if (static_cast<unsigned long>(length) > size) {
LOG(DEBUG, "Length is greater then file size: {} > {}", length, size);
return gkfs::func::truncate(path, size, length);
// Duplicates `oldfd` by delegating to the file map's dup().
// NOTE(review): the closing brace is missing from this listing.
int gkfs::func::dup(const int oldfd) {
return CTX->file_map()->dup(oldfd);
// Duplicates `oldfd` onto `newfd` by delegating to the file map's dup2().
// NOTE(review): the closing brace is missing from this listing.
int gkfs::func::dup2(const int oldfd, const int newfd) {
return CTX->file_map()->dup2(oldfd, newfd);
// Writes `count` bytes from `buf` to `file` at `offset`.
// The visible lines first update the recorded size
// (rpc_send::update_metadentry_size) and then send the data (rpc_send::write).
//
// NOTE(review): this listing is incomplete -- `ret` and `updated_size` are
// used without visible declarations, and the error checks around the two RPCs
// and the closing braces are missing.
// NOTE(review): the warning below says "read" although this is the write path.
ssize_t gkfs::func::pwrite(std::shared_ptr<OpenFile> file, const char* buf, size_t count, off64_t offset) {
if (file->type() != FileType::regular) {
assert(file->type() == FileType::directory);
LOG(WARNING, "Cannot read from directory");
auto path = make_shared<string>(file->path());
auto append_flag = file->get_flag(OpenFile_flags::append);
ret = rpc_send::update_metadentry_size(*path, count, offset, append_flag, updated_size);
LOG(ERROR, "update_metadentry_size() failed with ret {}", ret);
ret = rpc_send::write(*path, buf, append_flag, offset, count, updated_size);
LOG(WARNING, "rpc_send::write() failed with ret {}", ret);
return ret; // return written size or -1 as error
// Descriptor-based wrapper: looks up the open file for `fd` and forwards to
// pwrite() with the buffer viewed as bytes.
// NOTE(review): the closing brace is missing from this listing.
ssize_t gkfs::func::pwrite_ws(int fd, const void* buf, size_t count, off64_t offset) {
auto file = CTX->file_map()->get(fd);
return gkfs::func::pwrite(file, reinterpret_cast<const char*>(buf), count, offset);
/* Writes `count` bytes starting from the current file position.
 * It also updates the file position accordingly.
 *
 * Same as the write syscall.
 */
ssize_t gkfs::func::write(int fd, const void* buf, size_t count) {
auto gkfs_fd = CTX->file_map()->get(fd);
auto pos = gkfs_fd->pos(); //retrieve the current offset
// NOTE(review): `pos` was captured before this seek, so the pwrite() below
// still receives the pre-seek offset -- confirm this is intended.
if (gkfs_fd->get_flag(OpenFile_flags::append))
gkfs::func::lseek(gkfs_fd, 0, SEEK_END);
auto ret = gkfs::func::pwrite(gkfs_fd, reinterpret_cast<const char*>(buf), count, pos);
// Update offset in file descriptor in the file map
if (ret > 0) {
// NOTE(review): advances by `count`, not by the returned `ret`. The return
// statement and closing braces are missing from this listing.
gkfs_fd->pos(pos + count);
/**
 * Gathered positional write: writes the buffers described by `iov` one after
 * another, starting at `offset`, using pwrite() for each non-empty segment.
 *
 * Writing stops at the first failed or short segment write.
 *
 * @param fd     descriptor looked up in the client file map
 * @param iov    array of `iovcnt` buffer descriptors
 * @param iovcnt number of entries in `iov`
 * @param offset file offset of the first byte to write
 * @return total number of bytes written, or -1 when nothing was written
 */
ssize_t gkfs::func::pwritev(int fd, const struct iovec* iov, int iovcnt, off_t offset) {
    auto target = CTX->file_map()->get(fd);
    auto cursor = offset; // file offset for the next segment
    ssize_t total = 0;

    for (int idx = 0; idx < iovcnt; ++idx) {
        const struct iovec& segment = iov[idx];
        const auto seg_len = segment.iov_len;
        if (seg_len == 0) {
            continue;
        }

        const ssize_t rc = gkfs::func::pwrite(target, reinterpret_cast<char*>(segment.iov_base), seg_len, cursor);
        if (rc == -1) {
            break;
        }

        total += rc;
        cursor += rc;

        // A short write ends the whole operation.
        if (static_cast<size_t>(rc) < seg_len) {
            break;
        }
    }

    return (total == 0) ? -1 : total;
}
/**
 * Gathered write at the current file position: forwards to pwritev() with the
 * descriptor's current offset and advances that offset by the number of bytes
 * written.
 *
 * NOTE(review): the final return statement and the closing brace are missing
 * from this listing.
 */
ssize_t gkfs::func::writev(int fd, const struct iovec* iov, int iovcnt) {
auto gkfs_fd = CTX->file_map()->get(fd);
auto pos = gkfs_fd->pos(); // retrieve the current offset
auto ret = gkfs::func::pwritev(fd, iov, iovcnt, pos);
// pwritev() reports "nothing written" as -1, never as 0.
assert(ret != 0);
if (ret < 0) {
return -1;
}
gkfs_fd->pos(pos + ret);
/**
 * Reads up to `count` bytes from `file` at `offset` into `buf` via
 * rpc_send::read(). When gkfs_config::io::zero_buffer_before_read is set, the
 * buffer is zeroed first.
 *
 * NOTE(review): this listing is incomplete -- the error return for
 * non-regular files, the check guarding the failure log and the closing
 * brace are missing.
 */
ssize_t gkfs::func::pread(std::shared_ptr<OpenFile> file, char* buf, size_t count, off64_t offset) {
if (file->type() != FileType::regular) {
assert(file->type() == FileType::directory);
LOG(WARNING, "Cannot read from directory");
// Zeroing buffer before read is only relevant for sparse files. Otherwise sparse regions contain invalid data.
if (gkfs_config::io::zero_buffer_before_read) {
memset(buf, 0, sizeof(char) * count);
}
auto ret = rpc_send::read(file->path(), buf, offset, count);
LOG(WARNING, "rpc_send::read() failed with ret {}", ret);
}
// XXX check that we don't try to read past end of the file
return ret; // return read size or -1 as error
/**
 * Reads up to `count` bytes at the descriptor's current position via pread()
 * and, when bytes were read, advances the position by that amount.
 *
 * NOTE(review): the return statement and closing braces are missing from this listing.
 */
ssize_t gkfs::func::read(int fd, void* buf, size_t count) {
auto gkfs_fd = CTX->file_map()->get(fd);
auto pos = gkfs_fd->pos(); //retrieve the current offset
auto ret = gkfs::func::pread(gkfs_fd, reinterpret_cast<char*>(buf), count, pos);
// Update offset in file descriptor in the file map
if (ret > 0) {
gkfs_fd->pos(pos + ret);
/**
 * Descriptor-based wrapper: looks up the open file for `fd` and forwards to
 * pread() with the buffer viewed as bytes.
 *
 * NOTE(review): the closing brace is missing from this listing.
 */
ssize_t gkfs::func::pread_ws(int fd, void* buf, size_t count, off64_t offset) {
auto gkfs_fd = CTX->file_map()->get(fd);
return gkfs::func::pread(gkfs_fd, reinterpret_cast<char*>(buf), count, offset);
/**
 * Opens the directory at `path`: loads its entries through
 * rpc_send::get_dirents() and registers the resulting OpenDir object in the
 * client file map.
 *
 * @param path path of the directory to open
 * @return the value returned by the file map's add(); -1 when the metadata
 *         lookup fails (errno is left as found after the lookup) or when
 *         `path` is not a directory (errno = ENOTDIR)
 */
int gkfs::func::opendir(const std::string& path) {
    auto md = gkfs::func::metadata(path);
    if (!md) {
        return -1;
    }
    // Only report "not a directory" when that is actually the case.
    if (!S_ISDIR(md->mode())) {
        LOG(DEBUG, "Path is not a directory");
        errno = ENOTDIR;
        return -1;
    }
    auto open_dir = std::make_shared<OpenDir>(path);
    rpc_send::get_dirents(*open_dir);
    return CTX->file_map()->add(open_dir);
}
/**
 * Removes the directory at `path`. The visible lines show an ENOENT path for
 * a missing entry, an ENOTDIR path for a non-directory, a directory listing
 * via rpc_send::get_dirents() and the final removal through
 * rpc_send::rm_node(path, true, 0).
 *
 * NOTE(review): this listing is incomplete -- the conditions guarding both
 * error paths, the declaration of `open_dir`, the body of the non-empty
 * check and the closing braces are missing. Restore from the upstream file.
 */
int gkfs::func::rmdir(const std::string& path) {
auto md = gkfs::func::metadata(path);
LOG(DEBUG, "Path '{}' does not exist: ", path);
errno = ENOENT;
return -1;
LOG(DEBUG, "Path '{}' is not a directory", path);
errno = ENOTDIR;
return -1;
}
rpc_send::get_dirents(*open_dir);
if (open_dir->size() != 0) {
return rpc_send::rm_node(path, true, 0);
/**
 * Fills `dirp` with linux_dirent records for the open directory `fd`,
 * starting at the directory's stored position and stopping when the next
 * record would not fit into the `count` bytes of the buffer.
 *
 * NOTE(review): this listing is incomplete -- the declarations of `open_dir`,
 * `written` and `de` are not visible and two closing braces are missing.
 *
 * @return number of bytes written, 0 when the position is already at the end,
 *         -1 with EBADF when `open_dir` is null, -1 with EINVAL when not even
 *         one record fits
 */
int gkfs::func::getdents(unsigned int fd,
struct linux_dirent* dirp,
unsigned int count) {
if (open_dir == nullptr) {
//Cast did not succeeded: open_file is a regular file
errno = EBADF;
return -1;
}
auto pos = open_dir->pos();
if (pos >= open_dir->size()) {
return 0;
struct linux_dirent* current_dirp = nullptr;
while (pos < open_dir->size()) {
// Record length: header up to d_name, the name, 3 extra bytes, rounded up
// with ALIGN to sizeof(long).
auto total_size = ALIGN(offsetof(
struct linux_dirent, d_name) +
de.name().size() + 3, sizeof(long));
if (total_size > (count - written)) {
//no enough space left on user buffer to insert next dirent
break;
}
current_dirp = reinterpret_cast<struct linux_dirent*>(
reinterpret_cast<char*>(dirp) + written);
current_dirp->d_ino = std::hash<std::string>()(
open_dir->path() + "/" + de.name());
current_dirp->d_reclen = total_size;
// The entry type is stored in the last byte of the record.
*(reinterpret_cast<char*>(current_dirp) + total_size - 1) =
((de.type() == FileType::regular) ? DT_REG : DT_DIR);
LOG(DEBUG, "name {}: {}", pos, de.name());
std::strcpy(&(current_dirp->d_name[0]), de.name().c_str());
++pos;
current_dirp->d_off = pos;
written += total_size;
}
if (written == 0) {
errno = EINVAL;
return -1;
}
open_dir->pos(pos);
return written;
int gkfs::func::getdents64(unsigned int fd,
struct linux_dirent64* dirp,
unsigned int count) {
auto open_dir = CTX->file_map()->get_dir(fd);
if (open_dir == nullptr) {
//Cast did not succeeded: open_file is a regular file
errno = EBADF;
return -1;
}
auto pos = open_dir->pos();
if (pos >= open_dir->size()) {
return 0;
}
unsigned int written = 0;
struct linux_dirent64* current_dirp = nullptr;
while (pos < open_dir->size()) {
DirEntry de = open_dir->getdent(pos);
auto total_size = ALIGN(offsetof(
struct linux_dirent64, d_name) +
de.name().size() + 3, sizeof(long));
if (total_size > (count - written)) {
//no enough space left on user buffer to insert next dirent
break;
}
current_dirp = reinterpret_cast<struct linux_dirent64*>(
reinterpret_cast<char*>(dirp) + written);
current_dirp->d_ino = std::hash<std::string>()(
open_dir->path() + "/" + de.name());
current_dirp->d_reclen = total_size;
current_dirp->d_type = ((de.type() == FileType::regular) ? DT_REG : DT_DIR);
LOG(DEBUG, "name {}: {}", pos, de.name());
std::strcpy(&(current_dirp->d_name[0]), de.name().c_str());
++pos;
current_dirp->d_off = pos;
written += total_size;
}
if (written == 0) {
errno = EINVAL;
return -1;
}
open_dir->pos(pos);
return written;
}
/**
 * Creates a symbolic link at `path` pointing to `target_path` through
 * rpc_send::mk_symlink(). An existing target that is neither a regular file
 * nor a link is rejected with ENOTSUP; the visible lines also contain an
 * EEXIST error path after looking up `path` itself.
 *
 * NOTE(review): this listing is incomplete -- the condition guarding the
 * EEXIST path is missing and the braces do not balance.
 */
int gkfs::func::mk_symlink(const std::string& path, const std::string& target_path) {
init_ld_env_if_needed();
/* The following check is not POSIX compliant.
* In POSIX the target is not checked at all.
* Here if the target is a directory we raise a NOTSUP error.
* So that application know we don't support link to directory.
*/
// Look up the target itself, without following links.
auto target_md = gkfs::func::metadata(target_path, false);
if (target_md != nullptr) {
auto trg_mode = target_md->mode();
if (!(S_ISREG(trg_mode) || S_ISLNK(trg_mode))) {
assert(S_ISDIR(trg_mode));
LOG(DEBUG, "Target path is a directory. Not supported");
errno = ENOTSUP;
return -1;
}
}
auto link_md = gkfs::func::metadata(path, false);
errno = EEXIST;
return -1;
}
return rpc_send::mk_symlink(path, target_path);
}
/**
 * Writes the target of the symbolic link `path` into `buf`, prefixed with the
 * mount directory (CTX->mountdir()).
 *
 * NOTE(review): this listing is incomplete -- the condition guarding the
 * "Named link doesn't exist" path is missing, so the braces do not balance.
 *
 * @param path    path of the link
 * @param buf     destination buffer
 * @param bufsize size of `buf` in bytes
 * @return combined length of mount directory and target path; -1 with EINVAL
 *         when `path` is not a link, -1 with ENAMETOOLONG when that length is
 *         not smaller than `bufsize`
 */
int gkfs::func::readlink(const std::string& path, char* buf, int bufsize) {
auto md = gkfs::func::metadata(path, false);
LOG(DEBUG, "Named link doesn't exist");
return -1;
}
if (!(md->is_link())) {
LOG(DEBUG, "The named file is not a symbolic link");
errno = EINVAL;
return -1;
}
int path_size = md->target_path().size() + CTX->mountdir().size();
if (path_size >= bufsize) {
LOG(WARNING, "Destination buffer size is too short: {} < {}, {} ", bufsize, path_size, md->target_path());
errno = ENAMETOOLONG;
return -1;
}
// Copy the mount directory first, then append the target; strcpy also
// writes the terminating NUL.
CTX->mountdir().copy(buf, CTX->mountdir().size());
std::strcpy(buf + CTX->mountdir().size(), md->target_path().c_str());
return path_size;
}