/*
Copyright 2018-2022, Barcelona Supercomputing Center (BSC), Spain
Copyright 2015-2022, Johannes Gutenberg Universitaet Mainz, Germany
This software was partially supported by the
EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
This software was partially supported by the
ADA-FS project under the SPPEXA project funded by the DFG.
This file is part of GekkoFS' POSIX interface.
GekkoFS' POSIX interface is free software: you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
GekkoFS' POSIX interface is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with GekkoFS' POSIX interface. If not, see
<https://www.gnu.org/licenses/>.
SPDX-License-Identifier: LGPL-3.0-or-later
*/
#include <config.hpp>
#include <client/preload_util.hpp>
#include <client/logging.hpp>
#include <client/gkfs_functions.hpp>
#include <client/rpc/forward_metadata.hpp>
#include <client/rpc/forward_data.hpp>
#include <common/path_util.hpp>
extern "C" {
#include <dirent.h> // used for file types in the getdents{,64}() functions
#include <linux/kernel.h> // used for definition of alignment macros
#include <sys/statfs.h>
#include <sys/statvfs.h>
}
using namespace std;
/*
* Macro used within getdents{,64} functions.
* __ALIGN_KERNEL defined in linux/kernel.h
*/
#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
/*
 * linux_dirent is used in getdents() but is privately defined in the linux
 * kernel: fs/readdir.c.
 */
// Userspace copy of the record layout used by getdents(). Field order and
// types define the binary layout and must not be changed.
struct linux_dirent {
// inode number (per getdents(2))
unsigned long d_ino;
// offset to the next linux_dirent (per getdents(2))
unsigned long d_off;
// length of this record in bytes (per getdents(2))
unsigned short d_reclen;
// first byte of the null-terminated file name; the name extends beyond the
// declared array — presumably records are sized through d_reclen, confirm
// against the code that fills these records
char d_name[1];
};
/*
* linux_dirent64 is used in getdents64() and defined in the linux kernel:
* include/linux/dirent.h. However, it is not part of the kernel-headers and
* cannot be imported.
*/
// Userspace copy of the record layout used by getdents64(). Field order and
// types define the binary layout and must not be changed.
struct linux_dirent64 {
    uint64_t d_ino;          // 64-bit inode number (per getdents(2))
    int64_t d_off;           // 64-bit offset to the next record (per getdents(2))
    unsigned short d_reclen; // size of this record in bytes (per getdents(2))
    unsigned char d_type;    // file type, one of the DT_* values from <dirent.h>
    char d_name[1]; // originally `char d_name[0]` in kernel, but ISO C++
                    // forbids zero-size array 'd_name'
};
// Directory entry record that carries a size and a ctime in place of the
// inode/offset pair of the linux_dirent* structs. Field order and types define
// the binary layout and must not be changed.
// NOTE(review): the producer and consumer of this record are not visible here.
struct dirent_extended {
// presumably the size of the file the entry refers to — confirm with producer
size_t size;
// presumably the change time of the file the entry refers to — confirm
time_t ctime;
// presumably the length of this record in bytes, as in linux_dirent64 — confirm
unsigned short d_reclen;
// presumably a DT_* file type value, as in linux_dirent64 — confirm
unsigned char d_type;
char d_name[1]; // originally `char d_name[0]` in kernel, but ISO C++
// forbids zero-size array 'd_name'
};
/**
 * Checks if metadata for parent directory exists (can be disabled with
 * CREATE_CHECK_PARENTS). errno may be set
 * @param path
 * @return 0 on success, -1 on failure
 */
int
check_parent_dir(const std::string& path) {
auto p_comp = gkfs::path::dirname(path);
auto md = gkfs::utils::get_metadata(p_comp);
if(!md) {
if(errno == ENOENT) {
LOG(DEBUG, "Parent component does not exist: '{}'", p_comp);
} else {
LOG(ERROR, "Failed to get metadata for parent component '{}': {}",
path, strerror(errno));
if(!S_ISDIR(md->mode())) {
LOG(DEBUG, "Parent component is not a directory: '{}'", p_comp);
errno = ENOTDIR;
return -1;
}
#endif // CREATE_CHECK_PARENTS
return 0;
}
/**
* gkfs wrapper for open() system calls
* errno may be set
* @param path
* @param mode
* @param flags
* @return 0 on success, -1 on failure
*/
gkfs_open(const std::string& path, mode_t mode, int flags, bool rename) {
LOG(ERROR, "`O_PATH` flag is not supported");
errno = ENOTSUP;
return -1;
}
LOG(ERROR, "`O_APPEND` flag is not supported");
errno = ENOTSUP;
return -1;
}
// metadata object filled during create or stat
gkfs::metadata::Metadata md{};
if(flags & O_CREAT) {
if(flags & O_DIRECTORY) {
LOG(ERROR, "O_DIRECTORY use with O_CREAT. NOT SUPPORTED");
// no access check required here. If one is using our FS they have the
// permissions.
auto err = gkfs_create(path, mode | S_IFREG);
if(err) {
if(errno == EEXIST) {
// file exists, O_CREAT was set
if(flags & O_EXCL) {
// File exists and O_EXCL & O_CREAT was set
return -1;
}
// file exists, O_CREAT was set O_EXCL wasnt, so function does
// not fail this case is actually undefined as per `man 2 open`
auto md_ = gkfs::utils::get_metadata(path);
if(!md_) {
LOG(ERROR,
"Could not get metadata after creating file '{}': '{}'",
path, strerror(errno));
return -1;
}
md = *md_;
#ifdef HAS_RENAME
if(rename == false && md.blocks() == -1) {
LOG(DEBUG, "File is renamed '{}': '{}' - rename: {}", path,
rename);
return -1;
}
#endif
} else {
LOG(ERROR, "Error creating file: '{}'", strerror(errno));
return -1;
}
} else {
// file was successfully created. Add to filemap
return CTX->file_map()->add(
std::make_shared<gkfs::filemap::OpenFile>(path, flags));
auto md_ = gkfs::utils::get_metadata(path);
if(!md_) {
if(errno != ENOENT) {
LOG(ERROR, "Error stating existing file '{}'", path);
// file doesn't exists and O_CREAT was not set
return -1;
if(md.is_link()) {
if(flags & O_NOFOLLOW) {
LOG(WARNING, "Symlink found and O_NOFOLLOW flag was specified");
errno = ELOOP;
return -1;
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
} else {
if(md.target_path() != "") {
auto md_ = gkfs::utils::get_metadata(md.target_path());
new_path = md.target_path();
while(md_.value().target_path() != "") {
new_path = md_.value().target_path();
md_ = gkfs::utils::get_metadata(md_.value().target_path(),
false);
if(!md_) {
return -1;
}
}
md = *md_;
if(S_ISDIR(md.mode())) {
return gkfs_opendir(new_path);
}
/*** Regular file exists ***/
assert(S_ISREG(md.mode()));
if((flags & O_TRUNC) && ((flags & O_RDWR) || (flags & O_WRONLY))) {
if(gkfs_truncate(new_path, md.size(), 0)) {
LOG(ERROR, "Error truncating file");
return -1;
}
}
return CTX->file_map()->add(
std::make_shared<gkfs::filemap::OpenFile>(new_path, flags));
}
if(S_ISDIR(md.mode())) {
return gkfs_opendir(path);
}
/*** Regular file exists ***/
assert(S_ISREG(md.mode()));
if((flags & O_TRUNC) && ((flags & O_RDWR) || (flags & O_WRONLY))) {
if(gkfs_truncate(path, md.size(), 0)) {
LOG(ERROR, "Error truncating file");
return -1;
return CTX->file_map()->add(
std::make_shared<gkfs::filemap::OpenFile>(path, flags));
/**
* Wrapper function for file/directory creation
* errno may be set
* @param path
* @param mode
* @return 0 on success, -1 on failure
*/
int
gkfs_create(const std::string& path, mode_t mode) {
// file type must be set
switch(mode & S_IFMT) {
case 0:
mode |= S_IFREG;
break;
case S_IFREG: // intentionally fall-through
case S_IFDIR:
break;
case S_IFCHR: // intentionally fall-through
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
LOG(WARNING, "Unsupported node type");
errno = ENOTSUP;
return -1;
default:
LOG(WARNING, "Unrecognized node type");
if(check_parent_dir(path)) {
/**
 * gkfs wrapper for unlink() system calls
 * errno may be set
 * @param path
 * @return 0 on success, -1 on failure
 */
int
gkfs_remove(const std::string& path) {
auto md = gkfs::utils::get_metadata(path);
return -1;
if(S_ISDIR(md->mode())) {
LOG(ERROR, "Cannot remove directory '{}'", path);
errno = EISDIR;
return -1;
}
#ifdef HAS_RENAME
if(md.value().blocks() == -1) {
errno = ENOENT;
return -1;
} else {
if(md.value().target_path() != "") {
auto md_ = gkfs::utils::get_metadata(md.value().target_path());
std::string new_path = md.value().target_path();
while(md.value().target_path() != "") {
new_path = md.value().target_path();
md = gkfs::utils::get_metadata(md.value().target_path(), false);
if(!md) {
return -1;
}
}
auto err = gkfs::rpc::forward_remove(new_path);
if(err) {
errno = err;
return -1;
}
return 0;
}
}
auto err = gkfs::rpc::forward_remove(path);
/**
* gkfs wrapper for access() system calls
* errno may be set
* @param path
* @param mask
* @param follow_links
* @return 0 on success, -1 on failure
*/
int
gkfs_access(const std::string& path, const int mask, bool follow_links) {
    // Only the existence of the metadata entry is checked; `mask` is not
    // evaluated by this function.
    auto md = gkfs::utils::get_metadata(path, follow_links);
    if(!md) {
        // NOTE(review): errno is expected to have been set by get_metadata(),
        // as other callers in this file read errno after a failed lookup.
        return -1;
    }
    return 0;
}
/**
* gkfs wrapper for rename() system calls
* errno may be set
* We use blocks to determine if the file is a renamed file.
* If the file is re-renamed (a->b->a) a recovers the block of b
* and we delete b.
* @param old_path
* @param new_path
* @return 0 on success, -1 on failure
*/
int
gkfs_rename(const string& old_path, const string& new_path) {
auto md = gkfs::utils::get_metadata(old_path, false);
// if the file is not found, or it is a renamed one cancel.
if(!md or md.value().blocks() == -1) {
return -1;
}
auto md2 = gkfs::utils::get_metadata(new_path, false);
if(md2) {
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
// the new file exists... check for circular...
if(md2.value().blocks() == -1 and
md.value().target_path() == new_path) {
// the new file is a renamed file, so we need to get the metadata of
// the original file.
LOG(DEBUG, "Destroying Circular Rename '{}' --> '{}'", old_path,
new_path);
gkfs::metadata::MetadentryUpdateFlags flags;
flags.atime = false;
flags.mtime = false;
flags.ctime = false;
flags.blocks = true;
flags.mode = false;
flags.size = false;
flags.uid = false;
flags.gid = false;
flags.link_count = false;
md.value().blocks(0);
md.value().target_path("");
auto err = gkfs::rpc::forward_update_metadentry(new_path,
md.value(), flags);
if(err) {
errno = err;
return -1;
}
// Delete old file
err = gkfs::rpc::forward_remove(old_path);
if(err) {
errno = err;
return -1;
}
return 0;
}
return -1;
}
auto err = gkfs::rpc::forward_rename(old_path, new_path, md.value());
if(err) {
errno = err;
return -1;
}
return 0;
}
/**
* gkfs wrapper for stat() system calls
* errno may be set
* @param path
* @param buf
* @param follow_links
* @return 0 on success, -1 on failure
*/
int
gkfs_stat(const string& path, struct stat* buf, bool follow_links) {
auto md = gkfs::utils::get_metadata(path, follow_links);
#ifdef HAS_RENAME
if(md.value().blocks() == -1) {
errno = ENOENT;
return -1;
} else {
while(md.value().target_path() != "") {
md = gkfs::utils::get_metadata(md.value().target_path(), false);
if(!md) {
return -1;
}
gkfs::utils::metadata_to_stat(path, *md, *buf);
#ifdef STATX_TYPE
/**
* gkfs wrapper for statx() system calls
* errno may be set
* @param dirfs
* @param path
* @param flags
* @param mask
* @param buf
* @param follow_links
* @return 0 on success, -1 on failure
*/
int
gkfs_statx(int dirfs, const std::string& path, int flags, unsigned int mask,
struct statx* buf, bool follow_links) {
auto md = gkfs::utils::get_metadata(path, follow_links);
#ifdef HAS_RENAME
if(md.value().blocks() == -1) {
errno = ENOENT;
return -1;
} else {
while(md.value().target_path() != "") {
md = gkfs::utils::get_metadata(md.value().target_path(), false);
if(!md) {
return -1;
}
gkfs::utils::metadata_to_stat(path, *md, tmp);
buf->stx_mask = 0;
buf->stx_blksize = tmp.st_blksize;
buf->stx_attributes = 0;
buf->stx_nlink = tmp.st_nlink;
buf->stx_uid = tmp.st_uid;
buf->stx_gid = tmp.st_gid;
buf->stx_mode = tmp.st_mode;
buf->stx_ino = tmp.st_ino;
buf->stx_size = tmp.st_size;
buf->stx_blocks = tmp.st_blocks;
buf->stx_attributes_mask = 0;
buf->stx_atime.tv_sec = tmp.st_atim.tv_sec;
buf->stx_atime.tv_nsec = tmp.st_atim.tv_nsec;
buf->stx_mtime.tv_sec = tmp.st_mtim.tv_sec;
buf->stx_mtime.tv_nsec = tmp.st_mtim.tv_nsec;
buf->stx_ctime.tv_sec = tmp.st_ctim.tv_sec;
buf->stx_ctime.tv_nsec = tmp.st_ctim.tv_nsec;
buf->stx_btime = buf->stx_atime;
/**
* gkfs wrapper for statfs() system calls
* errno may be set
* @param buf
* @return 0 on success, -1 on failure
*/
int
gkfs_statfs(struct statfs* buf) {
auto ret = gkfs::rpc::forward_get_chunk_stat();
auto err = ret.first;
LOG(ERROR, "{}() Failure with error: '{}'", err);
errno = err;
buf->f_type = 0;
buf->f_bsize = blk_stat.chunk_size;
buf->f_blocks = blk_stat.chunk_total;
buf->f_bfree = blk_stat.chunk_free;
buf->f_bavail = blk_stat.chunk_free;
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_fsid = {0, 0};
buf->f_namelen = path::max_length;
ST_NOATIME | ST_NODIRATIME | ST_NOSUID | ST_NODEV | ST_SYNCHRONOUS;
/**
* gkfs wrapper for statvfs() system calls
* errno may be set
*
* NOTE: Currently unused.
*
* @param buf
* @return 0 on success, -1 on failure
*/
int
gkfs_statvfs(struct statvfs* buf) {
auto ret = gkfs::rpc::forward_get_chunk_stat();
auto err = ret.first;
LOG(ERROR, "{}() Failure with error: '{}'", err);
errno = err;
buf->f_bsize = blk_stat.chunk_size;
buf->f_blocks = blk_stat.chunk_total;
buf->f_bfree = blk_stat.chunk_free;
buf->f_bavail = blk_stat.chunk_free;
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_favail = 0;
buf->f_fsid = 0;
buf->f_namemax = path::max_length;
ST_NOATIME | ST_NODIRATIME | ST_NOSUID | ST_NODEV | ST_SYNCHRONOUS;
/**
* gkfs wrapper for lseek() system calls with available file descriptor
* errno may be set
* @param fd
* @param offset
* @param whence
 * @return the resulting file offset on success, -1 on failure
*/
off_t
gkfs_lseek(unsigned int fd, off_t offset, unsigned int whence) {
    // Resolve the descriptor in the file map and delegate to the overload
    // that works on the open-file object.
    // NOTE(review): no check is made here on what get() returns for an
    // unknown fd — confirm the file map's contract.
    return gkfs_lseek(CTX->file_map()->get(fd), offset, whence);
}
/**
 * gkfs wrapper for lseek() system calls with available shared ptr to gkfs
 * FileMap.
 * errno may be set
 * @param gkfs_fd
 * @param offset
 * @param whence
 * @return the resulting file offset on success, -1 on failure
 */
off_t
gkfs_lseek(shared_ptr<gkfs::filemap::OpenFile> gkfs_fd, off_t offset,
unsigned int whence) {
switch(whence) {
case SEEK_SET:
errno = EINVAL;
return -1;
}
gkfs_fd->pos(offset);
break;
case SEEK_CUR:
gkfs_fd->pos(gkfs_fd->pos() + offset);
break;
case SEEK_END: {
auto ret = gkfs::rpc::forward_get_metadentry_size(gkfs_fd->path());
auto err = ret.first;
return -1;
}
if(offset < 0 && file_size < -offset) {
errno = EINVAL;
return -1;
}
gkfs_fd->pos(file_size + offset);
break;
}
case SEEK_DATA:
LOG(WARNING, "SEEK_DATA whence is not supported");
// We do not support this whence yet
errno = EINVAL;
return -1;
case SEEK_HOLE:
LOG(WARNING, "SEEK_HOLE whence is not supported");
// We do not support this whence yet
errno = EINVAL;
return -1;
default:
LOG(WARNING, "Unknown whence value {:#x}", whence);
errno = EINVAL;
return -1;
}
return gkfs_fd->pos();
/**
* wrapper function for gkfs_truncate
* errno may be set
* @param path
* @param old_size
* @param new_size
* @return 0 on success, -1 on failure
*/
int
gkfs_truncate(const std::string& path, off_t old_size, off_t new_size) {
assert(new_size <= old_size);
if(new_size == old_size) {
LOG(DEBUG, "Failed to decrease size");
err = gkfs::rpc::forward_truncate(path, old_size, new_size);
LOG(DEBUG, "Failed to truncate data");
/**
* gkfs wrapper for truncate() system calls
* errno may be set
* @param path
* @param length
* @return 0 on success, -1 on failure
*/
int
gkfs_truncate(const std::string& path, off_t length) {
/* TODO CONCURRENCY:
* At the moment we first ask the length to the metadata-server in order to
* know which data-server have data to be deleted.
*
* From the moment we issue the gkfs_stat and the moment we issue the
* gkfs_trunc_data, some more data could have been added to the file and the
LOG(DEBUG, "Length is negative: {}", length);
auto md = gkfs::utils::get_metadata(path, true);
#ifdef HAS_RENAME
if(md.value().blocks() == -1) {
errno = ENOENT;
return -1;
} else if(md.value().target_path() != "") {
std::string new_path;
while(md.value().target_path() != "") {
new_path = md.value().target_path();
md = gkfs::utils::get_metadata(md.value().target_path());
}
// This could be optimized
auto size = md->size();
if(static_cast<unsigned long>(length) > size) {
LOG(DEBUG, "Length is greater then file size: {} > {}", length,
size);
errno = EINVAL;
return -1;
}
return gkfs_truncate(new_path, size, length);
}
if(static_cast<unsigned long>(length) > size) {
LOG(DEBUG, "Length is greater then file size: {} > {}", length, size);
return gkfs_truncate(path, size, length);
/**
* gkfs wrapper for dup() system calls
* errno may be set
* @param oldfd
* @return file descriptor int or -1 on error
*/
int
gkfs_dup(const int oldfd) {
    // Duplication is handled entirely by the client's file map.
    return CTX->file_map()->dup(oldfd);
}
/**
* gkfs wrapper for dup2() system calls
* errno may be set
* @param oldfd
* @param newfd
* @return file descriptor int or -1 on error
*/
int
gkfs_dup2(const int oldfd, const int newfd) {
    // Duplication onto a chosen descriptor is handled entirely by the
    // client's file map.
    return CTX->file_map()->dup2(oldfd, newfd);
}
/**
* Wrapper function for all gkfs write operations
* errno may be set
* @param file
* @param buf
* @param count
* @param offset
* @return written size or -1 on error
*/
ssize_t
gkfs_pwrite(std::shared_ptr<gkfs::filemap::OpenFile> file, const char* buf,
size_t count, off64_t offset) {
if(file->type() != gkfs::filemap::FileType::regular) {
assert(file->type() == gkfs::filemap::FileType::directory);
LOG(WARNING, "Cannot read from directory");
auto path = make_shared<string>(file->path());
auto append_flag = file->get_flag(gkfs::filemap::OpenFile_flags::append);
auto ret_update_size = gkfs::rpc::forward_update_metadentry_size(
*path, count, offset, append_flag);
LOG(ERROR, "update_metadentry_size() failed with err '{}'", err);
errno = err;
return -1;
auto ret_write = gkfs::rpc::forward_write(*path, buf, append_flag, offset,
count, updated_size);
LOG(WARNING, "gkfs::rpc::forward_write() failed with err '{}'", err);
errno = err;
return -1;
/**
* gkfs wrapper for pwrite() system calls
* errno may be set
* @param fd
* @param buf
* @param count
* @param offset
* @return written size or -1 on error
*/
ssize_t
gkfs_pwrite_ws(int fd, const void* buf, size_t count, off64_t offset) {
    // Resolve the descriptor, then hand over to the generic write wrapper.
    // The descriptor's own offset is not touched here.
    auto file = CTX->file_map()->get(fd);
    return gkfs_pwrite(file, static_cast<const char*>(buf), count, offset);
}
/**
* gkfs wrapper for write() system calls
* errno may be set
* @param fd
* @param buf
* @param count
* @return written size or -1 on error
*/
ssize_t
gkfs_write(int fd, const void* buf, size_t count) {
    auto gkfs_fd = CTX->file_map()->get(fd);

    // In append mode, move to the end of the file first so that the offset
    // read below is the one the write starts at.
    if(gkfs_fd->get_flag(gkfs::filemap::OpenFile_flags::append)) {
        gkfs_lseek(gkfs_fd, 0, SEEK_END);
    }

    const auto pos = gkfs_fd->pos(); // retrieve the current offset
    const auto ret =
            gkfs_pwrite(gkfs_fd, static_cast<const char*>(buf), count, pos);

    // Update offset in file descriptor in the file map, but only by what was
    // actually written; a failed write (-1) leaves the offset untouched.
    if(ret > 0) {
        gkfs_fd->pos(pos + ret);
    }
    return ret;
}
/**
* gkfs wrapper for pwritev() system calls
* errno may be set
* @param fd
* @param iov
* @param iovcnt
* @param offset
* @return written size or -1 on error
*/
ssize_t
gkfs_pwritev(int fd, const struct iovec* iov, int iovcnt, off_t offset) {
auto file = CTX->file_map()->get(fd);
ssize_t written = 0;
ssize_t ret;
for(int i = 0; i < iovcnt; ++i) {
auto count = (iov + i)->iov_len;
auto buf = (iov + i)->iov_base;
ret = gkfs_pwrite(file, reinterpret_cast<char*>(buf), count, pos);
break;
}
written += ret;
pos += ret;
if(static_cast<size_t>(ret) < count) {
return -1;
}
return written;
}
/**
* gkfs wrapper for writev() system calls
* errno may be set
* @param fd
* @param iov
* @param iovcnt
* @return written size or -1 on error
*/
ssize_t
gkfs_writev(int fd, const struct iovec* iov, int iovcnt) {
    auto gkfs_fd = CTX->file_map()->get(fd);
    const auto pos = gkfs_fd->pos(); // retrieve the current offset

    const auto ret = gkfs_pwritev(fd, iov, iovcnt, pos);
    if(ret < 0) {
        // Nothing was written: keep the descriptor's offset where it was.
        return ret;
    }

    // Advance the descriptor's offset by the number of bytes written.
    gkfs_fd->pos(pos + ret);
    return ret;
}
/**
* Wrapper function for all gkfs read operations
* @param file
* @param buf
* @param count
* @param offset
* @return read size or -1 on error
*/
ssize_t
gkfs_pread(std::shared_ptr<gkfs::filemap::OpenFile> file, char* buf,
size_t count, off64_t offset) {
if(file->type() != gkfs::filemap::FileType::regular) {
assert(file->type() == gkfs::filemap::FileType::directory);
LOG(WARNING, "Cannot read from directory");
// Zeroing buffer before read is only relevant for sparse files. Otherwise
// sparse regions contain invalid data.
if constexpr(gkfs::config::io::zero_buffer_before_read) {
memset(buf, 0, sizeof(char) * count);
}
auto ret = gkfs::rpc::forward_read(file->path(), buf, offset, count);
LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'", err);
errno = err;
return -1;
}
// XXX check that we don't try to read past end of the file
/**
* gkfs wrapper for read() system calls
* errno may be set
* @param fd
* @param buf
* @param count
* @return read size or -1 on error
*/
ssize_t
gkfs_read(int fd, void* buf, size_t count) {
auto gkfs_fd = CTX->file_map()->get(fd);
auto pos = gkfs_fd->pos(); // retrieve the current offset
auto ret = gkfs_pread(gkfs_fd, reinterpret_cast<char*>(buf), count, pos);
// Update offset in file descriptor in the file map
gkfs_fd->pos(pos + ret);