Unverified Commit ac016ef2 authored by Alberto Miranda's avatar Alberto Miranda ♨️
Browse files

Fix issue with fds created by kernel modules

parent 5474d22b
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -400,7 +400,6 @@ struct logger {
    const date::time_zone * const timezone_;
};


// the following static functions can be used to interact 
// with a globally registered logger instance

+7 −2
Original line number Diff line number Diff line
@@ -56,8 +56,8 @@ enum class RelativizeStatus {

class PreloadContext {

    static const auto constexpr INTERNAL_FD_BASE = 
        MAX_OPEN_FDS - MAX_INTERNAL_FDS;
    static auto constexpr MIN_INTERNAL_FD = MAX_OPEN_FDS - MAX_INTERNAL_FDS;
    static auto constexpr MAX_USER_FDS = MIN_INTERNAL_FD;

    private:
    PreloadContext();
@@ -77,6 +77,8 @@ class PreloadContext {

    std::bitset<MAX_INTERNAL_FDS> internal_fds_;
    mutable std::mutex internal_fds_mutex_;
    bool internal_fds_must_relocate_;
    std::bitset<MAX_USER_FDS> protected_fds_;

    public:
    static PreloadContext* getInstance() {
@@ -122,6 +124,9 @@ class PreloadContext {
    int register_internal_fd(int fd);
    void unregister_internal_fd(int fd);
    bool is_internal_fd(int fd) const;

    void protect_user_fds();
    void unprotect_user_fds(); 
};


+4 −10
Original line number Diff line number Diff line
@@ -192,15 +192,8 @@ logger::logger(const std::string& opts,
               bool trunc) :
    timezone_(date::current_zone()) {

    /* use stderr (dup()ed to an internal fd) by default */
    log_fd_ = ::dup(2);

    if(log_fd_ == -1) {
        log(gkfs::log::error, __func__, __LINE__, "Failed to dup stderr. "
            "Logging will fall back to normal stderr", path);
    /* use stderr by default */
    log_fd_ = 2;
    }

    log_mask_ = process_log_options(opts);

    if(!path.empty()) {
@@ -212,7 +205,8 @@ logger::logger(const std::string& opts,

        // we use ::open() here rather than ::syscall_no_intercept(SYS_open)
        // because we want the call to be intercepted by our hooks, which 
        // allows us to categorize the resulting fd as 'internal'
        // allows us to categorize the resulting fd as 'internal' and
        // relocate it to our private range
		int fd = ::open(path.c_str(), flags, 0600);

		if(fd == -1) {
+13 −1
Original line number Diff line number Diff line
@@ -198,16 +198,28 @@ void init_preload() {
    start_self_interception();

    CTX->init_logging();

    // from here ownwards it is safe to print messages
    LOG(DEBUG, "Logging subsystem initialized");

    // Kernel modules such as ib_uverbs may create fds in kernel space and pass
    // them to user-space processes using ioctl()-like interfaces. if this 
    // happens during our internal initialization, there's no way for us to 
    // control this creation and the fd will be created in the 
    // [0, MAX_USER_FDS) range rather than in our private 
    // [MAX_USER_FDS, MAX_OPEN_FDS) range. To prevent this for our internal 
    // initialization code, we forcefully occupy the user fd range to force 
    // such modules to create fds in our private range.
    CTX->protect_user_fds();

    log_prog_name();
    init_cwd();

    LOG(DEBUG, "Current working directory: '{}'", CTX->cwd());
    init_ld_env_if_needed();
    CTX->enable_interception();

    CTX->unprotect_user_fds();

    start_interception();
}

+74 −10
Original line number Diff line number Diff line
@@ -28,12 +28,17 @@
#include <cassert>
#include <cstdio>

decltype(PreloadContext::MIN_INTERNAL_FD) constexpr 
PreloadContext::MIN_INTERNAL_FD;
decltype(PreloadContext::MAX_USER_FDS) constexpr 
PreloadContext::MAX_USER_FDS;

PreloadContext::PreloadContext():
    ofm_(std::make_shared<OpenFileMap>()),
    fs_conf_(std::make_shared<FsConfig>()) {

    internal_fds_.set();
    internal_fds_must_relocate_ = true;
}

void 
@@ -192,17 +197,30 @@ bool PreloadContext::interception_enabled() const {

int PreloadContext::register_internal_fd(int fd) {

    LOG(DEBUG, "registering fd {} as internal", fd);

    assert(fd >= 0);

    if(!internal_fds_must_relocate_) {
        LOG(DEBUG, "registering fd {} as internal (no relocation needed)", fd);
        assert(fd >= MIN_INTERNAL_FD);
        internal_fds_.reset(fd - MIN_INTERNAL_FD);
        return fd;
    }

    LOG(DEBUG, "registering fd {} as internal (needs relocation)", fd);

    std::lock_guard<std::mutex> lock(internal_fds_mutex_);
    const int pos = internal_fds_._Find_first();

    if(static_cast<std::size_t>(pos) == internal_fds_.size()) {
        throw std::runtime_error(
"Internal GekkoFS file descriptors exhausted, increase MAX_INTERNAL_FDS in "
"CMake, rebuild GekkoFS and try again.");
    }
    internal_fds_.reset(pos);


#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD)
    long args[gkfs::syscall::MAX_ARGS]{fd, pos + INTERNAL_FD_BASE, O_CLOEXEC};
    long args[gkfs::syscall::MAX_ARGS]{fd, pos + MIN_INTERNAL_FD, O_CLOEXEC};
#endif

    LOG(SYSCALL, 
@@ -212,7 +230,7 @@ int PreloadContext::register_internal_fd(int fd) {
        SYS_dup3, args);

    const int ifd = 
        ::syscall_no_intercept(SYS_dup3, fd, pos + INTERNAL_FD_BASE, O_CLOEXEC);
        ::syscall_no_intercept(SYS_dup3, fd, pos + MIN_INTERNAL_FD, O_CLOEXEC);

    LOG(SYSCALL, 
        gkfs::syscall::from_internal_code | 
@@ -220,7 +238,7 @@ int PreloadContext::register_internal_fd(int fd) {
        gkfs::syscall::executed, 
        SYS_dup3, args, ifd);

    assert(::syscall_error_code(ifd) != -1);
    assert(::syscall_error_code(ifd) == 0);

#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD)
    long args2[gkfs::syscall::MAX_ARGS]{fd};
@@ -244,7 +262,7 @@ int PreloadContext::register_internal_fd(int fd) {
        gkfs::syscall::executed, 
        SYS_close, args2, rv);

    LOG(DEBUG, "    (fd {} reassigned to ifd {})", fd, ifd);
    LOG(DEBUG, "    (fd {} relocated to ifd {})", fd, ifd);

    return ifd;
}
@@ -253,9 +271,9 @@ void PreloadContext::unregister_internal_fd(int fd) {

    LOG(DEBUG, "unregistering internal fd {}", fd);

    assert(fd >= INTERNAL_FD_BASE);
    assert(fd >= MIN_INTERNAL_FD);

    const auto pos = fd - INTERNAL_FD_BASE;
    const auto pos = fd - MIN_INTERNAL_FD;

    std::lock_guard<std::mutex> lock(internal_fds_mutex_);
    internal_fds_.set(pos);
@@ -263,14 +281,60 @@ void PreloadContext::unregister_internal_fd(int fd) {

bool PreloadContext::is_internal_fd(int fd) const {

    if(fd < INTERNAL_FD_BASE) {
    if(fd < MIN_INTERNAL_FD) {
        return false;
    }

    const auto pos = fd - INTERNAL_FD_BASE;
    const auto pos = fd - MIN_INTERNAL_FD;

    std::lock_guard<std::mutex> lock(internal_fds_mutex_);
    return !internal_fds_.test(pos);
}

void
PreloadContext::protect_user_fds() {

    LOG(DEBUG, "Protecting application fds [{}, {}]", 0, MAX_USER_FDS - 1);

    const int nullfd = ::syscall_no_intercept(SYS_open, "/dev/null", O_RDONLY);
    assert(::syscall_error_code(nullfd) == 0);
    protected_fds_.set(nullfd);

    const auto fd_is_open = [](int fd) -> bool {
        const int ret = ::syscall_no_intercept(SYS_fcntl, fd, F_GETFD);
        return ::syscall_error_code(ret) == 0 || 
               ::syscall_error_code(ret) != EBADF;
    };

    for(int fd = 0; fd < MAX_USER_FDS; ++fd) {
        if(fd_is_open(fd)) {
            LOG(DEBUG, "  fd {} was already in use, skipping", fd);
            continue;
        }

        const int ret = ::syscall_no_intercept(SYS_dup3, nullfd, fd, O_CLOEXEC);
        assert(::syscall_error_code(ret) == 0);
        protected_fds_.set(fd);
    }

    internal_fds_must_relocate_ = false;
}

void
PreloadContext::unprotect_user_fds() {

    for(std::size_t fd = 0; fd < protected_fds_.size(); ++fd) {
        if(!protected_fds_[fd]) {
            continue;
        }

        const int ret = 
            ::syscall_error_code(::syscall_no_intercept(SYS_close, fd));

        if(ret != 0) {
            LOG(ERROR, "Failed to unprotect fd")
        }
    }

    internal_fds_must_relocate_ = true;
}