Verified Commit 58055d57 authored by Alberto Miranda's avatar Alberto Miranda ♨️
Browse files

Add tracking of internal fds

Fixes an issue where client applications such as ssh might attempt to
close all open fds, therefore also closing the fds used by the internal
components of the library such as Mercury.
parent 176074d0
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -112,6 +112,25 @@ if(SYMLINK_SUPPORT)
endif()
message(STATUS "Symlink support: ${SYMLINK_SUPPORT}")

# When enabled, the client library tracks its own (internal) file descriptors
# in a fixed-size std::bitset whose size (MAX_OPEN_FDS) is chosen here at
# configure time; otherwise a std::set is used.
option(USE_BITSET_FOR_INTERNAL_FDS "Use std::bitset to track internal fds" ON)
if(USE_BITSET_FOR_INTERNAL_FDS)
    add_definitions(-DUSE_BITSET_FOR_INTERNAL_FDS)

    # Ask the build host for its per-process limit of open files.
    execute_process(COMMAND getconf OPEN_MAX
                    OUTPUT_VARIABLE GETCONF_MAX_FDS
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    ERROR_QUIET)

    # Fall back to a default when getconf is unavailable or prints something
    # that is not a positive integer (e.g. an empty string or "undefined"),
    # since the value is used verbatim as a std::bitset size.
    if(NOT "${GETCONF_MAX_FDS}" MATCHES "^[1-9][0-9]*$")
        set(GETCONF_MAX_FDS 512)
    endif()
    add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS})
endif()

message(STATUS "Use std::bitset for internal fd tracking: ${USE_BITSET_FOR_INTERNAL_FDS}")

if(USE_BITSET_FOR_INTERNAL_FDS)
    message(STATUS "Max open files: ${GETCONF_MAX_FDS}")
endif()

configure_file(include/global/configure.hpp.in include/global/configure.hpp)

# Imported target
+15 −0
Original line number Diff line number Diff line
@@ -22,6 +22,10 @@
#include <vector>
#include <string>

#ifdef USE_BITSET_FOR_INTERNAL_FDS
#include <bitset>
#endif // USE_BITSET_FOR_INTERNAL_FDS

/* Forward declarations */
class OpenFileMap;
class Distributor;
@@ -67,6 +71,13 @@ class PreloadContext {

    bool interception_enabled_;

#ifdef USE_BITSET_FOR_INTERNAL_FDS
    // File descriptors that belong to the library itself rather than to the
    // client application. MAX_OPEN_FDS is a compile-time definition supplied
    // by the build system.
    // NOTE(review): presumably bit N is set when fd N is internal, in which
    // case fds >= MAX_OPEN_FDS cannot be represented -- confirm how the
    // register/unregister/is_internal implementations treat such values.
    std::bitset<MAX_OPEN_FDS> internal_fds_;
#else
    // Same purpose as above, stored as an ordered set of fd numbers.
    // NOTE(review): std::set needs <set>; only <bitset> is included
    // conditionally in the visible part of this header -- confirm <set> is
    // included when the bitset option is turned off.
    std::set<int> internal_fds_;
#endif // USE_BITSET_FOR_INTERNAL_FDS


    public:
    static PreloadContext* getInstance() {
        static PreloadContext instance;
@@ -109,6 +120,10 @@ class PreloadContext {
    void enable_interception();
    void disable_interception();
    bool interception_enabled() const;

    // Bookkeeping for file descriptors created by the library's own code, so
    // that close requests coming from the client application can be told
    // apart from legitimate ones.
    // NOTE(review): the definitions are not part of this header; the
    // descriptions below reflect how the call sites use them -- confirm
    // against the implementation.

    // Marks `fd` as an internal descriptor.
    void register_internal_fd(int fd);
    // Removes the internal mark from `fd`.
    void unregister_internal_fd(int fd);
    // Tells whether `fd` is currently marked as internal.
    bool is_internal_fd(int fd) const;
};


+7 −0
Original line number Diff line number Diff line
@@ -61,6 +61,13 @@ int hook_close(int fd) {
        CTX->file_map()->remove(fd);
        return 0;
    }

    if(CTX->is_internal_fd(fd)) {
        // the client application (for some reason) is trying to close an 
        // internal fd: ignore it
        return 0;
    }

    return syscall_no_intercept(SYS_close, fd);
}

+513 −268
Original line number Diff line number Diff line
@@ -27,8 +27,230 @@
#define NOT_HOOKED 1
#define HOOKED 0

#if 0
/*
 * log_write -- printf-style debugging aid
 *
 * Formats the message into a fixed-size stack buffer, appends a newline and
 * writes it to stderr (fd 2) through syscall_no_intercept(), so the write
 * itself does not go through the interception hooks. Messages that do not fit
 * in the buffer are truncated. Empty messages and formatting errors are
 * silently dropped.
 */
static void
log_write(const char *fmt, ...)
{
    const int log_fd = 2;

    char buf[0x1000];
    va_list ap;

    // one byte of the buffer is held back for the trailing newline
    va_start(ap, fmt);
    int len = vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
    va_end(ap);

    if (len < 1)
        return;

    // vsnprintf() reports the length the complete message would have had,
    // which can exceed what was actually stored: clamp it so that neither
    // the newline nor the write below run past the end of the buffer
    const int max_len = static_cast<int>(sizeof(buf)) - 2;
    if (len > max_len)
        len = max_len;

    buf[len++] = '\n';

    syscall_no_intercept(SYS_write, log_fd, buf, len);
}
#endif

static inline int hook(long syscall_number,
static __thread bool reentrance_guard_flag;


/*
 * hook_internal -- interception hook for internal syscalls
 *
 * This hook is used to keep track of file descriptors created internally by
 * the library itself. This is important because some applications (e.g. ssh)
 * may attempt to close all open file descriptors, which would leave the
 * library internals in an inconsistent state.
 * Syscalls are forwarded to the kernel unchanged; for those that may create
 * or destroy a file descriptor the outcome is additionally recorded so that
 * the descriptor can be marked (or unmarked) as 'internal'.
 *
 * syscall_number: number of the intercepted syscall
 * arg0..arg5:     raw syscall arguments
 * result:         out parameter receiving the raw kernel return value for
 *                 syscalls handled here
 *
 * Returns HOOKED if the syscall was executed here (and *result is valid), or
 * NOT_HOOKED if the caller should let it through to the kernel.
 */
static inline int
hook_internal(long syscall_number,
         long arg0, long arg1, long arg2,
         long arg3, long arg4, long arg5,
         long *result)
{

    // syscall_no_intercept() hands back the raw kernel return value: a
    // non-negative number on success or a negated errno code on failure (not
    // the libc-style -1), hence only non-negative results are descriptors
    const auto track_new_fd = [](long ret) {
        if(ret >= 0) {
            CTX->register_internal_fd(static_cast<int>(ret));
        }
    };

    switch (syscall_number) {

        case SYS_open:
            *result = syscall_no_intercept(syscall_number,
                                reinterpret_cast<const char*>(arg0),
                                static_cast<int>(arg1),
                                static_cast<mode_t>(arg2));
            track_new_fd(*result);
            break;

        case SYS_creat:
            // creat(2) takes only (pathname, mode): the open flags are
            // implied by the syscall itself
            *result = syscall_no_intercept(syscall_number,
                                reinterpret_cast<const char*>(arg0),
                                static_cast<mode_t>(arg1));
            track_new_fd(*result);
            break;

        case SYS_openat:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<int>(arg0),
                                reinterpret_cast<const char*>(arg1),
                                static_cast<int>(arg2),
                                static_cast<mode_t>(arg3));
            track_new_fd(*result);
            break;

        // epoll_create and epoll_create1 have the same prototype
        case SYS_epoll_create:
        case SYS_epoll_create1:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<int>(arg0));
            track_new_fd(*result);
            break;

        case SYS_dup:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<unsigned int>(arg0));
            track_new_fd(*result);
            break;

        case SYS_dup2:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<unsigned int>(arg0),
                                static_cast<unsigned int>(arg1));
            track_new_fd(*result);
            break;

        case SYS_dup3:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<unsigned int>(arg0),
                                static_cast<unsigned int>(arg1),
                                static_cast<int>(arg2));
            track_new_fd(*result);
            break;

        case SYS_inotify_init:
            *result = syscall_no_intercept(syscall_number);
            track_new_fd(*result);
            break;

        case SYS_inotify_init1:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<int>(arg0));
            track_new_fd(*result);
            break;

        case SYS_perf_event_open:
            *result = syscall_no_intercept(syscall_number,
                                reinterpret_cast<struct perf_event_attr*>(arg0),
                                static_cast<pid_t>(arg1),
                                static_cast<int>(arg2),
                                static_cast<int>(arg3),
                                static_cast<unsigned long>(arg4));
            track_new_fd(*result);
            break;

        case SYS_signalfd:
            // the raw syscall carries the size of the signal mask as a third
            // argument, which the libc wrapper normally fills in
            *result = syscall_no_intercept(syscall_number,
                                static_cast<int>(arg0),
                                reinterpret_cast<const sigset_t*>(arg1),
                                static_cast<size_t>(arg2));
            track_new_fd(*result);
            break;

        case SYS_signalfd4:
            // raw arguments are (fd, mask, sizemask, flags)
            *result = syscall_no_intercept(syscall_number,
                                static_cast<int>(arg0),
                                reinterpret_cast<const sigset_t*>(arg1),
                                static_cast<size_t>(arg2),
                                static_cast<int>(arg3));
            track_new_fd(*result);
            break;

        case SYS_timerfd_create:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<int>(arg0),
                                static_cast<int>(arg1));
            track_new_fd(*result);
            break;

        case SYS_close:
            *result = syscall_no_intercept(syscall_number,
                                static_cast<int>(arg0));

            // Forget the descriptor that was closed (arg0), not the return
            // value of close(). On Linux the descriptor is released even
            // when close() reports an error, except for EBADF, where it was
            // never a valid open descriptor in the first place.
            if(*result != -EBADF) {
                CTX->unregister_internal_fd(static_cast<int>(arg0));
            }
            break;

        default:
            /*
            * Ignore any other syscalls
            * i.e.: pass them on to the kernel
            * as would normally happen.
            */

            #ifndef NDEBUG
            CTX->log()->trace("Syscall [{}, {}]  Passthrough", 
                    syscall_names[syscall_number], syscall_number);
            #endif
            return NOT_HOOKED;
    }

    #ifndef NDEBUG
    CTX->log()->trace("Syscall [{}, {}]  Intercepted", 
            syscall_names[syscall_number], syscall_number);
    #endif

#if 0
    log_write("Internal syscall [%s, %d] = %d", syscall_names[syscall_number]);//, syscall_number, *result);
#endif

    return HOOKED;

}

/*
 * hook -- interception hook for application syscalls
 *
 * This hook is used to implement any application filesystem-related syscalls.
 */
static inline 
int hook(long syscall_number,
         long arg0, long arg1, long arg2,
         long arg3, long arg4, long arg5,
         long *result)
@@ -316,8 +538,20 @@ static inline int hook(long syscall_number,
}


static __thread bool guard_flag;

/*
 * hook_guard_wrapper -- a wrapper which can notice reentrance.
 *
 * The reentrance_guard_flag flag allows the library to distinguish the hooking
 * of its own syscalls. E.g. while handling an open() syscall,
 * libgkfs_intercept might call fopen(), which in turn uses an open()
 * syscall internally. This internally used open() syscall is once again
 * forwarded to libgkfs_intercept, but using this flag we can notice this
 * case of reentering itself.
 *
 * XXX This approach still contains a very significant bug, as libgkfs_intercept
 * being called inside a signal handler might easily forward a mock fd to the
 * kernel.
 */
int
hook_guard_wrapper(long syscall_number,
                   long arg0, long arg1, long arg2,
@@ -326,19 +560,30 @@ hook_guard_wrapper(long syscall_number,
{
    assert(CTX->interception_enabled());

    if (guard_flag) {
        return NOT_HOOKED;
    }
#if 0
    log_write("syscall %s called from %s",
              syscall_names[syscall_number], 
              reentrance_guard_flag ? "gkfs" : "client");
#endif

    int is_hooked;

    guard_flag = true;
    if (reentrance_guard_flag) {
        int oerrno = errno;
        is_hooked = hook_internal(syscall_number,
                                  arg0, arg1, arg2, arg3, arg4, arg5,
                                  syscall_return_value);
        errno = oerrno;
        return is_hooked;
    }

    reentrance_guard_flag = true;
    int oerrno = errno;
    is_hooked = hook(syscall_number,
                     arg0, arg1, arg2, arg3, arg4, arg5,
                     syscall_return_value);
    errno = oerrno;
    guard_flag = false;
    reentrance_guard_flag = false;

    return is_hooked;
}
+53 −0
Original line number Diff line number Diff line
@@ -26,6 +26,9 @@
#include <client/rpc/hg_rpcs.hpp>
#include <hermes.hpp>

#include <sys/types.h>
#include <dirent.h>

#include <fstream>


@@ -104,6 +107,33 @@ bool init_hermes_client(const std::string& transport_prefix) {
    return true;
}

/**
 * Collects the file descriptors that are currently open in this process by
 * listing the entries of /proc/self/fd.
 *
 * The descriptor backing the directory stream used for the listing itself is
 * left out of the result, since it is closed again before returning.
 *
 * @return the numbers of all open file descriptors, or an empty set if
 *         /proc/self/fd cannot be opened
 */
static inline std::set<int>
query_open_fds() {

    std::set<int> fds;
    const std::string path{"/proc/self/fd"};

    std::unique_ptr<DIR, decltype(&::closedir)> dirp(
            ::opendir(path.c_str()),
            ::closedir);

    // nothing can be listed (e.g. /proc is not mounted)
    if(!dirp) {
        return fds;
    }

    const int listing_fd = ::dirfd(dirp.get());

    // readdir() is used instead of the deprecated readdir_r(): the stream is
    // local to this call, so it is never read from concurrently
    while(const struct dirent* entry = ::readdir(dirp.get())) {
        const std::string name{entry->d_name};

        // skips ".", ".." and anything else that is not a plain number
        if(name.empty() ||
           name.find_first_not_of("0123456789") != std::string::npos) {
            continue;
        }

        const int fd = std::stoi(name);

        if(fd == listing_fd) {
            continue;
        }

        fds.insert(fd);
    }

    return fds;
}


/**
 * This function is only called in the preload constructor and initializes 
@@ -111,6 +141,17 @@ bool init_hermes_client(const std::string& transport_prefix) {
 */
void init_ld_environment_() {

    // Client applications such as ssh attempt to close all open file 
    // descriptors, which causes havoc with the interception library's internal 
    // state. To account for this, in the interception code we keep track of
    // internal fds by distinguishing between internal syscalls (i.e. those
    // coming from internal code) and application syscalls. The problem is that
    // at this point in initialization we have not enabled interception yet,
    // but the initialization process itself needs to create file descriptors.
    // To solve this problem, we find out which fds are created by the 
    // initialization process and manually protect them at this point
    auto pre_init_fds = query_open_fds();

    // initialize Hermes interface to Mercury
    if (!init_hermes_client(RPC_PROTOCOL)) {
        exit_error_msg(EXIT_FAILURE, "Unable to initialize Hermes RPC client");
@@ -130,6 +171,17 @@ void init_ld_environment_() {
        exit_error_msg(EXIT_FAILURE, "Unable to fetch file system configurations from daemon process through RPC.");
    }

    auto post_init_fds = query_open_fds();
    std::set<int> internal_fds{3}; // fd 3 is created by the logging system

    std::set_difference(post_init_fds.begin(), post_init_fds.end(),
                        pre_init_fds.begin(), pre_init_fds.end(),
                        std::inserter(internal_fds, internal_fds.end()));

    for(const auto& fd : internal_fds) {
        CTX->register_internal_fd(fd);
    }

    CTX->log()->info("{}() Environment initialization successful.", __func__);
}

@@ -178,6 +230,7 @@ void log_prog_name() {
 * Called initially ONCE when preload library is used with the LD_PRELOAD environment variable
 */
void init_preload() {

    init_logging();
    CTX->log()->debug("Initialized logging subsystem");
    log_prog_name();
Loading