Commit b01f6eb4 authored by Marc Vef's avatar Marc Vef
Browse files

gkfs run script proxy support

parent 344abd32
Loading
Loading
Loading
Loading
Loading
+84 −20
Original line number Diff line number Diff line
@@ -48,7 +48,7 @@ wait_for_gkfs_daemons() {
# Creates a pid file for a given pid. If pid file exists, we check if its pids are still valid.
# If valid, an additional line is added. Otherwise, the pid in the file is deleted.
# Globals:
#   DAEMON_PID_FILE
#   SRUN_DAEMON_PID_FILE
#   VERBOSE
# Arguments:
#   pid to write to pid file
@@ -56,14 +56,14 @@ wait_for_gkfs_daemons() {
#   Writes status to stdout if VERBOSE is true
#######################################
create_pid_file() {
    local pid_file=${DAEMON_PID_FILE}
    local pid_file=${SRUN_DAEMON_PID_FILE}
    local pid=${1}
    if [[ ${VERBOSE} == true ]]; then
        echo -e "${C_AST_GREEN}Creating pid file at ${pid_file} with pid ${pid} ..."
    fi
    # if PID file exists another daemon could run
    if [[ -e ${pid_file} ]]; then
        local pid_file_tmp=${DAEMON_PID_FILE}.swp
        local pid_file_tmp=${SRUN_DAEMON_PID_FILE}.swp
        # create empty tmp file
        truncate -s 0 "${pid_file_tmp}"
        while IFS= read -r line
@@ -85,10 +85,13 @@ create_pid_file() {
#   NODE_NUM
#   MOUNTDIR
#   ROOTDIR
#   ARGS
#   DAEMON_ARGS_
#   PROXY_ARGS_
#   CPUS_PER_TASK
#   VERBOSE
#   USE_NUMACTL
#   DAEMON_NUMACTL_
#   PROXY_NUMACTL_
#   USE_PROXY
#   DAEMON_CPUNODEBIND
#   DAEMON_MEMBIND
#   GKFS_DAEMON_LOG_PATH
@@ -101,6 +104,7 @@ start_daemon() {
    local node_list
    local srun_cmd
    local daemon_execute
    local proxy_execute
    # setup
    if [[ ${USE_SRUN} == true ]]; then
        node_list=$(scontrol show job "${SLURM_JOB_ID}" | grep " NodeList=" | cut -d "=" -f2)
@@ -117,29 +121,45 @@ start_daemon() {
        echo -e "${C_AST_GREEN}mountdir: ${MOUNTDIR}"
        echo -e "${C_AST_GREEN}rootdir: ${ROOTDIR}"
        echo -e "${C_AST_GREEN}node_num: ${NODE_NUM}"
        echo -e "${C_AST_GREEN}additional daemon args: ${ARGS}"
        echo -e "${C_AST_GREEN}additional daemon args: ${DAEMON_ARGS_}"
        echo -e "${C_AST_GREEN}cpus_per_task: ${CPUS_PER_TASK}"
        [[ ${USE_PROXY} == true ]] && echo -e "${C_AST_GREEN}Proxy enabled"
    fi
    if [[ ${VERBOSE} == true ]]; then
        echo -e "${C_AST_GREEN}Cleaning host file ..."
    fi
    rm "${HOSTSFILE}" 2> /dev/null
    # Setting up base daemon cmd
    local daemon_cmd="${DAEMON_BIN} -r ${ROOTDIR} -m ${MOUNTDIR} -H ${HOSTSFILE} ${ARGS}"
    local daemon_cmd="${DAEMON_BIN} -r ${ROOTDIR} -m ${MOUNTDIR} -H ${HOSTSFILE} ${DAEMON_ARGS_}"
    # Setting up numactl
    if [[ ${USE_NUMACTL} == true ]]; then
    if [[ ${DAEMON_NUMACTL_} == true ]]; then
        daemon_cmd="numactl --cpunodebind=${DAEMON_CPUNODEBIND} --membind=${DAEMON_MEMBIND} ${daemon_cmd}"
    fi
    # final daemon execute command
    daemon_execute="${srun_cmd}${daemon_cmd}"

    # Setting up base proxy command
    if [[ ${USE_PROXY} == true ]]; then
        local proxy_cmd="${PROXY_BIN} -H ${HOSTSFILE} --pid-path ${PROXY_LOCAL_PID_FILE} ${PROXY_ARGS_}"
        # Setting up numactl
        if [[ ${PROXY_NUMACTL_} == true ]]; then
            proxy_cmd="numactl --cpunodebind=${PROXY_CPUNODEBIND} --membind=${PROXY_MEMBIND} ${proxy_cmd}"
        fi
        # final proxy execute command
        proxy_execute="${srun_cmd}${proxy_cmd}"
    fi

    if [[ ${VERBOSE} == true ]]; then
        echo -e "${C_AST_GREEN}Full execute DAEMON command:"
        echo -e "${C_AST_GREEN}# $daemon_execute"
        [[ ${USE_PROXY} == true ]] && echo -e "${C_AST_GREEN}Full execute PROXY command:"
        [[ ${USE_PROXY} == true ]] && echo -e "${C_AST_GREEN}# $proxy_execute"
    fi
    # setup environment variables
    export GKFS_DAEMON_LOG_PATH=$GKFS_DAEMON_LOG_PATH
    export GKFS_DAEMON_LOG_LEVEL=$GKFS_DAEMON_LOG_LEVEL
    export GKFS_PROXY_LOG_PATH=$GKFS_PROXY_LOG_PATH
    export GKFS_PROXY_LOG_LEVEL=$GKFS_PROXY_LOG_LEVEL

    echo -e "${C_AST_GREEN}Starting GekkoFS daemons (${NODE_NUM} nodes) ..."
    start_time="$(date -u +%s.%3N)"
@@ -151,11 +171,35 @@ start_daemon() {
    echo -e "${C_AST_GREEN}GekkoFS daemons running"
    echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds"

    if [[ ${USE_PROXY} == false ]]; then
        echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM} nodes) ..."
        start_time="$(date -u +%s.%3N)"
        ${proxy_execute} &
        local proxy_pid=$!
        sleep 5 # TODO
        stop_time="$(date -u +%s.%3N)"
        elapsed="$(bc <<<"$stop_time-$start_time")"
        echo -e "${C_AST_GREEN}GekkoFS daemons probably :) running"
        echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds"
    fi

    if [[ ${RUN_FOREGROUND} == true ]]; then
        echo "Press 'q' to exit"
        while : ; do
            read -n 1 k <&1
            if [[ $k = q ]] ; then
                if [[ ${USE_PROXY} == false ]]; then
                    start_time="$(date -u +%s.%3N)"
                    echo
                    echo -e "${C_AST_GREEN}Shutting down GekkoFS proxies ..."
                    if [[ -n ${proxy_pid} ]]; then
                        kill -s SIGINT ${proxy_pid} &
                        wait ${proxy_pid}
                    fi
                    stop_time="$(date -u +%s.%3N)"
                    elapsed="$(bc <<<"$stop_time-$start_time")"
                    echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds"
                fi
                start_time="$(date -u +%s.%3N)"
                echo
                echo -e "${C_AST_GREEN}Shutting down GekkoFS daemons ..."
@@ -173,18 +217,19 @@ start_daemon() {
        done
    else
        create_pid_file ${daemon_pid}
#        create_pid_file ${proxy_pid}
    fi
}
#######################################
# Stops GekkoFS daemons for the configured pid file
# Globals:
#   DAEMON_PID_FILE
#   SRUN_DAEMON_PID_FILE
#   VERBOSE
# Outputs:
#   Writes status to stdout
#######################################
stop_daemons() {
    local pid_file=${DAEMON_PID_FILE}
    local pid_file=${SRUN_DAEMON_PID_FILE}
    if [[ -e ${pid_file} ]]; then
        while IFS= read -r line
        do
@@ -214,8 +259,8 @@ stop_daemons() {
#######################################
usage_short() {
    # Prints the short usage synopsis to stdout.
    # The option names mirror the argument parser: daemon and proxy arguments
    # are passed via -d/--daemon_args and -p/--proxy_args, and numactl is
    # toggled separately for daemon and proxy. Values in <> are the defaults.
    echo "
usage: gkfs [-h/--help] [-r/--rootdir <path>] [-m/--mountdir <path>] [-d/--daemon_args <daemon_args>] [-p/--proxy_args <proxy_args>] [--proxy <false>] [-f/--foreground <false>]
        [--srun <false>] [-n/--numnodes <jobsize>] [--cpuspertask <64>] [--daemon_numactl <false>] [--proxy_numactl <false>] [-v/--verbose <false>]
        {start,stop}
    "
}
@@ -238,14 +283,17 @@ help_msg() {
            -h, --help              Shows this help message and exits
            -r, --rootdir <path>    The rootdir path for GekkoFS daemons.
            -m, --mountdir <path>   The mountdir path for GekkoFS daemons.
            -a, --args <daemon_arguments>
            -d, --daemon_args <daemon_arguments>
            --proxy                 Start proxy after the daemons are running.
                                    Add various additional daemon arguments, e.g., \"-l ib0 -P ofi+psm2\".
            -p, --proxy_args <proxy_arguments>
            -f, --foreground        Starts the script in the foreground. Daemons are stopped by pressing 'q'.
            --srun                  Use srun to start daemons on multiple nodes.
            -n, --numnodes <n>      GekkoFS daemons are started on n nodes.
                                    Nodelist is extracted from Slurm via the SLURM_JOB_ID env variable.
            --cpuspertask <#cores>  Set the number of cores the daemons can use. Must use '--srun'.
            --numactl               Use numactl for the daemon. Modify gkfs.conf for further numactl configurations.
            --daemon_numactl        Use numactl for the daemon. Modify gkfs.conf for further numactl configurations.
            --proxy_numactl         Use numactl for the proxy. Modify gkfs.conf for further numactl configurations.
            -c, --config            Path to configuration file. By defaults looks for a 'gkfs.conf' in this directory.
            -v, --verbose           Increase verbosity
            "
@@ -281,10 +329,13 @@ MOUNTDIR=${DAEMON_MOUNTDIR}
ROOTDIR=${DAEMON_ROOTDIR}
HOSTSFILE=${LIBGKFS_HOSTS_FILE}
CPUS_PER_TASK=$(grep -c ^processor /proc/cpuinfo)
ARGS=${DAEMON_ARGS}
DAEMON_ARGS_=${DAEMON_ARGS}
PROXY_ARGS_=${PROXY_ARGS}
USE_SRUN=${USE_SRUN}
RUN_FOREGROUND=false
USE_NUMACTL=${DAEMON_NUMACTL}
DAEMON_NUMACTL_=${DAEMON_NUMACTL}
PROXY_NUMACTL_=${PROXY_NUMACTL}
USE_PROXY=false
# parse input
POSITIONAL=()
while [[ $# -gt 0 ]]; do
@@ -306,11 +357,20 @@ while [[ $# -gt 0 ]]; do
        shift # past argument
        shift # past value
        ;;
    -a | --args)
        ARGS="${ARGS} $2"
    -d | --daemon_args)
        DAEMON_ARGS_="${DAEMON_ARGS_} $2"
        shift # past argument
        shift # past value
        ;;
    -p | --proxy_args)
        PROXY_ARGS_="${PROXY_ARGS_} $2"
        shift # past argument
        shift # past value
        ;;
    --proxy)
        USE_PROXY=true
        shift # past argument
        ;;
    --srun)
        USE_SRUN=true
        shift # past argument
@@ -319,8 +379,12 @@ while [[ $# -gt 0 ]]; do
        RUN_FOREGROUND=true
        shift # past argument
        ;;
    --numactl)
        USE_NUMACTL=true
    --daemon_numactl)
        DAEMON_NUMACTL_=true
        shift # past argument
        ;;
    --proxy_numactl)
        PROXY_NUMACTL_=true
        shift # past argument
        ;;
    --cpuspertask)
+43 −0
Original line number Diff line number Diff line
#!/bin/bash
# Configuration values for the gkfs run script: binaries, directories,
# numactl pinning, Slurm settings and logging.

# Installation prefix shared by the binaries, the hosts file and the pid files.
# May be overridden from the environment; the default keeps the previous paths.
GKFS_INSTALL_PREFIX="${GKFS_INSTALL_PREFIX:-/lustre/miifs01/project/m2_zdvresearch/vef/io500}"

# binaries
PRELOAD_LIB="${GKFS_INSTALL_PREFIX}/lib/libgkfs_intercept.so"
DAEMON_BIN="${GKFS_INSTALL_PREFIX}/bin/gkfs_daemon"
PROXY_BIN="${GKFS_INSTALL_PREFIX}/bin/gkfs_proxy"

## client configuration
LIBGKFS_HOSTS_FILE="${GKFS_INSTALL_PREFIX}/run/gkfs_hostfile"

## daemon configuration
#DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir
# NOTE: the job component of this path is empty if SLURM_JOB_ID is unset.
DAEMON_ROOTDIR="/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir"
DAEMON_MOUNTDIR=/dev/shm/vef_gkfs_mountdir
# additional daemon arguments (see `gkfs_daemon -h`)
DAEMON_ARGS="-l ib0 -c"
# use numactl to pin daemon to socket
DAEMON_NUMACTL=true
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"

## proxy configuration
PROXY_LOCAL_PID_FILE=/dev/shm/vef_gkfs_proxy.pid
# additional proxy arguments
PROXY_ARGS=""
# use numactl to pin proxy to socket
PROXY_NUMACTL=true
PROXY_CPUNODEBIND="0"
PROXY_MEMBIND="0"

## slurm configuration
# Use Slurm's srun to start the daemons on multiple nodes and set specific srun args
USE_SRUN=true
SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0"
# path to daemon pid file; created where the script is run
SRUN_DAEMON_PID_FILE="${GKFS_INSTALL_PREFIX}/run/gkfs_daemon.pid"
# path to proxy pid file
SRUN_PROXY_PID_FILE="${GKFS_INSTALL_PREFIX}/run/gkfs_proxy.pid" # TODO

# logging configuration
GKFS_DAEMON_LOG_LEVEL=info
GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log
GKFS_PROXY_LOG_LEVEL=info
GKFS_PROXY_LOG_PATH=/dev/shm/vef_gkfs_proxy.log
LIBGKFS_LOG=errors,warnings
LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log
 No newline at end of file