Verified commit e28714b0, authored by Marc Vef
Browse files

gkfs script slurm nodelist support, bugfix, cpu affinity

add better cpu affinity option via config file
parent 94e84f40
Loading
Loading
Loading
Loading
+52 −26
Original line number Diff line number Diff line
@@ -91,8 +91,8 @@ create_pid_file() {
#   PROXY_ARGS_
#   CPUS_PER_TASK
#   VERBOSE
#   DAEMON_NUMACTL_
#   PROXY_NUMACTL_
#   DAEMON_AFFINITY_
#   PROXY_AFFINITY_
#   USE_PROXY
#   DAEMON_CPUNODEBIND
#   DAEMON_MEMBIND
@@ -104,17 +104,41 @@ create_pid_file() {
#######################################
start_daemon() {
    local node_list
    local srun_cmd
    local srun_daemon_cmd
    local srun_proxy_cmd
    local daemon_execute
    local proxy_execute
    # setup
    if [[ ${USE_SRUN} == true ]]; then
        # check for daemon first
        if [[ -n ${DAEMON_NODELIST_} ]]; then
            if [[ ! -f ${DAEMON_NODELIST_} ]]; then
                echo -e "${C_AST_RED}ERROR: Daemon nodelist file not found at ${DAEMON_NODELIST_}. Exiting ..."
                exit 1
            fi
            NODE_NUM=$(wc -l < "${DAEMON_NODELIST_}")
            srun_daemon_cmd="srun --disable-status --nodelist=${DAEMON_NODELIST_} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} "
        else
            node_list=$(scontrol show job "${SLURM_JOB_ID}" | grep " NodeList=" | cut -d "=" -f2)
            if [[ -z ${NODE_NUM} ]]; then
                NODE_NUM=$(scontrol show hostname "${node_list}" | wc -l)
            fi
            # Setting up base srun cmd
        srun_cmd="srun --disable-status -N ${NODE_NUM} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} "
            srun_daemon_cmd="srun --disable-status -N ${NODE_NUM} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} "
        fi
        if [[ ${USE_PROXY} == true ]]; then
            if [[ -n ${PROXY_NODELIST_} ]]; then
                if [[ ! -f ${PROXY_NODELIST_} ]]; then
                    echo -e "${C_AST_RED}ERROR: Proxy nodelist file not found at ${PROXY_NODELIST_}. Exiting ..."
                    exit 1
                fi
                NODE_NUM_PROXY=$(wc -l < "${PROXY_NODELIST_}")
                srun_proxy_cmd="srun --disable-status --nodelist=${PROXY_NODELIST_} --ntasks=${NODE_NUM_PROXY} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} "
            else
                srun_proxy_cmd="srun --disable-status -N ${NODE_NUM} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} "
                NODE_NUM_PROXY=$NODE_NUM
            fi
        fi
    else
        NODE_NUM=1
    fi
@@ -136,22 +160,22 @@ start_daemon() {
    if [[ ${USE_PROXY} == true ]]; then
        daemon_cmd="${daemon_cmd} ${DAEMON_PROXY_ARGS}"
    fi
    # Setting up numactl
    if [[ ${DAEMON_NUMACTL_} == true ]]; then
        daemon_cmd="numactl --cpunodebind=${DAEMON_CPUNODEBIND} --membind=${DAEMON_MEMBIND} ${daemon_cmd}"
    # Set cpu affinity for daemon
    if [[ -n ${DAEMON_AFFINITY_} ]]; then
        daemon_cmd="${DAEMON_AFFINITY_} ${daemon_cmd}"
    fi
    # final daemon execute command
    daemon_execute="${srun_cmd}${daemon_cmd}"
    daemon_execute="${srun_daemon_cmd} ${SRUN_DAEMON_ARGS} ${daemon_cmd}"

    # Setting up base proxy command
    if [[ ${USE_PROXY} == true ]]; then
        local proxy_cmd="${PROXY_BIN} -H ${HOSTSFILE} --pid-path ${PROXY_LOCAL_PID_FILE} ${PROXY_ARGS_}"
        # Setting up numactl
        if [[ ${PROXY_NUMACTL_} == true ]]; then
            proxy_cmd="numactl --cpunodebind=${PROXY_CPUNODEBIND} --membind=${PROXY_MEMBIND} ${proxy_cmd}"
        # Set cpu affinity for proxy
        if [[ -n ${PROXY_AFFINITY_} ]]; then
            proxy_cmd="${PROXY_AFFINITY_} ${proxy_cmd}"
        fi
        # final proxy execute command
        proxy_execute="${srun_cmd}${proxy_cmd}"
        proxy_execute="${srun_proxy_cmd} ${SRUN_PROXY_ARGS} ${proxy_cmd}"
    fi

    if [[ ${VERBOSE} == true ]]; then
@@ -177,7 +201,7 @@ start_daemon() {
    echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds"

    if [[ ${USE_PROXY} == true ]]; then
        echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM} nodes) ..."
        echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM_PROXY} nodes) ..."
        start_time="$(date -u +%s.%3N)"
        ${proxy_execute} &
        local proxy_pid=$!
@@ -293,7 +317,7 @@ stop_daemons() {
usage_short() {
    # Print the one-paragraph command synopsis to stdout.
    # The option list mirrors what the argument parser accepts: the former
    # numactl switches are no longer listed, and the nodelist options
    # (each taking a path to a nodelist file) are included.
    echo "
usage: gkfs [-h/--help] [-r/--rootdir <path>] [-m/--mountdir <path>] [-a/--args <daemon_args>] [--proxy <false>] [-f/--foreground <false>]
        [--srun <false>] [-n/--numnodes <jobsize>] [--cpuspertask <64>] [-v/--verbose <false>]
        [--daemon_nodelist <path>] [--proxy_nodelist <path>]
        {start,stop}
    "
}
@@ -325,8 +349,6 @@ help_msg() {
            -n, --numnodes <n>      GekkoFS daemons are started on n nodes.
                                    Nodelist is extracted from Slurm via the SLURM_JOB_ID env variable.
            --cpuspertask <#cores>  Set the number of cores the daemons can use. Must use '--srun'.
            --daemon_numactl        Use numactl for the daemon. Modify gkfs.conf for further numactl configurations.
            --proxy_numactl         Use numactl for the proxy. Modify gkfs.conf for further numactl configurations.
            -c, --config            Path to configuration file. By defaults looks for a 'gkfs.conf' in this directory.
            -v, --verbose           Increase verbosity
            "
@@ -365,9 +387,11 @@ CPUS_PER_TASK=$(grep -c ^processor /proc/cpuinfo)
DAEMON_ARGS_=${DAEMON_ARGS}
PROXY_ARGS_=${PROXY_ARGS}
USE_SRUN=${USE_SRUN}
DAEMON_NODELIST_=${DAEMON_NODELIST}
PROXY_NODELIST_=${PROXY_NODELIST}
RUN_FOREGROUND=false
DAEMON_NUMACTL_=${DAEMON_NUMACTL}
PROXY_NUMACTL_=${PROXY_NUMACTL}
DAEMON_AFFINITY_=${DAEMON_AFFINITY}
PROXY_AFFINITY_=${PROXY_AFFINITY}
USE_PROXY=${USE_PROXY}
# parse input
POSITIONAL=()
@@ -412,13 +436,15 @@ while [[ $# -gt 0 ]]; do
        RUN_FOREGROUND=true
        shift # past argument
        ;;
    --daemon_numactl)
        DAEMON_NUMACTL_=true
    --daemon_nodelist)
        DAEMON_NODELIST_="$2"
        shift # past argument
        shift # past value
        ;;
    --proxy_numactl)
        PROXY_NUMACTL_=true
    --proxy_nodelist)
        PROXY_NODELIST_="$2"
        shift # past argument
        shift # past value
        ;;
    --cpuspertask)
        CPUS_PER_TASK=$2
+7 −4
Original line number Diff line number Diff line
@@ -16,11 +16,14 @@ DAEMON_PID_FILE=./gkfs_daemon.pid
DAEMON_ARGS=""
# Use Slurm's srun to start the daemons on multiple nodes and set specific srun args
USE_SRUN=false
# Path to a nodelist file passed to srun via --nodelist for the daemons; leave empty to use the nodes of the current Slurm job
DAEMON_NODELIST=""
# srun args
SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0"
# use numactl to pin daemon to socket
DAEMON_NUMACTL=false
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"
# Specific srun args for daemon
SRUN_DAEMON_ARGS=""
# CPU affinity: command prefix placed in front of the daemon command, e.g. `taskset -c ...`; leave empty to disable
DAEMON_AFFINITY=""

# logging
GKFS_DAEMON_LOG_LEVEL=info
+15 −9
Original line number Diff line number Diff line
@@ -9,31 +9,37 @@ PROXY_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/io500/bin/gkfs_proxy
LIBGKFS_HOSTS_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_hostfile

## daemon configuration
#DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir
DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir
DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir
#DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir
DAEMON_MOUNTDIR=/dev/shm/vef_gkfs_mountdir
# additional daemon arguments (see `gkfs_daemon -h`)
# use numactl to pin daemon to socket
DAEMON_ARGS="-l ib0 -c"
DAEMON_NUMACTL=true
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"
# CPU affinity: command prefix placed in front of the daemon command, e.g. `taskset -c ...`; leave empty to disable
DAEMON_AFFINITY=""

## proxy configuration
DAEMON_PROXY_ARGS="--proxy-listen ib0 --proxy-protocol ofi+sockets"
PROXY_LOCAL_PID_FILE=/dev/shm/vef_gkfs_proxy.pid
PROXY_ARGS=""
PROXY_NUMACTL=true
PROXY_CPUNODEBIND="0"
PROXY_MEMBIND="0"
# CPU affinity: command prefix placed in front of the proxy command, e.g. `taskset -c ...`; leave empty to disable
PROXY_AFFINITY=""

## slurm configuration
# Use Slurm's srun to start the daemons on multiple nodes and set specific srun args
USE_SRUN=true
# Paths to nodelist files passed to srun via --nodelist for the daemons and the proxies; leave empty to not use a nodelist
DAEMON_NODELIST=""
PROXY_NODELIST=""
# srun args
SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0"
# Specific srun args for daemon
SRUN_DAEMON_ARGS=""
# Specific srun args for proxy
SRUN_PROXY_ARGS=""
# path to daemon pid file; created where the script is run
SRUN_DAEMON_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_daemon.pid
SRUN_PROXY_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_proxy.pid # TODO
SRUN_PROXY_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_proxy.pid

# logging configuration
GKFS_DAEMON_LOG_LEVEL=info