Commit 3fd09cc2 authored by Marc Vef's avatar Marc Vef
Browse files

First version of gkfs start script

parent d067bac5
Loading
Loading
Loading
Loading
Loading

scripts/bin/gkfs

0 → 100644
+186 −0
Original line number Diff line number Diff line
#!/bin/bash
# Print the command-line synopsis to stdout.
# The value shown inside <...> after each option is that option's default
# (or, for the directories, the source of the default: the runtime config).
usage_short() {
    echo "
usage: gkfs [-h/--help] [-r/--rootdir <config>] [-m/--mountdir <config>] [-n/--numnodes <jobsize>]
        [-a/--auto-sm <false>] [--srun <true>] [-l/--listen <ib0>] [-c/--cpuspertask <64>] [-v/--verbose <false>]
        {daemon} {ofi+sockets,ofi+verbs,ofi+psm2}
    "
}

# Print the help text shown for -h/--help.
# At present the help text consists solely of the short usage synopsis.
help_msg() { usage_short; }
# Locate the runtime configuration next to this script (physical path, with
# symlinked directories resolved by `pwd -P`) and load it into this shell.
SCRIPTDIR=$(
    cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P
)
CONFIGPATH=$SCRIPTDIR/gkfs_runtime.conf
# shellcheck source=./gkfs_runtime.conf
. "$CONFIGPATH"

# Defaults inherited from the runtime configuration; -r/-m override the
# two directories on the command line.
ROOTDIR=${DAEMON_ROOTDIR}
MOUNTDIR=${DAEMON_MOUNTDIR}
HOSTSFILE=${LIBGKFS_HOSTS_FILE}

# Built-in defaults for everything else that the command line can change.
# An empty NODE_NUM means "derive the node count from the Slurm job".
NODE_NUM=
CPUS_PER_TASK=64
LISTEN=ib0
AUTO_SM=false
USE_SRUN=true
VERBOSE=false

# Command-line parsing.
#
# Options taking a value (-r, -m, -n, -l, -c) abort with the usage text when
# the value is missing instead of silently storing an empty string.
# The boolean switches (-a/--auto-sm, --srun, -v/--verbose) still work as bare
# flags meaning "true"; in addition they accept an explicit "true" or "false"
# operand, matching the "<true>"/"<false>" placeholders in the usage text.
# That operand is the only way to switch off srun, which is enabled by default.
# Every word that is not a known option is collected in POSITIONAL.
POSITIONAL=()

# Abort unless an option is followed by a value.
# Arguments: $1 - option as typed, $2 - number of words left (option included)
_gkfs_need_value() {
    if (($2 < 2)); then
        echo "ERROR: Option $1 requires a value." >&2
        usage_short >&2
        exit 1
    fi
}

# Consume all given words, updating the option globals and POSITIONAL.
# Globals written: ROOTDIR MOUNTDIR NODE_NUM AUTO_SM USE_SRUN LISTEN
#                  CPUS_PER_TASK VERBOSE POSITIONAL
_gkfs_parse_args() {
    local key
    while (($# > 0)); do
        key=$1
        case ${key} in
        -r | --rootdir)
            _gkfs_need_value "$key" "$#"
            ROOTDIR=$2
            shift 2
            ;;
        -m | --mountdir)
            _gkfs_need_value "$key" "$#"
            MOUNTDIR=$2
            shift 2
            ;;
        -n | --numnodes)
            _gkfs_need_value "$key" "$#"
            NODE_NUM=$2
            shift 2
            ;;
        -l | --listen)
            _gkfs_need_value "$key" "$#"
            LISTEN=$2
            shift 2
            ;;
        -c | --cpuspertask)
            _gkfs_need_value "$key" "$#"
            CPUS_PER_TASK=$2
            shift 2
            ;;
        -a | --auto-sm)
            AUTO_SM=true
            shift
            # optional explicit true/false operand
            if [[ ${1:-} == true || ${1:-} == false ]]; then
                AUTO_SM=$1
                shift
            fi
            ;;
        --srun)
            USE_SRUN=true
            shift
            # optional explicit true/false operand
            if [[ ${1:-} == true || ${1:-} == false ]]; then
                USE_SRUN=$1
                shift
            fi
            ;;
        -v | --verbose)
            VERBOSE=true
            shift
            # optional explicit true/false operand
            if [[ ${1:-} == true || ${1:-} == false ]]; then
                VERBOSE=$1
                shift
            fi
            ;;
        -h | --help)
            help_msg
            exit
            ;;
        *) # unknown option or positional word
            POSITIONAL+=("$1") # save it in an array for later
            shift
            ;;
        esac
    done
}

_gkfs_parse_args "$@"
set -- "${POSITIONAL[@]}" # restore positional parameters

# Two positional words are mandatory: <type> <protocol>.
if (($# < 2)); then
    echo "ERROR: Positional arguments missing."
    usage_short
    exit 1
fi
TYPE=$1
# NOTE(review): within this file PROTOCOL is only echoed in verbose mode and
# is not added to the daemon command line - confirm that this is intended.
PROTOCOL=$2

# Only types containing "daemon" are accepted for now; the check is kept
# loose because further binaries (e.g., the proxy) are planned.
case $TYPE in
*daemon*) ;;
*)
    echo "ERROR: Type $TYPE not supported"
    usage_short
    exit 1
    ;;
esac

# Query Slurm for the node list of the current job: take the text after the
# first "=" on the " NodeList=" line of `scontrol show job`.
NODELIST=$(
    scontrol show job "$SLURM_JOB_ID" |
        grep " NodeList=" |
        cut -d "=" -f2
)
# Without -n/--numnodes, use one daemon per host in the expanded node list.
[[ -n $NODE_NUM ]] || NODE_NUM=$(scontrol show hostname "$NODELIST" | wc -l)

# In verbose mode, list the effective settings, one "### NAME: value" line
# each (printf repeats the format for every argument).
if [[ $VERBOSE == true ]]; then
    printf '### %s\n' \
        "TYPE: $TYPE" \
        "PROTOCOL: $PROTOCOL" \
        "MOUNTDIR: $MOUNTDIR" \
        "ROOTDIR: $ROOTDIR" \
        "NODE_NUM: $NODE_NUM" \
        "AUTO_SM: $AUTO_SM" \
        "LISTEN: $LISTEN" \
        "CPUS_PER_TASK: $CPUS_PER_TASK"
fi

# Environment passed on to every process started below.
# NOTE(review): by their names these look like settings for the PSM2 fabric
# transport (ofi+psm2) - confirm against the libfabric/PSM2 documentation.
export FI_PSM2_DISCONNECT=1 PSM2_MULTI_EP=1

#######################################
# Block until the hosts file contains exactly NODE_NUM lines, i.e. until
# every expected daemon has added its entry.
# Globals:   HOSTSFILE (read), NODE_NUM (read)
# Arguments: none
# Outputs:   error messages on stderr
# Returns:   0 once the line count matches; terminates the script with
#            status 1 if NODE_NUM is not a non-negative integer or if the
#            count still does not match after more than 600 polls (2 s apart).
#######################################
wait_for_gkfs_daemons() {
    local -r poll_interval=2 max_polls=600
    local polls=0 registered

    # Fail fast: with a non-numeric count the comparison below could never
    # succeed and the loop would only end after the full timeout.
    if [[ ! $NODE_NUM =~ ^[0-9]+$ ]]; then
        echo "ERROR: Invalid node count '$NODE_NUM'; cannot wait for daemons." >&2
        exit 1
    fi

    sleep "$poll_interval"
    while :; do
        # A hosts file that does not exist (yet) counts as zero entries.
        registered=0
        if [[ -f $HOSTSFILE && -r $HOSTSFILE ]]; then
            # "+ 0" strips the padding some wc implementations emit
            registered=$(($(wc -l < "$HOSTSFILE") + 0))
        fi
        if [ "$registered" -eq "$NODE_NUM" ]; then
            return 0
        fi

        sleep "$poll_interval"
        polls=$((polls + 1))
        if ((polls > max_polls)); then
            echo "Server failed to start. Exiting ..." >&2
            exit 1
        fi
    done
}


echo "Cleaning host file ..."
# A missing hosts file is the normal case on a fresh start, hence -f.
rm -f -- "$HOSTSFILE" 2> /dev/null

# The command line is assembled as arrays so that every value stays a single
# word even if it contains whitespace or glob characters. Note that this
# makes DAEMON_BIN a single path: it is no longer split into several words.
# Base srun invocation: one task per node on NODE_NUM nodes.
srun_argv=(
    srun --disable-status -N "$NODE_NUM" "--ntasks=$NODE_NUM"
    --ntasks-per-node=1 --overcommit --contiguous
    "--cpus-per-task=$CPUS_PER_TASK" --oversubscribe --mem=0
)
# Base daemon invocation
daemon_argv=("$DAEMON_BIN" -r "$ROOTDIR" -m "$MOUNTDIR" -H "$HOSTSFILE" -l "$LISTEN")
# Setting up numactl if enabled in config
if [[ $DAEMON_NUMACTL == true ]]; then
    daemon_argv=(
        numactl "--cpunodebind=$DAEMON_CPUNODEBIND" "--membind=$DAEMON_MEMBIND"
        "${daemon_argv[@]}"
    )
fi
# enabling auto-sm for daemon
if [[ $AUTO_SM == true ]]; then
    daemon_argv+=(--auto-sm)
fi
# final daemon execute command
if [[ $USE_SRUN == true ]]; then
    daemon_execute=("${srun_argv[@]}" "${daemon_argv[@]}")
else
    daemon_execute=("${daemon_argv[@]}")
fi

# Flat string forms, kept under their original names for display purposes.
SRUN_CMD="${srun_argv[*]}"
DAEMON_CMD="${daemon_argv[*]}"
DAEMON_EXECUTE="${daemon_execute[*]}"

if [[ $VERBOSE == true ]]; then
    echo "### Full execute DAEMON command:"
    echo "##### $DAEMON_EXECUTE"
fi

# Hand the logging settings from the runtime config to the daemon process.
export GKFS_DAEMON_LOG_PATH=$GKFS_DAEMON_LOG_PATH
export GKFS_DAEMON_LOG_LEVEL=$GKFS_DAEMON_LOG_LEVEL

# Starting daemon in the background; its PID is needed for the shutdown.
echo "Starting daemon only ..."
"${daemon_execute[@]}" &
DAEMON_SRUN_PID=$!
wait_for_gkfs_daemons
echo "Running /o/"

# Keep the script (and with it the daemon) alive until the user presses 'q',
# then interrupt the daemon and wait for it to finish.
echo "Press 'q' to exit"
while :; do
    # Keys are read from the descriptor behind stdout, as before. If that
    # descriptor is not a terminal (output piped or redirected), reading
    # fails at once; instead of re-prompting in an endless busy loop, simply
    # wait for the daemon to end by itself.
    if ! read -r -n 1 k <&1 && [[ ! -t 1 ]]; then
        echo "No interactive terminal available. Waiting for daemon to exit ..."
        if [[ -n $DAEMON_SRUN_PID ]]; then
            wait "$DAEMON_SRUN_PID"
        fi
        break
    fi
    if [[ $k = q ]]; then
        echo
        echo "Shutting down ..."
        if [[ -n $DAEMON_SRUN_PID ]]; then
            echo "Stopping daemon ..."
            kill -s SIGINT "$DAEMON_SRUN_PID"
            wait "$DAEMON_SRUN_PID"
        fi
        break
    else
        echo "Press 'q' to exit"
    fi
done
echo "Nothing left to do. Exiting :)"
+39 −0
Original line number Diff line number Diff line
#!/bin/bash
# Runtime configuration sourced by the gkfs start script.
# Assignments are evaluated top to bottom, so a helper variable (leading
# underscore) must be assigned before the first line that expands it.

# global
_GKFS_HOSTS_FILE=/home/vef/vef_m2/gkfs_hostfile
# Defined here rather than in the proxy section because the client
# configuration below expands it.
_PROXY_PID_PATH=/dev/shm/vef_gkfs_proxy.pid

# binaries
PRELOAD_LIB=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/client/libgkfs_intercept.so
DAEMON_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/daemon/gkfs_daemon
PROXY_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/proxy/gkfs_proxy

# client configuration
LIBGKFS_PROXY_PID_FILE=$_PROXY_PID_PATH
LIBGKFS_HOSTS_FILE=$_GKFS_HOSTS_FILE

# daemon configuration
DAEMON_ROOTDIR=/dev/shm/vef_rootdir
#DAEMON_ROOTDIR=/localscratch/$SLURM_JOB_ID/vef_rootdir
DAEMON_MOUNTDIR=/dev/shm/vef_mountdir
# When DAEMON_NUMACTL is true the start script runs the daemon under numactl
# with the CPU-node and memory binding given below.
DAEMON_NUMACTL=true
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"

# proxy configuration
PROXY_NUMACTL=true
PROXY_CPUNODEBIND="0"
PROXY_MEMBIND="0"

# logging
GKFS_DAEMON_LOG_LEVEL=info
GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log
GKFS_PROXY_LOG_LEVEL=info
GKFS_PROXY_LOG_PATH=/dev/shm/vef_gkfs_proxy.log
LIBGKFS_LOG=errors,warnings
LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log

# benchmark bins
IOR=/lustre/miifs01/project/m2_zdvresearch/vef/benchmarks/ior_marc/build/src/ior
MDTEST=/lustre/miifs01/project/m2_zdvresearch/vef/benchmarks/ior_marc/build/src/mdtest
 No newline at end of file