#!/bin/bash
# File: scripts/bin/gkfs
#
# Starts the gkfs daemon binary named by DAEMON_BIN in gkfs_runtime.conf,
# wrapped in srun, waits until the hosts file contains as many lines as there
# are nodes, and then blocks until 'q' is pressed, at which point the daemon
# is sent SIGINT and the script exits.

# Prints the command-line synopsis to stdout.
usage_short() {
  echo "
usage: gkfs [-h] [-r/--rootdir <config>] [-m/--mountdir <config>] [-n/--numnodes <jobsize>]
            [-a/--auto-sm <false>] [--srun <true>] [-l/--listen <ib0>] [-c/--cpuspertask <64>]
            [-v/--verbose <false>]
            {daemon} {ofi+sockets,ofi+verbs,ofi+psm2}
  "
}

# Prints the help text (currently identical to the short usage).
help_msg() {
  usage_short
}

# Aborts with a usage error when an option that takes a value is the last
# word on the command line (it would otherwise silently become empty).
# Arguments: the remaining command line ("$@"), option name first.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "ERROR: Option $1 requires a value."
    usage_short
    exit 1
  fi
}

# Sends SIGINT to the background daemon launcher, if one was started, and
# waits for it to terminate.
# Globals: DAEMON_SRUN_PID (read)
stop_daemon() {
  if [[ -n ${DAEMON_SRUN_PID:-} ]]; then
    echo "Stopping daemon ..."
    kill -s SIGINT "$DAEMON_SRUN_PID" &
    wait "$DAEMON_SRUN_PID"
  fi
}

# Polls the hosts file every 2 seconds until its line count equals NODE_NUM.
# Gives up after more than 600 polls (roughly 20 minutes), stops the daemon
# launcher that was already started, and exits with status 1.
# Globals: HOSTSFILE (read), NODE_NUM (read), DAEMON_SRUN_PID (read)
wait_for_gkfs_daemons() {
  local wait_cnt=0
  sleep 2
  # A missing hosts file yields an empty count; "+ 0" turns that into 0.
  until [[ $(($(wc -l "$HOSTSFILE" 2> /dev/null | awk '{print $1}') + 0)) -eq $NODE_NUM ]]; do
    #echo "Waiting for all servers to report connection. Try $wait_cnt"
    sleep 2
    wait_cnt=$((wait_cnt + 1))
    if [[ $wait_cnt -gt 600 ]]; then
      echo "Server failed to start. Exiting ..."
      # Do not leave the background launcher running after giving up.
      stop_daemon
      exit 1
    fi
  done
}

SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
CONFIGPATH="${SCRIPTDIR}/gkfs_runtime.conf"
# Remember whether the config could be loaded; the check is deferred until
# after option parsing so that -h/--help still works without a config file.
CONFIG_LOADED=true
# shellcheck source=./gkfs_runtime.conf
source "$CONFIGPATH" || CONFIG_LOADED=false

VERBOSE=false
NODE_NUM=""
MOUNTDIR=$DAEMON_MOUNTDIR
ROOTDIR=$DAEMON_ROOTDIR
HOSTSFILE=$LIBGKFS_HOSTS_FILE
CPUS_PER_TASK=64
AUTO_SM=false
LISTEN="ib0"
# NOTE(review): USE_SRUN defaults to true and --srun also sets it to true, so
# the non-srun branch below cannot currently be selected — confirm intent.
USE_SRUN=true

POSITIONAL=()
while [[ $# -gt 0 ]]; do
  key="$1"

  case ${key} in
  -r | --rootdir)
    require_value "$@"
    ROOTDIR=$2
    shift 2 # past argument and value
    ;;
  -m | --mountdir)
    require_value "$@"
    MOUNTDIR=$2
    shift 2 # past argument and value
    ;;
  -n | --numnodes)
    require_value "$@"
    NODE_NUM=$2
    shift 2 # past argument and value
    ;;
  -a | --auto-sm)
    AUTO_SM=true
    shift # past argument
    ;;
  --srun)
    USE_SRUN=true
    shift # past argument
    ;;
  -l | --listen)
    require_value "$@"
    LISTEN="$2"
    shift 2 # past argument and value
    ;;
  -c | --cpuspertask)
    require_value "$@"
    CPUS_PER_TASK=$2
    shift 2 # past argument and value
    ;;
  -h | --help)
    help_msg
    exit
    ;;
  -v | --verbose)
    VERBOSE=true
    shift # past argument
    ;;
  *) # unknown option
    POSITIONAL+=("$1") # save it in an array for later
    shift              # past argument
    ;;
  esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

if [[ $CONFIG_LOADED != true ]]; then
  echo "ERROR: Could not load configuration file $CONFIGPATH"
  exit 1
fi

# positional arguments
if [[ $# -lt 2 ]]; then
  echo "ERROR: Positional arguments missing."
  usage_short
  exit 1
fi
TYPE="${1}"
# NOTE(review): PROTOCOL is only echoed in verbose mode and is not passed to
# the daemon command line — confirm whether that is intended.
PROTOCOL="${2}"
# will be used for other future binaries, e.g., the proxy
if [[ $TYPE != *"daemon"* ]]; then
  echo "ERROR: Type $TYPE not supported"
  usage_short
  exit 1
fi

# setup
# Only ask Slurm for the node list when the node count was not given.
if [[ -z $NODE_NUM ]]; then
  if [[ -z ${SLURM_JOB_ID:-} ]]; then
    echo "ERROR: SLURM_JOB_ID is not set. Use -n/--numnodes to set the number of nodes."
    exit 1
  fi
  NODELIST=$(scontrol show job "$SLURM_JOB_ID" | grep " NodeList=" | cut -d "=" -f2)
  NODE_NUM=$(scontrol show hostname "$NODELIST" | wc -l)
  # Some wc implementations pad the count with blanks.
  NODE_NUM=${NODE_NUM//[[:space:]]/}
fi
# A non-numeric or zero node count would make srun fail and the wait loop
# compare against garbage, so reject it up front.
if [[ ! $NODE_NUM =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: Invalid number of nodes '$NODE_NUM'"
  exit 1
fi

if [[ $VERBOSE == true ]]; then
  echo "### TYPE: $TYPE"
  echo "### PROTOCOL: $PROTOCOL"
  echo "### MOUNTDIR: $MOUNTDIR"
  echo "### ROOTDIR: $ROOTDIR"
  echo "### NODE_NUM: $NODE_NUM"
  echo "### AUTO_SM: $AUTO_SM"
  echo "### LISTEN: $LISTEN"
  echo "### CPUS_PER_TASK: $CPUS_PER_TASK"
fi

# Exported into the environment of the daemon launched below.
# NOTE(review): presumably PSM2 / libfabric provider settings — confirm.
export FI_PSM2_DISCONNECT=1
export PSM2_MULTI_EP=1

echo "Cleaning host file ..."
rm -f -- "$HOSTSFILE"

# Setting up base srun, daemon commands. Arrays keep every argument intact
# even if a configured path contains whitespace or glob characters.
SRUN_CMD=(srun --disable-status -N "$NODE_NUM" --ntasks="$NODE_NUM"
  --ntasks-per-node=1 --overcommit --contiguous
  --cpus-per-task="$CPUS_PER_TASK" --oversubscribe --mem=0)
DAEMON_CMD=("$DAEMON_BIN" -r "$ROOTDIR" -m "$MOUNTDIR" -H "$HOSTSFILE" -l "$LISTEN")
# Setting up numactl if enabled in config
if [[ $DAEMON_NUMACTL == true ]]; then
  DAEMON_CMD=(numactl --cpunodebind="$DAEMON_CPUNODEBIND"
    --membind="$DAEMON_MEMBIND" "${DAEMON_CMD[@]}")
fi
# enabling auto-sm for daemon
if [[ $AUTO_SM == true ]]; then
  DAEMON_CMD+=(--auto-sm)
fi
# final daemon execute command
if [[ $USE_SRUN == true ]]; then
  DAEMON_EXECUTE=("${SRUN_CMD[@]}" "${DAEMON_CMD[@]}")
else
  DAEMON_EXECUTE=("${DAEMON_CMD[@]}")
fi

if [[ $VERBOSE == true ]]; then
  echo "### Full execute DAEMON command:"
  echo "##### ${DAEMON_EXECUTE[*]}"
fi
# The config file only assigns these; export them for the daemon.
export GKFS_DAEMON_LOG_PATH="$GKFS_DAEMON_LOG_PATH"
export GKFS_DAEMON_LOG_LEVEL="$GKFS_DAEMON_LOG_LEVEL"

# Starting daemon
echo "Starting daemon only ..."
"${DAEMON_EXECUTE[@]}" &
DAEMON_SRUN_PID=$!
wait_for_gkfs_daemons
echo "Running /o/"

echo "Press 'q' to exit"
while :; do
  # The key is read from the descriptor stdout is attached to (<&1), as in
  # the original script. If nothing can be read from it (e.g. stdout is a
  # file), wait for the daemon instead of spinning in this loop forever.
  if ! read -r -n 1 k <&1; then
    echo "No interactive input available. Waiting for daemon to exit ..."
    wait "$DAEMON_SRUN_PID"
    break
  fi
  if [[ $k == q ]]; then
    echo
    echo "Shutting down ..."
    stop_daemon
    break
  else
    echo "Press 'q' to exit"
  fi
done

echo "Nothing left to do. Exiting :)"

# ---------------------------------------------------------------------------
# File: scripts/bin/gkfs_runtime.conf
# ---------------------------------------------------------------------------
#!/bin/bash
# Settings read by the gkfs script above via `source`.

# global
_GKFS_HOSTS_FILE=/home/vef/vef_m2/gkfs_hostfile
# Defined ahead of the client section because LIBGKFS_PROXY_PID_FILE is
# assigned from it at source time; defining it later leaves that value empty.
_PROXY_PID_PATH=/dev/shm/vef_gkfs_proxy.pid

# binaries
PRELOAD_LIB=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/client/libgkfs_intercept.so
DAEMON_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/daemon/gkfs_daemon
PROXY_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/proxy/gkfs_proxy

# client configuration
LIBGKFS_PROXY_PID_FILE=$_PROXY_PID_PATH
LIBGKFS_HOSTS_FILE=$_GKFS_HOSTS_FILE

# daemon configuration
DAEMON_ROOTDIR=/dev/shm/vef_rootdir
#DAEMON_ROOTDIR=/localscratch/$SLURM_JOB_ID/vef_rootdir
DAEMON_MOUNTDIR=/dev/shm/vef_mountdir
DAEMON_NUMACTL=true
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"

# proxy configuration
PROXY_NUMACTL=true
PROXY_CPUNODEBIND="0"
PROXY_MEMBIND="0"

# logging
GKFS_DAEMON_LOG_LEVEL=info
GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log
GKFS_PROXY_LOG_LEVEL=info
GKFS_PROXY_LOG_PATH=/dev/shm/vef_gkfs_proxy.log
LIBGKFS_LOG=errors,warnings
LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log

# benchmark bins
IOR=/lustre/miifs01/project/m2_zdvresearch/vef/benchmarks/ior_marc/build/src/ior
MDTEST=/lustre/miifs01/project/m2_zdvresearch/vef/benchmarks/ior_marc/build/src/mdtest
#!/bin/bash
# File: scripts/bin/gkfs
#
# Starts the gkfs daemon binary named by DAEMON_BIN in gkfs_runtime.conf,
# wrapped in srun, waits until the hosts file contains as many lines as there
# are nodes, and then blocks until 'q' is pressed, at which point the daemon
# is sent SIGINT and the script exits.

# Prints the command-line synopsis to stdout.
usage_short() {
  echo "
usage: gkfs [-h] [-r/--rootdir <config>] [-m/--mountdir <config>] [-n/--numnodes <jobsize>]
            [-a/--auto-sm <false>] [--srun <true>] [-l/--listen <ib0>] [-c/--cpuspertask <64>]
            [-v/--verbose <false>]
            {daemon} {ofi+sockets,ofi+verbs,ofi+psm2}
  "
}

# Prints the help text (currently identical to the short usage).
help_msg() {
  usage_short
}

# Aborts with a usage error when an option that takes a value is the last
# word on the command line (it would otherwise silently become empty).
# Arguments: the remaining command line ("$@"), option name first.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "ERROR: Option $1 requires a value."
    usage_short
    exit 1
  fi
}

# Sends SIGINT to the background daemon launcher, if one was started, and
# waits for it to terminate.
# Globals: DAEMON_SRUN_PID (read)
stop_daemon() {
  if [[ -n ${DAEMON_SRUN_PID:-} ]]; then
    echo "Stopping daemon ..."
    kill -s SIGINT "$DAEMON_SRUN_PID" &
    wait "$DAEMON_SRUN_PID"
  fi
}

# Polls the hosts file every 2 seconds until its line count equals NODE_NUM.
# Gives up after more than 600 polls (roughly 20 minutes), stops the daemon
# launcher that was already started, and exits with status 1.
# Globals: HOSTSFILE (read), NODE_NUM (read), DAEMON_SRUN_PID (read)
wait_for_gkfs_daemons() {
  local wait_cnt=0
  sleep 2
  # A missing hosts file yields an empty count; "+ 0" turns that into 0.
  until [[ $(($(wc -l "$HOSTSFILE" 2> /dev/null | awk '{print $1}') + 0)) -eq $NODE_NUM ]]; do
    #echo "Waiting for all servers to report connection. Try $wait_cnt"
    sleep 2
    wait_cnt=$((wait_cnt + 1))
    if [[ $wait_cnt -gt 600 ]]; then
      echo "Server failed to start. Exiting ..."
      # Do not leave the background launcher running after giving up.
      stop_daemon
      exit 1
    fi
  done
}

SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
CONFIGPATH="${SCRIPTDIR}/gkfs_runtime.conf"
# Remember whether the config could be loaded; the check is deferred until
# after option parsing so that -h/--help still works without a config file.
CONFIG_LOADED=true
# shellcheck source=./gkfs_runtime.conf
source "$CONFIGPATH" || CONFIG_LOADED=false

VERBOSE=false
NODE_NUM=""
MOUNTDIR=$DAEMON_MOUNTDIR
ROOTDIR=$DAEMON_ROOTDIR
HOSTSFILE=$LIBGKFS_HOSTS_FILE
CPUS_PER_TASK=64
AUTO_SM=false
LISTEN="ib0"
# NOTE(review): USE_SRUN defaults to true and --srun also sets it to true, so
# the non-srun branch below cannot currently be selected — confirm intent.
USE_SRUN=true

POSITIONAL=()
while [[ $# -gt 0 ]]; do
  key="$1"

  case ${key} in
  -r | --rootdir)
    require_value "$@"
    ROOTDIR=$2
    shift 2 # past argument and value
    ;;
  -m | --mountdir)
    require_value "$@"
    MOUNTDIR=$2
    shift 2 # past argument and value
    ;;
  -n | --numnodes)
    require_value "$@"
    NODE_NUM=$2
    shift 2 # past argument and value
    ;;
  -a | --auto-sm)
    AUTO_SM=true
    shift # past argument
    ;;
  --srun)
    USE_SRUN=true
    shift # past argument
    ;;
  -l | --listen)
    require_value "$@"
    LISTEN="$2"
    shift 2 # past argument and value
    ;;
  -c | --cpuspertask)
    require_value "$@"
    CPUS_PER_TASK=$2
    shift 2 # past argument and value
    ;;
  -h | --help)
    help_msg
    exit
    ;;
  -v | --verbose)
    VERBOSE=true
    shift # past argument
    ;;
  *) # unknown option
    POSITIONAL+=("$1") # save it in an array for later
    shift              # past argument
    ;;
  esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

if [[ $CONFIG_LOADED != true ]]; then
  echo "ERROR: Could not load configuration file $CONFIGPATH"
  exit 1
fi

# positional arguments
if [[ $# -lt 2 ]]; then
  echo "ERROR: Positional arguments missing."
  usage_short
  exit 1
fi
TYPE="${1}"
# NOTE(review): PROTOCOL is only echoed in verbose mode and is not passed to
# the daemon command line — confirm whether that is intended.
PROTOCOL="${2}"
# will be used for other future binaries, e.g., the proxy
if [[ $TYPE != *"daemon"* ]]; then
  echo "ERROR: Type $TYPE not supported"
  usage_short
  exit 1
fi

# setup
# Only ask Slurm for the node list when the node count was not given.
if [[ -z $NODE_NUM ]]; then
  if [[ -z ${SLURM_JOB_ID:-} ]]; then
    echo "ERROR: SLURM_JOB_ID is not set. Use -n/--numnodes to set the number of nodes."
    exit 1
  fi
  NODELIST=$(scontrol show job "$SLURM_JOB_ID" | grep " NodeList=" | cut -d "=" -f2)
  NODE_NUM=$(scontrol show hostname "$NODELIST" | wc -l)
  # Some wc implementations pad the count with blanks.
  NODE_NUM=${NODE_NUM//[[:space:]]/}
fi
# A non-numeric or zero node count would make srun fail and the wait loop
# compare against garbage, so reject it up front.
if [[ ! $NODE_NUM =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: Invalid number of nodes '$NODE_NUM'"
  exit 1
fi

if [[ $VERBOSE == true ]]; then
  echo "### TYPE: $TYPE"
  echo "### PROTOCOL: $PROTOCOL"
  echo "### MOUNTDIR: $MOUNTDIR"
  echo "### ROOTDIR: $ROOTDIR"
  echo "### NODE_NUM: $NODE_NUM"
  echo "### AUTO_SM: $AUTO_SM"
  echo "### LISTEN: $LISTEN"
  echo "### CPUS_PER_TASK: $CPUS_PER_TASK"
fi

# Exported into the environment of the daemon launched below.
# NOTE(review): presumably PSM2 / libfabric provider settings — confirm.
export FI_PSM2_DISCONNECT=1
export PSM2_MULTI_EP=1

echo "Cleaning host file ..."
rm -f -- "$HOSTSFILE"

# Setting up base srun, daemon commands. Arrays keep every argument intact
# even if a configured path contains whitespace or glob characters.
SRUN_CMD=(srun --disable-status -N "$NODE_NUM" --ntasks="$NODE_NUM"
  --ntasks-per-node=1 --overcommit --contiguous
  --cpus-per-task="$CPUS_PER_TASK" --oversubscribe --mem=0)
DAEMON_CMD=("$DAEMON_BIN" -r "$ROOTDIR" -m "$MOUNTDIR" -H "$HOSTSFILE" -l "$LISTEN")
# Setting up numactl if enabled in config
if [[ $DAEMON_NUMACTL == true ]]; then
  DAEMON_CMD=(numactl --cpunodebind="$DAEMON_CPUNODEBIND"
    --membind="$DAEMON_MEMBIND" "${DAEMON_CMD[@]}")
fi
# enabling auto-sm for daemon
if [[ $AUTO_SM == true ]]; then
  DAEMON_CMD+=(--auto-sm)
fi
# final daemon execute command
if [[ $USE_SRUN == true ]]; then
  DAEMON_EXECUTE=("${SRUN_CMD[@]}" "${DAEMON_CMD[@]}")
else
  DAEMON_EXECUTE=("${DAEMON_CMD[@]}")
fi

if [[ $VERBOSE == true ]]; then
  echo "### Full execute DAEMON command:"
  echo "##### ${DAEMON_EXECUTE[*]}"
fi
# The config file only assigns these; export them for the daemon.
export GKFS_DAEMON_LOG_PATH="$GKFS_DAEMON_LOG_PATH"
export GKFS_DAEMON_LOG_LEVEL="$GKFS_DAEMON_LOG_LEVEL"

# Starting daemon
echo "Starting daemon only ..."
"${DAEMON_EXECUTE[@]}" &
DAEMON_SRUN_PID=$!
wait_for_gkfs_daemons
echo "Running /o/"

echo "Press 'q' to exit"
while :; do
  # The key is read from the descriptor stdout is attached to (<&1), as in
  # the original script. If nothing can be read from it (e.g. stdout is a
  # file), wait for the daemon instead of spinning in this loop forever.
  if ! read -r -n 1 k <&1; then
    echo "No interactive input available. Waiting for daemon to exit ..."
    wait "$DAEMON_SRUN_PID"
    break
  fi
  if [[ $k == q ]]; then
    echo
    echo "Shutting down ..."
    stop_daemon
    break
  else
    echo "Press 'q' to exit"
  fi
done

echo "Nothing left to do. Exiting :)"
#!/bin/bash
# File: scripts/bin/gkfs_runtime.conf
#
# Settings read via `source` by the gkfs script in the same directory.
# Every line is a plain shell assignment evaluated top to bottom, so a
# variable must be defined before another assignment references it.

# global
_GKFS_HOSTS_FILE=/home/vef/vef_m2/gkfs_hostfile
# Defined ahead of the client section because LIBGKFS_PROXY_PID_FILE is
# assigned from it at source time; defining it later leaves that value empty.
_PROXY_PID_PATH=/dev/shm/vef_gkfs_proxy.pid

# binaries
PRELOAD_LIB=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/client/libgkfs_intercept.so
DAEMON_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/daemon/gkfs_daemon
PROXY_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/sshfs/gekkofs/build/src/proxy/gkfs_proxy

# client configuration
LIBGKFS_PROXY_PID_FILE=$_PROXY_PID_PATH
LIBGKFS_HOSTS_FILE=$_GKFS_HOSTS_FILE

# daemon configuration
DAEMON_ROOTDIR=/dev/shm/vef_rootdir
#DAEMON_ROOTDIR=/localscratch/$SLURM_JOB_ID/vef_rootdir
DAEMON_MOUNTDIR=/dev/shm/vef_mountdir
DAEMON_NUMACTL=true
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"

# proxy configuration
PROXY_NUMACTL=true
PROXY_CPUNODEBIND="0"
PROXY_MEMBIND="0"

# logging
GKFS_DAEMON_LOG_LEVEL=info
GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log
GKFS_PROXY_LOG_LEVEL=info
GKFS_PROXY_LOG_PATH=/dev/shm/vef_gkfs_proxy.log
LIBGKFS_LOG=errors,warnings
LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log

# benchmark bins
IOR=/lustre/miifs01/project/m2_zdvresearch/vef/benchmarks/ior_marc/build/src/ior
MDTEST=/lustre/miifs01/project/m2_zdvresearch/vef/benchmarks/ior_marc/build/src/mdtest