Verified Commit 642cdcb3 authored by Marc Vef's avatar Marc Vef
Browse files

New script: gkfs start and stop daemons locally and for srun (beta)

parent b4564080
#!/bin/bash
# Global defaults for the launcher. Values named DAEMON_* / LIBGKFS_* are
# provided by gkfs.conf, which is read from the directory of this script.

# environment handed to every process started from here
export FI_PSM2_DISCONNECT=1 PSM2_MULTI_EP=1

# locate and load the configuration file that sits next to this script
SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
CONFIGPATH="${SCRIPTDIR}/gkfs.conf"
. "$CONFIGPATH"

# behaviour switches (all off unless enabled later)
VERBOSE=false
USE_SRUN=false
RUN_FOREGROUND=false

# daemon settings, seeded from the configuration values loaded above
NODE_NUM=1
MOUNTDIR="${DAEMON_MOUNTDIR}"
ROOTDIR="${DAEMON_ROOTDIR}"
HOSTSFILE="${LIBGKFS_HOSTS_FILE}"
ARGS="${DAEMON_ARGS}"
# one "processor" entry per logical CPU is counted in /proc/cpuinfo
CPUS_PER_TASK="$(grep -c '^processor' /proc/cpuinfo)"
# Block until the hosts file lists as many lines as daemons are expected.
# Globals (read): NODE_NUM (expected count, 1 when empty), HOSTSFILE
# Polls every 2 seconds; after more than 600 polls the whole script exits
# with status 1.
wait_for_gkfs_daemons() {
    local expected_nodes=${NODE_NUM:-1}
    local attempts=0
    local registered

    # give the daemons a moment before the first look at the hosts file
    sleep 2
    while true; do
        # a missing hosts file yields an empty string, which counts as 0 below
        registered=$(wc -l "${HOSTSFILE}" 2> /dev/null | awk '{print $1}')
        if [ $((registered + 0)) -eq "${expected_nodes}" ]; then
            return 0
        fi
        #echo "Waiting for all servers to report connection. Try $attempts"
        sleep 2
        attempts=$((attempts + 1))
        if [ "${attempts}" -gt 600 ]; then
            echo "Server failed to start. Exiting ..."
            exit 1
        fi
    done
}
# Record a daemon pid in the pid file, pruning entries of dead processes.
# Arguments: $1 - pid to append
# Globals (read): DAEMON_PID_FILE, VERBOSE
create_pid_file() {
    local new_pid=${1}
    local target=${DAEMON_PID_FILE}
    local scratch=${DAEMON_PID_FILE}.swp
    local old_pid

    if [[ $VERBOSE == true ]]; then
        echo "Creating pid file at ${target} with pid ${new_pid} ..."
    fi

    # An existing pid file may belong to daemons that are still running:
    # copy only the pids that ps still knows about, then swap the files.
    if [[ -e ${target} ]]; then
        : > "${scratch}"
        while IFS= read -r old_pid; do
            ps -p "${old_pid}" > /dev/null || continue
            echo "${old_pid}" >> "${scratch}"
        done < "${target}"
        mv "${scratch}" "${target}"
    fi

    echo "${new_pid}" >> "${target}"
}
#######################################
# Start the daemon, either directly or through srun, and wait until it has
# registered itself in the hosts file.
# Globals (read):    USE_SRUN, SLURM_JOB_ID, CPUS_PER_TASK, VERBOSE, ROOTDIR,
#                    MOUNTDIR, HOSTSFILE, ARGS, DAEMON_BIN, DAEMON_NUMACTL,
#                    DAEMON_CPUNODEBIND, DAEMON_MEMBIND, RUN_FOREGROUND,
#                    GKFS_DAEMON_LOG_PATH, GKFS_DAEMON_LOG_LEVEL
# Globals (written): NODE_NUM (only when it is empty and srun is used)
# Outputs:           progress messages on stdout
# The command line is assembled in arrays so that paths containing whitespace
# reach the daemon as single arguments.
#######################################
start_daemon() {
    local node_list
    local key
    local daemon_pid
    local -a srun_cmd=()
    local -a daemon_cmd=()
    local -a daemon_execute=()
    # setup
    if [[ ${USE_SRUN} == true ]]; then
        node_list=$(scontrol show job "${SLURM_JOB_ID}" | grep " NodeList=" | cut -d "=" -f2)
        if [[ -z ${NODE_NUM} ]]; then
            NODE_NUM=$(scontrol show hostname "${node_list}" | wc -l)
        fi
        # Setting up base srun cmd: one task per node on NODE_NUM nodes
        srun_cmd=(srun --disable-status -N "${NODE_NUM}" "--ntasks=${NODE_NUM}"
            --ntasks-per-node=1 --overcommit --contiguous
            "--cpus-per-task=${CPUS_PER_TASK}" --oversubscribe --mem=0)
    fi
    if [[ $VERBOSE == true ]]; then
        echo "### mountdir: ${MOUNTDIR}"
        echo "### rootdir: ${ROOTDIR}"
        echo "### node_num: ${NODE_NUM}"
        echo "### args: ${ARGS}"
        echo "### cpus_per_task: ${CPUS_PER_TASK}"
        echo "# Cleaning host file ..."
    fi
    # a hosts file left over from an earlier run would confuse the wait below;
    # it is fine if there is none, hence the discarded error output
    rm "${HOSTSFILE}" 2> /dev/null
    # Setting up base daemon cmd
    daemon_cmd=("${DAEMON_BIN}" -r "${ROOTDIR}" -m "${MOUNTDIR}" -H "${HOSTSFILE}")
    # ARGS is a flat option string and is deliberately split on whitespace
    # shellcheck disable=SC2206
    daemon_cmd+=(${ARGS})
    # Setting up numactl
    if [[ ${DAEMON_NUMACTL} == true ]]; then
        daemon_cmd=(numactl "--cpunodebind=${DAEMON_CPUNODEBIND}"
            "--membind=${DAEMON_MEMBIND}" "${daemon_cmd[@]}")
    fi
    # final daemon execute command
    daemon_execute=("${srun_cmd[@]}" "${daemon_cmd[@]}")
    if [[ ${VERBOSE} == true ]]; then
        echo "### Full execute DAEMON command:"
        echo "##### ${daemon_execute[*]}"
    fi
    # setup environment variables
    export GKFS_DAEMON_LOG_PATH="${GKFS_DAEMON_LOG_PATH}"
    export GKFS_DAEMON_LOG_LEVEL="${GKFS_DAEMON_LOG_LEVEL}"
    echo "Starting daemons ..."
    "${daemon_execute[@]}" &
    daemon_pid=$!
    wait_for_gkfs_daemons
    echo "Running ..."
    if [[ ${RUN_FOREGROUND} == true ]]; then
        echo "Press 'q' to exit"
        while :; do
            # The key is read from file descriptor 1 instead of stdin.
            # NOTE(review): presumably so the backgrounded job cannot
            # consume the keystroke - confirm.
            if ! read -r -n 1 key <&1; then
                # No key can ever arrive (EOF or descriptor not readable):
                # wait for the daemon instead of looping at full speed.
                wait "${daemon_pid}"
                break
            fi
            if [[ ${key} == q ]]; then
                echo
                echo "Shutting down ..."
                if [[ -n ${daemon_pid} ]]; then
                    kill -s SIGINT "${daemon_pid}"
                    wait "${daemon_pid}"
                fi
                break
            else
                echo "Press 'q' to exit"
            fi
        done
    else
        # background mode: remember the pid so a later "stop" can find it
        create_pid_file "${daemon_pid}"
    fi
}
# Send SIGINT to every live pid listed in the pid file, then delete the file.
# Globals (read): DAEMON_PID_FILE, VERBOSE
stop_daemons() {
    local pid_list=${DAEMON_PID_FILE}
    local daemon_pid

    # no pid file means nothing was recorded as started
    if [[ ! -e ${pid_list} ]]; then
        echo "No pid file found -> no daemon running. Exiting ..."
        return
    fi

    while IFS= read -r daemon_pid; do
        # entries whose process no longer exists are skipped
        ps -p "${daemon_pid}" > /dev/null || continue
        if [[ $VERBOSE == true ]]; then
            echo "Stopping daemon with pid ${daemon_pid}"
        fi
        kill -s SIGINT "${daemon_pid}" &
        if [[ $VERBOSE == true ]]; then
            echo "Waiting for daemons to exit ..."
        fi
        # tail --pid returns once the process is gone; timeout caps the
        # wait at one second per daemon
        timeout 1 tail --pid=${daemon_pid} -f /dev/null
    done < "${pid_list}"
    rm "${pid_list}"
}
# Print the option and command synopsis of gkfs.sh on stdout, framed by one
# blank line above and below.
usage_short() {
    cat << 'EOF'

usage: gkfs.sh [-h] [-r/--rootdir <config>] [-m/--mountdir <config>] [-n/--numnodes <jobsize>] [-f/--foreground <false>]
[-a/--args <daemon_args>] [--srun <false>] [-c/--cpuspertask <64>] [-v/--verbose <false>]
{start,stop}

EOF
}
# Help output for -h/--help; currently nothing more than the short usage.
help_msg() { usage_short; }
# parse input
# Known options update the global settings; every other word is collected in
# POSITIONAL and interpreted afterwards (the first one is the command).
POSITIONAL=()

# Abort with the usage text when an option that takes a value is the last
# word on the command line. Call as: require_value "$@"
require_value() {
    if [[ $# -lt 2 ]]; then
        echo "ERROR: option ${1} requires a value."
        usage_short
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    key="$1"
    case ${key} in
        -r | --rootdir)
            require_value "$@"
            ROOTDIR=$2
            shift 2 # past argument and value
            ;;
        -m | --mountdir)
            require_value "$@"
            MOUNTDIR=$2
            shift 2 # past argument and value
            ;;
        -n | --numnodes)
            require_value "$@"
            NODE_NUM=$2
            shift 2 # past argument and value
            ;;
        -a | --args)
            require_value "$@"
            ARGS=$2
            shift 2 # past argument and value
            ;;
        --srun)
            USE_SRUN=true
            shift # past argument
            ;;
        -f | --foreground)
            RUN_FOREGROUND=true
            shift # past argument
            ;;
        -c | --cpuspertask)
            require_value "$@"
            CPUS_PER_TASK=$2
            shift 2 # past argument and value
            ;;
        -h | --help)
            help_msg
            exit
            ;;
        -v | --verbose)
            VERBOSE=true
            shift # past argument
            ;;
        *) # unknown option
            POSITIONAL+=("$1") # save it in an array for later
            shift              # past argument
            ;;
    esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters
# positional arguments
if [[ -z ${1+x} ]]; then
    echo "ERROR: Positional arguments missing."
    usage_short
    exit 1
fi
command="${1}"
# The command must match exactly: a substring test would let words such as
# "restart" pass validation and then silently do nothing.
case ${command} in
    start)
        start_daemon
        ;;
    stop)
        stop_daemons
        ;;
    *)
        echo "ERROR: command ${command} not supported"
        usage_short
        exit 1
        ;;
esac
if [[ $VERBOSE == true ]]; then
    echo "Nothing left to do. Exiting :)"
fi
\ No newline at end of file
#!/bin/bash
# Configuration for gkfs.sh, which sources this file on start-up.
# NOTE(review): gkfs.sh uses these paths as given and does not change
# directory, so the relative paths below resolve against the directory the
# script is started from - confirm that this is intended.

# binaries (default for project_dir/build)
# PRELOAD_LIB and PROXY_BIN are not referenced by gkfs.sh; presumably they
# are meant for other tooling - verify before removing.
PRELOAD_LIB=../../build/src/client/libgkfs_intercept.so
# executable launched by "gkfs.sh start"
DAEMON_BIN=../../build/src/daemon/gkfs_daemon
PROXY_BIN=../../build/src/proxy/gkfs_proxy
# client configuration
# gkfs.sh passes this file to the daemon with -H, deletes it before each
# start and counts its lines to decide when all daemons are up
LIBGKFS_HOSTS_FILE=../../build/gkfs_hostfile
# daemon configuration
# defaults for the daemon's -r and -m options (overridable with -r / -m)
DAEMON_ROOTDIR=/dev/shm/gkfs_rootdir
DAEMON_MOUNTDIR=/dev/shm/gkfs_mountdir
# when set to true the daemon is started through numactl with the
# --cpunodebind / --membind values below
DAEMON_NUMACTL=false
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"
# file in which gkfs.sh records the pids of daemons started in the background
DAEMON_PID_FILE=/dev/shm/gkfs_daemon.pid
# extra options appended to the daemon command line (overridable with -a)
DAEMON_ARGS=""
# logging
# the two GKFS_DAEMON_* values are exported by gkfs.sh before the daemon starts
GKFS_DAEMON_LOG_LEVEL=info
GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log
# LIBGKFS_LOG and LIBGKFS_LOG_OUTPUT are not referenced by gkfs.sh;
# presumably client-side settings - verify against the client library.
LIBGKFS_LOG=errors,warnings
LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment