Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
hpc
gekkofs
Commits
642cdcb3
Verified
Commit
642cdcb3
authored
Apr 26, 2022
by
Marc Vef
Browse files
New script: gkfs start and stop daemons locally and for srun (beta)
parent
b4564080
Changes
2
Hide whitespace changes
Inline
Side-by-side
scripts/run/gkfs
0 → 100755
View file @
642cdcb3
#!/bin/bash
# global variables
# Environment exported for the processes started by this script.
export FI_PSM2_DISCONNECT=1
export PSM2_MULTI_EP=1

# Directory containing this script; gkfs.conf is expected right next to it.
SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
CONFIGPATH="${SCRIPTDIR}/gkfs.conf"

# Every default below comes from the configuration file, so continuing
# without it would only fail later with empty paths and an empty command.
if [[ ! -r "${CONFIGPATH}" ]]; then
    echo "ERROR: configuration file ${CONFIGPATH} not found or not readable." >&2
    exit 1
fi
# shellcheck source=/dev/null
source "${CONFIGPATH}"

# Defaults; the command line options parsed further down may override them.
VERBOSE=false
NODE_NUM=1
MOUNTDIR=${DAEMON_MOUNTDIR}
ROOTDIR=${DAEMON_ROOTDIR}
HOSTSFILE=${LIBGKFS_HOSTS_FILE}
# Number of "processor" entries in /proc/cpuinfo. Fall back to nproc, then to
# 1, so that the value handed to srun is always a positive integer.
CPUS_PER_TASK=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
if [[ ! ${CPUS_PER_TASK} =~ ^[1-9][0-9]*$ ]]; then
    CPUS_PER_TASK=$(nproc 2>/dev/null)
    if [[ ! ${CPUS_PER_TASK} =~ ^[1-9][0-9]*$ ]]; then
        CPUS_PER_TASK=1
    fi
fi
ARGS=${DAEMON_ARGS}
USE_SRUN=false
RUN_FOREGROUND=false
#######################################
# Block until all expected daemons have registered in the hosts file.
# After an initial 2 second delay the line count of HOSTSFILE is compared
# with the expected number of daemons every 2 seconds, giving up after 600
# further polls.
# Globals:
#   HOSTSFILE (read) - file whose line count is compared to the node count
#   NODE_NUM  (read) - expected number of daemons; 1 is used when empty
# Arguments:
#   $1 - optional PID of the process that launched the daemons. When given,
#        waiting stops as soon as that process is gone while registrations
#        are still missing, instead of polling until the timeout.
# Outputs:
#   Error messages on stderr.
# Returns:
#   0 once the hosts file lists the expected number of daemons. Exits the
#   script with status 1 on timeout, on a dead launcher, or when the node
#   count is not a number.
#######################################
wait_for_gkfs_daemons() {
    local launcher_pid=${1:-}
    local nodes=${NODE_NUM:-1}
    local server_wait_cnt=0
    local registered

    # A non-numeric count can never match and would only burn the full timeout.
    if [[ ! ${nodes} =~ ^[0-9]+$ ]]; then
        echo "ERROR: invalid number of nodes '${nodes}'" >&2
        exit 1
    fi

    sleep 2
    while :; do
        registered=0
        if [[ -r ${HOSTSFILE} ]]; then
            # "+ 0" turns empty or space-padded wc output into a plain integer.
            registered=$(($(wc -l <"${HOSTSFILE}") + 0))
        fi
        if [ "${registered}" -eq "${nodes}" ]; then
            return 0
        fi
        # Nothing will register anymore once the launcher itself is gone.
        if [[ -n ${launcher_pid} ]] && ! kill -0 "${launcher_pid}" 2>/dev/null; then
            echo "Server failed to start. Exiting ..." >&2
            exit 1
        fi
        server_wait_cnt=$((server_wait_cnt + 1))
        if [ "${server_wait_cnt}" -gt 600 ]; then
            echo "Server failed to start. Exiting ..." >&2
            exit 1
        fi
        sleep 2
    done
}
#######################################
# Record a daemon PID in the PID file, dropping stale entries on the way.
# Entries already in the file are kept only if they are numeric and belong to
# a process this user can still signal; blank or malformed lines are dropped.
# The file is rewritten in place, so no temporary file with a predictable
# name is created next to it.
# Globals:
#   DAEMON_PID_FILE (read) - path of the PID file
#   VERBOSE         (read) - "true" enables a progress message
# Arguments:
#   $1 - PID to record
# Outputs:
#   Progress message on stdout when verbose, errors on stderr.
# Returns:
#   0 on success, 1 if the PID is missing or not numeric or if
#   DAEMON_PID_FILE is empty.
#######################################
create_pid_file() {
    local -r pid_file=${DAEMON_PID_FILE:-}
    local -r pid=${1:-}
    local line
    local -a live_pids=()

    if [[ -z ${pid_file} ]]; then
        echo "ERROR: DAEMON_PID_FILE is not set." >&2
        return 1
    fi
    if [[ ! ${pid} =~ ^[0-9]+$ ]]; then
        echo "ERROR: cannot record invalid pid '${pid}'." >&2
        return 1
    fi
    if [[ ${VERBOSE} == true ]]; then
        echo "Creating pid file at ${pid_file} with pid ${pid} ..."
    fi

    # if PID file exists another daemon could run
    if [[ -e ${pid_file} ]]; then
        # "|| -n" also picks up a last line that lacks a trailing newline.
        while IFS= read -r line || [[ -n ${line} ]]; do
            # process with pid still running
            if [[ ${line} =~ ^[0-9]+$ ]] && kill -0 "${line}" 2>/dev/null; then
                live_pids+=("${line}")
            fi
        done <"${pid_file}"
    fi

    # create pid file with only valid pids, followed by the new one
    live_pids+=("${pid}")
    printf '%s\n' "${live_pids[@]}" >"${pid_file}"
}
#######################################
# Start the GekkoFS daemon(s), locally or through srun, and wait until they
# have registered in the hosts file.
# The command line is assembled as an array, so paths containing spaces reach
# the daemon as single arguments. ARGS is split on whitespace into separate
# arguments without glob expansion.
# Globals:
#   DAEMON_BIN, ROOTDIR, MOUNTDIR, HOSTSFILE, ARGS (read) - daemon command
#   DAEMON_NUMACTL, DAEMON_CPUNODEBIND, DAEMON_MEMBIND (read) - numactl prefix
#   USE_SRUN, CPUS_PER_TASK, SLURM_JOB_ID (read) - srun launch
#   NODE_NUM (read; written from the Slurm node list when empty with srun)
#   RUN_FOREGROUND, VERBOSE (read)
#   GKFS_DAEMON_LOG_PATH, GKFS_DAEMON_LOG_LEVEL (exported)
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout, errors on stderr.
# Returns:
#   1 if DAEMON_BIN is empty; otherwise the status of the last step (the wait
#   on the daemon in foreground mode, create_pid_file in background mode).
#######################################
start_daemon() {
    local node_list
    local daemon_pid
    local key
    local -a srun_cmd=()
    local -a extra_args=()
    local -a daemon_cmd=()
    local -a daemon_execute=()

    if [[ -z ${DAEMON_BIN:-} ]]; then
        echo "ERROR: DAEMON_BIN is not set; nothing to start." >&2
        return 1
    fi

    # setup
    if [[ ${USE_SRUN} == true ]]; then
        # The Slurm node list is only needed to derive a missing node count.
        if [[ -z ${NODE_NUM} ]]; then
            node_list=$(scontrol show job "${SLURM_JOB_ID:-}" | grep " NodeList=" | cut -d "=" -f2)
            NODE_NUM=$(scontrol show hostname "${node_list}" | wc -l)
        fi
        # Setting up base srun cmd
        srun_cmd=(srun --disable-status -N "${NODE_NUM}" "--ntasks=${NODE_NUM}"
            --ntasks-per-node=1 --overcommit --contiguous
            "--cpus-per-task=${CPUS_PER_TASK}" --oversubscribe --mem=0)
    fi

    if [[ ${VERBOSE} == true ]]; then
        echo "### mountdir: ${MOUNTDIR}"
        echo "### rootdir: ${ROOTDIR}"
        echo "### node_num: ${NODE_NUM}"
        echo "### args: ${ARGS}"
        echo "### cpus_per_task: ${CPUS_PER_TASK}"
        echo "# Cleaning host file ..."
    fi
    # A stale hosts file would make the registration count wrong.
    rm -f -- "${HOSTSFILE}"

    # Setting up base daemon cmd
    if [[ -n ${ARGS} ]]; then
        read -r -a extra_args <<<"${ARGS}"
    fi
    daemon_cmd=("${DAEMON_BIN}" -r "${ROOTDIR}" -m "${MOUNTDIR}" -H "${HOSTSFILE}" "${extra_args[@]}")

    # Setting up numactl
    if [[ ${DAEMON_NUMACTL} == true ]]; then
        daemon_cmd=(numactl "--cpunodebind=${DAEMON_CPUNODEBIND}" "--membind=${DAEMON_MEMBIND}" "${daemon_cmd[@]}")
    fi

    # final daemon execute command
    daemon_execute=("${srun_cmd[@]}" "${daemon_cmd[@]}")
    if [[ ${VERBOSE} == true ]]; then
        echo "### Full execute DAEMON command:"
        echo "##### ${daemon_execute[*]}"
    fi

    # setup environment variables
    export GKFS_DAEMON_LOG_PATH="${GKFS_DAEMON_LOG_PATH}"
    export GKFS_DAEMON_LOG_LEVEL="${GKFS_DAEMON_LOG_LEVEL}"

    echo "Starting daemons ..."
    "${daemon_execute[@]}" &
    daemon_pid=$!
    # The launcher PID is passed along so the wait can stop early if it died.
    wait_for_gkfs_daemons "${daemon_pid}"
    echo "Running ..."

    if [[ ${RUN_FOREGROUND} == true ]]; then
        while :; do
            echo "Press 'q' to exit"
            # Read the key from stdin. When stdin is closed or not readable
            # there is no way to receive 'q', so just stay attached to the
            # daemon instead of spinning on a failing read.
            if ! IFS= read -r -n 1 key; then
                wait "${daemon_pid}"
                break
            fi
            if [[ ${key} == q ]]; then
                echo
                echo "Shutting down ..."
                kill -s SIGINT "${daemon_pid}"
                wait "${daemon_pid}"
                break
            fi
        done
    else
        create_pid_file "${daemon_pid}"
    fi
}
#######################################
# Stop every daemon listed in the PID file by sending it SIGINT.
# Each process gets a bounded amount of time to exit. PIDs that are still
# alive afterwards are written back to the PID file, so a later stop can
# retry them; the file is removed only when nothing is left running.
# Blank or non-numeric lines in the PID file are ignored.
# Globals:
#   DAEMON_PID_FILE (read) - path of the PID file
#   VERBOSE         (read) - "true" enables progress messages
# Arguments:
#   $1 - optional number of whole seconds to wait per daemon (default 1)
# Outputs:
#   Progress messages on stdout, warnings on stderr.
# Returns:
#   0 if no PID file exists or all daemons stopped, 1 if at least one daemon
#   is still running.
#######################################
stop_daemons() {
    local timeout_secs=${1:-1}
    local -r pid_file=${DAEMON_PID_FILE}
    local line
    local tries
    local -a survivors=()

    if [[ ! ${timeout_secs} =~ ^[0-9]+$ ]]; then
        timeout_secs=1
    fi

    if [[ ! -e ${pid_file} ]]; then
        echo "No pid file found -> no daemon running. Exiting ..."
        return 0
    fi

    while IFS= read -r line || [[ -n ${line} ]]; do
        [[ ${line} =~ ^[0-9]+$ ]] || continue
        # Entries whose process is already gone need no signal.
        kill -0 "${line}" 2>/dev/null || continue

        if [[ ${VERBOSE} == true ]]; then
            echo "Stopping daemon with pid ${line}"
        fi
        kill -s SIGINT "${line}" 2>/dev/null

        # poll pid until it stopped or the timeout is reached
        if [[ ${VERBOSE} == true ]]; then
            echo "Waiting for daemons to exit ..."
        fi
        tries=$((timeout_secs * 10))
        while ((tries > 0)) && kill -0 "${line}" 2>/dev/null; do
            sleep 0.1
            tries=$((tries - 1))
        done

        if kill -0 "${line}" 2>/dev/null; then
            echo "WARNING: daemon with pid ${line} still running after ${timeout_secs}s; keeping it in ${pid_file}" >&2
            survivors+=("${line}")
        fi
    done <"${pid_file}"

    if ((${#survivors[@]} > 0)); then
        printf '%s\n' "${survivors[@]}" >"${pid_file}"
        return 1
    fi
    rm -f -- "${pid_file}"
}
#######################################
# Print the one-paragraph usage synopsis.
# The program name is taken from the invocation ($0), so the synopsis always
# matches the name the script was started with. Flags that take no value are
# listed without a value placeholder.
# Arguments:
#   None
# Outputs:
#   The synopsis on stdout, surrounded by blank lines.
#######################################
usage_short() {
    echo "
usage: ${0##*/} [-h] [-r/--rootdir <path>] [-m/--mountdir <path>] [-n/--numnodes <jobsize>] [-f/--foreground]
       [-a/--args <daemon_args>] [--srun] [-c/--cpuspertask <cpus>] [-v/--verbose]
       {start,stop}
"
}
#######################################
# Print the full help text: the usage synopsis followed by a description of
# the commands and of every option accepted by the argument parser.
# Arguments:
#   None
# Outputs:
#   Help text on stdout.
#######################################
help_msg() {
    usage_short
    cat <<'EOF'
Start or stop GekkoFS daemons, locally or through srun.

positional arguments:
  start                   start the daemon(s) and wait until they registered
                          in the hosts file
  stop                    send SIGINT to all daemons listed in the pid file

optional arguments:
  -h, --help              show this help message and exit
  -r, --rootdir <path>    daemon root directory (passed to the daemon as -r);
                          default: DAEMON_ROOTDIR from gkfs.conf
  -m, --mountdir <path>   daemon mount directory (passed to the daemon as -m);
                          default: DAEMON_MOUNTDIR from gkfs.conf
  -n, --numnodes <n>      number of daemons to wait for and, with --srun, the
                          number of nodes and tasks; default: 1
  -a, --args <args>       additional daemon arguments;
                          default: DAEMON_ARGS from gkfs.conf
  --srun                  launch the daemons with srun
  -f, --foreground        keep the script attached; press 'q' to shut down
  -c, --cpuspertask <n>   value for srun --cpus-per-task;
                          default: processor count from /proc/cpuinfo
  -v, --verbose           print additional progress information
EOF
}
# parse input

# Abort with a usage error unless the option named in $1 is followed by a
# value. $2 is the number of arguments left, including the option itself.
require_option_value() {
    if [[ $2 -lt 2 ]]; then
        echo "ERROR: option $1 requires a value." >&2
        usage_short
        exit 1
    fi
}

# Abort with a usage error unless $2 (the value given for option $1) is a
# positive integer.
require_positive_integer() {
    if [[ ! $2 =~ ^[1-9][0-9]*$ ]]; then
        echo "ERROR: option $1 expects a positive integer, got '$2'." >&2
        usage_short
        exit 1
    fi
}

POSITIONAL=()
while [[ $# -gt 0 ]]; do
    key="$1"
    case "${key}" in
    -r | --rootdir)
        require_option_value "${key}" "$#"
        ROOTDIR=$2
        shift 2 # past argument and value
        ;;
    -m | --mountdir)
        require_option_value "${key}" "$#"
        MOUNTDIR=$2
        shift 2 # past argument and value
        ;;
    -n | --numnodes)
        require_option_value "${key}" "$#"
        # An empty value stays allowed: start_daemon then derives the node
        # count from the Slurm job when --srun is used.
        if [[ -n $2 ]]; then
            require_positive_integer "${key}" "$2"
        fi
        NODE_NUM=$2
        shift 2 # past argument and value
        ;;
    -a | --args)
        require_option_value "${key}" "$#"
        ARGS=$2
        shift 2 # past argument and value
        ;;
    --srun)
        USE_SRUN=true
        shift # past argument
        ;;
    -f | --foreground)
        RUN_FOREGROUND=true
        shift # past argument
        ;;
    -c | --cpuspertask)
        require_option_value "${key}" "$#"
        require_positive_integer "${key}" "$2"
        CPUS_PER_TASK=$2
        shift 2 # past argument and value
        ;;
    -h | --help)
        help_msg
        exit
        ;;
    -v | --verbose)
        VERBOSE=true
        shift # past argument
        ;;
    *) # unknown option
        POSITIONAL+=("$1") # save it in an array for later
        shift              # past argument
        ;;
    esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

# positional arguments
if [[ -z ${1+x} ]]; then
    echo "ERROR: Positional arguments missing."
    usage_short
    exit 1
fi
command="${1}"

# Only the exact words "start" and "stop" are commands. Anything else,
# including words that merely contain them, is rejected instead of being
# accepted and then silently ignored.
case "${command}" in
start)
    start_daemon
    ;;
stop)
    stop_daemons
    ;;
*)
    echo "ERROR: command ${command} not supported"
    usage_short
    exit 1
    ;;
esac

if [[ ${VERBOSE} == true ]]; then
    echo "Nothing left to do. Exiting :)"
fi
\ No newline at end of file
scripts/run/gkfs.conf
0 → 100644
View file @
642cdcb3
#!/bin/bash
# Directory containing this configuration file. The default build paths
# below are anchored here so that they resolve the same way regardless of the
# working directory of the script that sources this file.
GKFS_CONF_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
GKFS_BUILD_DIR="${GKFS_CONF_DIR}/../../build"

# binaries (default for project_dir/build)
PRELOAD_LIB="${GKFS_BUILD_DIR}/src/client/libgkfs_intercept.so"
DAEMON_BIN="${GKFS_BUILD_DIR}/src/daemon/gkfs_daemon"
PROXY_BIN="${GKFS_BUILD_DIR}/src/proxy/gkfs_proxy"

# client configuration
LIBGKFS_HOSTS_FILE="${GKFS_BUILD_DIR}/gkfs_hostfile"

# daemon configuration
DAEMON_ROOTDIR=/dev/shm/gkfs_rootdir
DAEMON_MOUNTDIR=/dev/shm/gkfs_mountdir
# set to true to prefix the daemon command with numactl using the two
# bindings below
DAEMON_NUMACTL=false
DAEMON_CPUNODEBIND="1"
DAEMON_MEMBIND="1"
DAEMON_PID_FILE=/dev/shm/gkfs_daemon.pid
# additional arguments appended to the daemon command line
DAEMON_ARGS=""

# logging
GKFS_DAEMON_LOG_LEVEL=info
GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log
LIBGKFS_LOG=errors,warnings
LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment