Verified Commit 4ee1445e authored by Alberto Miranda's avatar Alberto Miranda ♨️
Browse files

[Slurm] Start Cargo as a systemd user-level service

Cargo is now started as a systemd user-level service whenever a
job requests an ad-hoc storage service.

The following scripts have been added:

- `cargo@.service`: The Cargo systemd service file. Installed in
  `$SYSTEMD_UNIT_DIRECTORY` for user-level services (typically
  `/usr/lib/systemd/user/`)
- `cargoctl`: Control script for the Cargo user-level service. Installed
  in `$CMAKE_INSTALL_DATADIR/scord/cargoctl` (typically `/usr/share/scord`).
  Used internally by `cargo@.service`.

The `scord_prolog.sh` and `scord_epilog.sh` scripts have been updated to use
the new scripts when configuring the environment for a job.
parent 72648bdf
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -157,6 +157,12 @@ set(SCORD_CTL_BIND_PORT
  )
message(STATUS "[${PROJECT_NAME}] server bind port: ${SCORD_CTL_BIND_PORT}")

# Port used to communicate with Cargo. The value is substituted into the
# generated Slurm scripts wherever they reference @CARGO_PORT@.
set(CARGO_PORT
  "62000"
  CACHE STRING
  "Define the port through which we should communicate with Cargo"
  )
message(STATUS "[${PROJECT_NAME}] Cargo port: ${CARGO_PORT}")

option(SCORD_BUILD_EXAMPLES "Build examples (disabled by default)" OFF)

option(SCORD_BUILD_TESTS "Build tests (disabled by default)" OFF)
+9 −7
Original line number Diff line number Diff line
@@ -59,6 +59,11 @@ install(
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
)

# Look for an MPI launcher under any of its usual names; the first match is
# stored in MPIEXEC_EXECUTABLE.
find_program(
  MPIEXEC_EXECUTABLE
  NAMES
    mpiexec
    mpiexec.hydra
    mpiexec.mpd
    mpirun
  PATH_SUFFIXES
    bin
    sbin
  DOC "MPI launcher executable"
)

# Instantiate each Slurm helper script from its `.sh.in` template, replacing
# only @VAR@ references so that shell `${...}` expansions are left untouched.
foreach(script_name IN ITEMS scord_common scord_prolog scord_epilog)
  configure_file(${script_name}.sh.in ${script_name}.sh @ONLY)
endforeach()
@@ -66,12 +71,9 @@ configure_file(scord_epilog.sh.in scord_epilog.sh @ONLY)
# The install subdirectory is named after the current source directory.
get_filename_component(INSTALL_DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} NAME)

# Install the configured scripts under
# <datadir>/<project>/<INSTALL_DESTINATION>.
# NOTE(review): this call names both a FILES and a PROGRAMS artifact kind and
# lists scord_common.sh under each of them — confirm which form is intended.
install(
  FILES ${CMAKE_CURRENT_BINARY_DIR}/scord_common.sh
  PROGRAMS
    ${CMAKE_CURRENT_BINARY_DIR}/scord_common.sh
    ${CMAKE_CURRENT_BINARY_DIR}/scord_prolog.sh
    ${CMAKE_CURRENT_BINARY_DIR}/scord_epilog.sh
  DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/${INSTALL_DESTINATION}
  PERMISSIONS
    OWNER_EXECUTE OWNER_WRITE OWNER_READ
    GROUP_EXECUTE GROUP_READ
    WORLD_EXECUTE WORLD_READ
)
+29 −0
Original line number Diff line number Diff line
@@ -103,6 +103,34 @@ function get_nodelist {
  readarray -t rv < <(scontrol show hostnames "$2")
}

# Get the list of hostnames associated with a hostlist as a CSV string
#   Usage: get_nodelist_as_csv <out_var> <nodelist>
#   Arguments:
#     out_var   name of the caller's variable that receives the CSV string
#     nodelist  hostlist expression, expanded through get_nodelist
#   Returns:
#     0 on success; 1 if an argument is missing or get_nodelist fails
#   Example:
#     declare hn_csv
#     get_nodelist_as_csv hn_csv tux[1,3-4],snoo[1-2]
#     echo "${hn_csv}"  # tux1,tux3,tux4,snoo1,snoo2
function get_nodelist_as_csv {

  if [[ -z "$1" ]]; then
    echo "No output variable specified"
    return 1
  fi

  if [[ -z "$2" ]]; then
    echo "No hostlist specified"
    return 1
  fi

  # The locals carry a function-specific prefix on purpose: a nameref is
  # resolved through dynamic scoping, so a local that shares its name with the
  # caller's output variable (e.g. `hn` or `rv`) would shadow it and the result
  # would never reach the caller.
  local -n __gnac_out=$1
  local -a __gnac_hosts
  if ! get_nodelist __gnac_hosts "$2"; then
    return 1
  fi

  # Emit every hostname followed by a comma, then drop the trailing comma.
  # shellcheck disable=SC2034
  printf -v __gnac_out "%s," "${__gnac_hosts[@]}"
  __gnac_out="${__gnac_out%,}"
}

# Get the list of IP addresses associated with a hostname
#   Usage: get_addrs <out_array> <hostname>
#   Example:
@@ -153,3 +181,4 @@ function get_addrs {
# Settings shared by the scripts that source this file. The @VAR@ placeholders
# are filled in when the template is configured.
SCORDCTL_PROGRAM="@SCORD_CTL_BIN@"
SCORDCTL_PROTO="@SCORD_TRANSPORT_PROTOCOL@"
SCORDCTL_PORT="@SCORD_CTL_BIND_PORT@"
CARGO_PORT="@CARGO_PORT@"
export SCORDCTL_PROGRAM SCORDCTL_PROTO SCORDCTL_PORT CARGO_PORT
+37 −2
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@ get_nodelist hostnames "$SLURM_NODELIST"

# create a temporary directory for the job and redirect both stdout and stderr
# to a log file within it
WORKDIR="$EPILOG_TMPDIR/$SLURM_JOB_USER/$SLURM_JOB_ID"
if [ ! -d "$WORKDIR" ]; then
  # the directory is created as the job's owner
  run_as "$SLURM_JOB_USER" mkdir -p "$WORKDIR"
fi
@@ -77,7 +77,42 @@ if [[ "$HOSTNAME" != "${hostnames[0]}" ]]; then
fi

echo "Shutting down adhoc controller for job $SLURM_JOB_ID (user: $SLURM_JOB_USER)"
# The controller's PID is read from scord-ctl.pid in the job's directory; when
# that file is absent nothing is signalled.
PIDFILE="$EPILOG_TMPDIR/$SLURM_JOB_USER/$SLURM_JOB_ID/scord-ctl.pid"
if [[ -f "$PIDFILE" ]]; then
  kill -TERM "$(<"$PIDFILE")"
fi

# Resolve the IPv4 addresses of this node (the first node of the allocation)
# and give up if the lookup fails or comes back empty.
declare -a addrs
get_addrs addrs "$HOSTNAME" v4 || {
  echo "Error searching IP addresses for $HOSTNAME."
  exit 1
}

if [[ ${#addrs[@]} -eq 0 ]]; then
  echo "No addresses found."
  exit 1
fi

# Both identifiers are needed below to derive the Cargo instance ID.
if [[ -z $SLURM_JOB_ID ]] || [[ -z $SLURM_JOB_UID ]]; then
  echo "Missing required environment variables" >&2
  exit 1
fi

# Locate the per-job Cargo configuration. The single quotes are intentional:
# $HOME must be expanded on the job owner's side, not by this script.
# shellcheck disable=SC2016
USER_HOME=$(run_as "$SLURM_JOB_USER" echo '$HOME')
CONFIG_DIRECTORY="${XDG_CONFIG_HOME:-$USER_HOME/.config}/cargo"

# The instance ID is derived from the job ID and the user ID, exactly as the
# prolog derives it, so that both scripts address the same service instance.
CARGO_ID=$(echo "cargo_$SLURM_JOB_ID.$SLURM_JOB_UID" | sha256sum | awk '{ print $1 }')
CARGO_CONFIG_FILE=$CONFIG_DIRECTORY/$CARGO_ID.cfg
CARGO_SERVICE_NAME=$(systemd-escape --template cargo@.service "$CARGO_ID")

echo "Shutting down Cargo data stager for job $SLURM_JOB_ID (user: $SLURM_JOB_USER)"

# Remember a failed stop instead of exiting right away: the configuration file
# has to be removed in either case, otherwise it is left behind for good.
cargo_stop_rc=0
if ! run_as "$SLURM_JOB_USER" systemctl --user stop "$CARGO_SERVICE_NAME"; then
  echo "Failed to stop Cargo service $CARGO_SERVICE_NAME" >&2
  cargo_stop_rc=1
fi

if [[ -e "$CARGO_CONFIG_FILE" ]]; then
  rm -f "$CARGO_CONFIG_FILE" || cargo_stop_rc=1
fi

# Report the failure only after the cleanup has been attempted.
if ((cargo_stop_rc != 0)); then
  exit "$cargo_stop_rc"
fi
+86 −6
Original line number Diff line number Diff line
@@ -25,7 +25,6 @@
# SPDX-License-Identifier: GPL-3.0-or-later                                    #
################################################################################


# This is a prolog script for SLURM that starts the SCORD adhoc controller
# for the job. It is meant to be used with the SCORD SLURM plugin.
# The script is executed as the user that submitted the job. The script
@@ -59,10 +58,12 @@ fi
# short name of the node running this script; compared further down against
# the first node of the allocation
HOSTNAME=$(hostname -s)
# hostnames of the allocation, as an array...
declare -a hostnames
get_nodelist hostnames "$SLURM_NODELIST"
# ...and as a single comma-separated string
declare hostnames_csv
get_nodelist_as_csv hostnames_csv "$SLURM_NODELIST"

# create a temporary directory for the job and redirect both stdout and stderr
# to a log file within it
WORKDIR="$PROLOG_TMPDIR/$SLURM_JOB_USER/$SLURM_JOB_ID"
if [ ! -d "$WORKDIR" ]; then
  run_as "$SLURM_JOB_USER" mkdir -p "$WORKDIR"
fi
@@ -72,8 +73,8 @@ if ((${#hostnames[@]} == 0)); then
  exit 0
fi

# only run on the first node of the allocation (both scord-ctl and Cargo
# are always started on the first node of the allocation)
if [[ "$HOSTNAME" != "${hostnames[0]}" ]]; then
  exit 0
fi
@@ -90,7 +91,7 @@ if ((${#addrs[@]} == 0)); then
  exit 1
fi

# use the first whitespace-separated field of the first address entry
ADDRESS=$(echo "${addrs[0]}" | awk '{ print $1; exit }')

# now that we have a specific working directory, move the previous log file
# into $WORKDIR so that we have all messages in one place (since the file is
@@ -98,7 +99,11 @@ ADDRESS=$(echo "${addrs[@]}" | awk '{ print $1; exit }')
# messages are written)
mv "$PROLOG_TMPDIR/scord_prolog.$SLURM_JOB_ID.log" "$WORKDIR/scord_prolog.log"

################################################################################
# Start the scord-ctl adhoc controller in the background and store its PID in
# a file.
echo "Starting adhoc controller for job $SLURM_JOB_ID (user: $SLURM_JOB_USER)"
run_as "$SLURM_JOB_USER" \
  "$SCORDCTL_PROGRAM" \
@@ -120,4 +125,79 @@ PID=$(<"$WORKDIR/scord-ctl.pid")

echo "Adhoc controller started successfully (PID: $PID)"

################################################################################
# Start the Cargo data stager.

# N.B.: Slurm does not let programs launched from the prolog outlive the
# script that spawned them, so the data stager is run as a systemd
# (user-level) service instead. That service must therefore be stopped
# explicitly once the job finishes.
echo "Starting Cargo data stager for job $SLURM_JOB_ID (user: $SLURM_JOB_USER)"

# Both identifiers are needed below to derive the Cargo instance ID.
if [[ -z $SLURM_JOB_ID ]] || [[ -z $SLURM_JOB_UID ]]; then
  echo "Missing required environment variables" >&2
  exit 1
fi

# Step 1: Find (or create) the user's directory where configurations can be
# stored (note that $HOME is not set when this prolog script is being executed).
# The single quotes are intentional: $HOME must be expanded on the job owner's
# side, not by this script.
# shellcheck disable=SC2016
USER_HOME=$(run_as "$SLURM_JOB_USER" echo '$HOME')

# Without a home directory (and without XDG_CONFIG_HOME) every path below would
# silently end up under "/.config", so stop here instead.
if [[ -z "$USER_HOME" && -z "${XDG_CONFIG_HOME:-}" ]]; then
  echo "Unable to determine the home directory of user $SLURM_JOB_USER" >&2
  exit 1
fi

USER_CONFIG_DIRECTORY="${XDG_CONFIG_HOME:-$USER_HOME/.config}"
CARGO_CONFIG_DIRECTORY="$USER_CONFIG_DIRECTORY/cargo"
SYSTEMD_USER_DIRECTORY="$USER_CONFIG_DIRECTORY/systemd/user"

# Create whatever is missing as the job's owner and fail loudly if a directory
# cannot be created: the following steps copy and write files into them.
for cfg_dir in "$USER_CONFIG_DIRECTORY" "$CARGO_CONFIG_DIRECTORY" "$SYSTEMD_USER_DIRECTORY"; do
  if [[ ! -d "$cfg_dir" ]] && ! run_as "$SLURM_JOB_USER" mkdir -p "$cfg_dir"; then
    echo "Unable to create directory: $cfg_dir" >&2
    exit 1
  fi
done

# Step 2: Make the service file shipped with Cargo visible to systemd by
# copying it into the user's systemd directory.
CARGO_SERVICE_FILE="@CARGO_DATA_INSTALL_DIR@/cargo@.service"

# Without the template unit there is nothing to start.
[[ -f "$CARGO_SERVICE_FILE" ]] || {
  echo "Cargo service file not found: $CARGO_SERVICE_FILE"
  echo "Please check your Cargo installation"
  exit 1
}

# The copy is made as the job's owner.
run_as "$SLURM_JOB_USER" cp "$CARGO_SERVICE_FILE" "$SYSTEMD_USER_DIRECTORY" || exit 1

# Step 3: Create a configuration file for the Cargo user-level service
# required by this job.
# Each Cargo user-level service must be configured for its job and identified
# by a unique ID. We use the job ID and user ID to generate a unique ID
# for this service instance. Since systemd doesn't allow to easily parameterize
# a service file, we use a template service file (`cargo@.service`) and
# generate a specific configuration file for each Cargo service.
CARGO_ID=$(echo "cargo_$SLURM_JOB_ID.$SLURM_JOB_UID" | sha256sum | awk '{ print $1 }')
CARGO_CONFIG_FILE=$CARGO_CONFIG_DIRECTORY/$CARGO_ID.cfg
CARGO_NUM_NODES=1 #TODO: ask scord
CARGO_MASTER_ADDRESS="$SCORDCTL_PROTO://$ADDRESS:$CARGO_PORT"
CARGO_INSTANCE_NAME=$(systemd-escape --template cargo@.service "$CARGO_ID")

# Overwrite rather than append: the ID is the same whenever the job ID and the
# user ID are the same, so a file left over from an earlier run would otherwise
# end up with every key duplicated.
# NOTE(review): the file is written by the user running this script and only
# handed over to the job's owner afterwards; if that user is privileged, a
# pre-existing symlink at this path would be followed — confirm.
cat <<EOT >"$CARGO_CONFIG_FILE"
CARGO_ID=$CARGO_ID
CARGO_HOSTS=$hostnames_csv
CARGO_NUM_NODES=$CARGO_NUM_NODES
CARGO_ADDRESS=$CARGO_MASTER_ADDRESS
EOT

# Hand the configuration file over to the job's owner.
chown "$SLURM_JOB_USER":"$SLURM_JOB_GROUP" "$CARGO_CONFIG_FILE"

# Launch the service instance for this job as the job's owner.
run_as "$SLURM_JOB_USER" systemctl --user start "$CARGO_INSTANCE_NAME" || exit 1

# Wait briefly before probing the state of the service.
sleep 1s

if run_as "$SLURM_JOB_USER" systemctl --user is-active --quiet "$CARGO_INSTANCE_NAME"; then
  echo "Cargo data stager started successfully"
  exit 0
fi

echo "Cargo data stager failed to start"
exit 1