diff --git a/CMakeLists.txt b/CMakeLists.txt index c7fa1f147ea0723bf35d6e7c59a63539a75c0e08..b0d5385e640f185dbc84706bf8dd354a8006f7eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -149,6 +149,14 @@ set(SCORD_BIND_PORT ) message(STATUS "[${PROJECT_NAME}] server bind port: ${SCORD_BIND_PORT}") +### controller bind port +set(SCORD_CTL_BIND_PORT + "52001" + CACHE STRING + "Define the bind port for the ${PROJECT_NAME}-ctl controller (default: 52001)" + ) +message(STATUS "[${PROJECT_NAME}] server bind port: ${SCORD_CTL_BIND_PORT}") + option(SCORD_BUILD_EXAMPLES "Build examples (disabled by default)" OFF) option(SCORD_BUILD_TESTS "Build tests (disabled by default)" OFF) @@ -303,6 +311,7 @@ add_compile_definitions("$<$:LOGGER_ENABLE_DEBUG>") add_subdirectory(etc) add_subdirectory(src) +add_subdirectory(plugins) if (SCORD_BUILD_EXAMPLES) add_subdirectory(examples) diff --git a/cmake/FindSlurm.cmake b/cmake/FindSlurm.cmake new file mode 100644 index 0000000000000000000000000000000000000000..d770b90030116e9f5a14522392f53ed4291310cd --- /dev/null +++ b/cmake/FindSlurm.cmake @@ -0,0 +1,101 @@ +################################################################################ +# Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain # +# # +# This software was partially supported by the EuroHPC-funded project ADMIRE # +# (Project ID: 956748, https://www.admire-eurohpc.eu). # +# # +# This file is part of scord. # +# # +# scord is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# scord is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. 
# +# # +# You should have received a copy of the GNU General Public License # +# along with scord. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +#[=======================================================================[.rst: +FindSlurm +--------- + +Find Slurm include dirs and libraries. + +Use this module by invoking find_package with the form:: + + find_package(Slurm + [version] [EXACT] # Minimum or EXACT version e.g. 0.6.2 + [REQUIRED] # Fail with error if Slurm is not found + ) + +Imported Targets +^^^^^^^^^^^^^^^^ + +This module provides the following imported targets, if found: + +``Slurm::Slurm`` + The Slurm library + +Result Variables +^^^^^^^^^^^^^^^^ + +This will define the following variables: + +``Slurm_FOUND`` + True if the system has the Slurm library. +``Slurm_VERSION`` + The version of the Slurm library which was found. +``Slurm_INCLUDE_DIRS`` + Include directories needed to use Slurm. +``Slurm_LIBRARIES`` + Libraries needed to link to Slurm. + +Cache Variables +^^^^^^^^^^^^^^^ + +The following cache variables may also be set: + +``SLURM_INCLUDE_DIR`` + The directory containing ``slurm.h``. +``SLURM_LIBRARY`` + The path to the Slurm library. 
+ +#]=======================================================================] + +find_path( + SLURM_INCLUDE_DIR + NAMES slurm/slurm.h + PATH_SUFFIXES include +) + +find_library(SLURM_LIBRARY NAMES slurm) + +mark_as_advanced(SLURM_INCLUDE_DIR SLURM_LIBRARY) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + Slurm + FOUND_VAR Slurm_FOUND + REQUIRED_VARS SLURM_LIBRARY SLURM_INCLUDE_DIR + VERSION_VAR Slurm_VERSION +) + +if(Slurm_FOUND) + set(Slurm_INCLUDE_DIRS ${SLURM_INCLUDE_DIR}) + set(Slurm_LIBRARIES ${SLURM_LIBRARY}) + if(NOT TARGET Slurm::Slurm) + add_library(Slurm::Slurm UNKNOWN IMPORTED) + set_target_properties( + Slurm::Slurm + PROPERTIES IMPORTED_LOCATION "${SLURM_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${SLURM_INCLUDE_DIR}" + ) + endif() +endif() diff --git a/docker/0.2.0-wip/Dockerfile b/docker/0.2.0-wip/Dockerfile index 1335f0af66253348e9a73838ba251c16f95eb2b8..fea5b4d9280194305553a00f0dea04bb270ae468 100644 --- a/docker/0.2.0-wip/Dockerfile +++ b/docker/0.2.0-wip/Dockerfile @@ -34,6 +34,8 @@ RUN apt-get update && \ python3-venv \ # redis-plus-plus dependencies \ libhiredis-dev \ + # Slurm plugin dependencies \ + libslurm-dev \ # tests dependencies \ python3-pip && \ ### install cmake 3.23.1 ################################################### diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 4dc1488344fbd8d283ea8351fda3e32a597a26f2..59580175e29b834eab47ba9dbe76264468d90994 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -52,7 +52,6 @@ if(SCORD_BUILD_TESTS) set(SCORD_CTL_TRANSPORT_PROTOCOL ${SCORD_TRANSPORT_PROTOCOL}) set(SCORD_CTL_BIND_ADDRESS ${SCORD_BIND_ADDRESS}) - math(EXPR SCORD_CTL_BIND_PORT "${SCORD_BIND_PORT} + 1") set(SCORD_CTL_ADDRESS_STRING ${SCORD_CTL_TRANSPORT_PROTOCOL}://${SCORD_CTL_BIND_ADDRESS}:${SCORD_CTL_BIND_PORT}) diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..8f3bc11fe0357eb0f4ee12777d171f1b218ad016 --- /dev/null +++ b/plugins/CMakeLists.txt @@ -0,0 +1,25 @@ +################################################################################ +# Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain # +# # +# This software was partially supported by the EuroHPC-funded project ADMIRE # +# (Project ID: 956748, https://www.admire-eurohpc.eu). # +# # +# This file is part of scord. # +# # +# scord is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# scord is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with scord. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +add_subdirectory(slurm) diff --git a/plugins/slurm/CMakeLists.txt b/plugins/slurm/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c7b05b7a4a0b365e4eeba5ffa35b4db2d64fb60 --- /dev/null +++ b/plugins/slurm/CMakeLists.txt @@ -0,0 +1,75 @@ +################################################################################ +# Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain # +# # +# This software was partially supported by the EuroHPC-funded project ADMIRE # +# (Project ID: 956748, https://www.admire-eurohpc.eu). # +# # +# This file is part of scord. 
# +# # +# scord is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# scord is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with scord. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +message(STATUS "[${PROJECT_NAME}] Searching for Slurm") +find_package(Slurm REQUIRED) + +add_library(slurm-plugin SHARED) + +get_target_property(SCORD_CTL_BIN scord-ctl SCORD_CTL_BINARY) + +configure_file(defaults.h.in defaults.h @ONLY) + +target_sources( + slurm-plugin PRIVATE slurmadmcli.c ${CMAKE_CURRENT_BINARY_DIR}/defaults.h + utils.c utils.h +) + +target_include_directories( + slurm-plugin + PUBLIC $ + $ +) + +target_link_libraries(slurm-plugin PUBLIC Slurm::Slurm libscord) + +set_target_properties( + slurm-plugin + PROPERTIES LINKER_LANGUAGE C + OUTPUT_NAME slurmadmcli + PREFIX "lib" + SUFFIX ".so" +) + +install( + TARGETS slurm-plugin + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} +) + +configure_file(scord_common.sh.in scord_common.sh @ONLY) +configure_file(scord_prolog.sh.in scord_prolog.sh @ONLY) +configure_file(scord_epilog.sh.in scord_epilog.sh @ONLY) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/scord_common.sh + ${CMAKE_CURRENT_BINARY_DIR}/scord_prolog.sh + ${CMAKE_CURRENT_BINARY_DIR}/scord_epilog.sh + DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/slurm + PERMISSIONS + 
OWNER_EXECUTE OWNER_WRITE OWNER_READ + GROUP_EXECUTE GROUP_READ + WORLD_EXECUTE WORLD_READ +) diff --git a/plugins/slurm/README.md b/plugins/slurm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4a041d08ef55a50ccc27432f4070343680251cf1 --- /dev/null +++ b/plugins/slurm/README.md @@ -0,0 +1,160 @@ +# Slurm plugin + +This directory contains a Slurm plugin that enables the use of the +Scord storage coordination service in Slurm jobs. The plugin consists +of a shared object (`libslurmadmcli.so`) that is loaded by Slurm's plugin +system and extends Slurm's functionalities. + +The `libslurmadmcli.so` shared library is a Slurm SPANK¹ plugin that extends +Slurm's command line arguments to allow users to provide information for +`scord`, serving as a simple interface between Slurm itself and the Scord +storage coordination service². + +## Installation + +The core of the plugin is written in C and requires the Slurm and the Scord C +libraries to be compiled. It also requires access to the Slurm +`spank.h` header. The plugin is compiled as a shared object that is +loaded by Slurm's plugin system. + +Scord's build system can be used to compile the plugin. The following +command will compile the plugin: + +```bash + $ make slurm-plugin install +``` + +Once the shared object is compiled, it only needs to be configured to be +used by Slurm. For example, if `libslurmadmcli.so` is installed in +`/usr/local/lib/`, the following line should be added to the Slurm plugin +configuration file (usually `/etc/slurm/plugstack.conf`) + +```conf + optional /usr/local/lib/libslurmadmcli.so scord_addr=ofi+tcp://127.0.0.1:52000 scord_proto=ofi+tcp://127.0.0.1 scordctl_bin=scord-ctl +``` + +The key-value pairs following the plugin are optional configuration +variables. + +- `scord_addr`: The address to contact the Scord service in Mercury format. 
+ For instance, if the Scord service has been configured to listen on + port `52000` on a machine with the IP address `192.168.1.128` and using + `tcp` as the transport protocol, the address would be + `ofi+tcp://192.168.1.128:52000`. +- `scord_proto`: The transport protocol to use in Mercury format (default + to `ofi+tcp`). Must be the same configured in Scord (see Scord's + documentation for more details). +- `scordctl_bin`: The scord-ctl binary to run on every node of an + allocation, can be the path to an executable (default to scord-ctl). + +Besides the shared library, the plugin also installs +[prolog and epilog](https://slurm.schedmd.com/prolog_epilog.html) +scripts for job control under `$PREFIX/share/scord/slurm/`. In order to enable +them, the following lines should be added to Slurm's configuration file +(where `$PREFIX` should be replaced with the path where Scord is installed): + +```conf + Prolog=$PREFIX/share/scord/slurm/scord_prolog.sh + Epilog=$PREFIX/share/scord/slurm/scord_epilog.sh +``` + +## Usage + +### Ad-hoc storage services + +The plugin extends Slurm's command line arguments to allow users to request +the deployment of adhoc storage services for their jobs. The following +arguments are available for `srun`/`sbatch`: + +- `--adm-adhoc`: The job requires an adhoc storage service. By default + `--adm-adhoc-overlap` is assumed. The type of adhoc storage service can be + one of: + - `gekkofs`: The job requires the GekkoFS adhoc file system. + - `expand`: The job requires the Expand adhoc file system. + - `hercules`: The job requires the Hercules adhoc file system. + - `dataclay`: The job requires the dataClay adhoc object store. + +- `--adm-adhoc-overlap`: The requested adhoc storage service will be + deployed on the same nodes as the application and the nodes will be + shared. The number of nodes assigned for the adhoc storage service can + be controlled with the `--adm-adhoc-nodes` option. 
If not specified, the + deployed adhoc storage service will share all the nodes assigned to the job. + +- `--adm-adhoc-exclusive`: The adhoc storage service will be deployed on the + same nodes as the application, but the adhoc nodes will not + be shared with the application. The number of nodes assigned for the adhoc + storage service MUST be specified with the `--adm-adhoc-nodes` option and + cannot be greater than the number of nodes assigned to the job. Note, however, + that the value of `--adm-adhoc-nodes` must be smaller than the value of + `--nodes` (or `--ntasks`). Otherwise, the application would have no + resources to run on. + +- `--adm-adhoc-dedicated`: The ad-hoc storage service will be deployed in an + independent job allocation and all the nodes for the allocation will be + available for it. An `adhoc-id` will be generated for it and will be + returned to the user so that other jobs can use the deployed ad-hoc storage + service. In this mode, the resources assigned to the ad-hoc storage service + can be controlled with the usual Slurm options (e.g. `--nodes`, + `--ntasks`, `--time`, etc.). + +- `--adm-adhoc-remote `: The job will use a remote and + dedicated ad-hoc storage service that must have been previously requested in a + different submission with the `--adm-adhoc-dedicated` option. An + identifier for that ad-hoc storage service must be provided as an argument. + +Users can request and control the automatic deployment of a remote adhoc +storage service using the following `srun`/`sbatch` arguments: + +- `--adm-adhoc-dedicated`: The job allocation will be used exclusively for an + adhoc storage service. +- `--adm-adhoc-nodes`: The number of nodes to use for the adhoc storage + service. The nodes will be allocated from the same partition as the + compute nodes. This option is only valid when used with + `--adm-adhoc-local` or `--adm-adhoc-overlap`. 
+ +Examples: + +```bash +# Request a local GekkoFS adhoc storage service +$ sbatch --adm-adhoc gekkofs [--adm-adhoc-local] script.sh + +# Request a local (overlapping) GekkoFS adhoc storage service +$ sbatch --adm-adhoc gekkofs --adm-adhoc-overlap script.sh + +# Request an independent GekkoFS adhoc storage service +$ sbatch --adm-adhoc gekkofs --adm-adhoc-exclusive --adm-adhoc-nodes 10 --adm-adhoc-walltime 00:10:00 noop.sh +Submitted batch job 42 +Will deploy adhoc storage 123456 +# Wait for the adhoc storage service to be started +$ sbatch --adm-adhoc-remote 123456 --dependency=after:42 script.sh +``` + +### Dataset management + +The plugin also provides a set of options to manage datasets: + +- `--adm-input `: Define datasets that should be transferred + between the PFS and the ad-hoc storage service. The `dataset-routing` is + defined as `ORIGIN-TIER:PATH TARGET-TIER:PATH`. For example, to transfer + the file `input000.dat` from the Lustre PFS to the an on-demand GekkoFS + ad-hoc storage service, the option could be specified in the following manner: + `"lustre:/input.dat gekkofs:/input.dat"` +- `--adm-output `: Define datasets that should be + automatically transferred between the ad-hoc storage system and the PFS. + The ad-hoc storage will guarantee that the dataset is not transferred while + there are processes accessing the file. The datasets will be transferred + before the job allocation finishes if at all possible, but no hard guarantees + are made. +- `--adm-expect-output `: Define datasets that should be + automatically transferred between the ad-hoc storage system and the PFS. + The ad-hoc storage will guarantee that the dataset is not transferred while + there are processes accessing the file. The datasets will be transferred + before the job allocation finishes. If the transfer cannot be completed + before the job allocation finishes, the job will be cancelled. 
`--adm-inout <dataset-routing>`: Define datasets
+ * + * SPDX-License-Identifier: LGPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_SLURM_PLUGIN_DEFAULTS_H +#define SCORD_SLURM_PLUGIN_DEFAULTS_H + +// clang-format off + +#define SCORD_PORT_DEFAULT @SCORD_BIND_PORT@ +#define SCORD_SERVER_DEFAULT "@SCORD_TRANSPORT_PROTOCOL@://@SCORD_BIND_ADDRESS@:@SCORD_BIND_PORT@" +#define SCORD_PROTO_DEFAULT "@SCORD_TRANSPORT_PROTOCOL@" + +#define SCORDCTL_PROG_DEFAULT "@SCORD_CTL_BIN@" +#define SCORDCTL_PROTO_DEFAULT SCORD_PROTO_DEFAULT +#define SCORDCTL_PORT_DEFAULT @SCORD_CTL_BIND_PORT@ +#define SCORDCTL_TMPDIR_DEFAULT "/tmp" + +// clang-format on + +#endif // SCORD_SLURM_PLUGIN_DEFAULTS_H diff --git a/plugins/slurm/scord_common.sh.in b/plugins/slurm/scord_common.sh.in new file mode 100644 index 0000000000000000000000000000000000000000..539ad4114ea487435766d439ddbec06482fa11f4 --- /dev/null +++ b/plugins/slurm/scord_common.sh.in @@ -0,0 +1,155 @@ +################################################################################ +# Copyright 2022-2023, Inria, France. # +# Copyright 2023, Barcelona Supercomputing Center (BSC), Spain. # +# All rights reserved. # +# # +# This software was partially supported by the EuroHPC-funded project ADMIRE # +# (Project ID: 956748, https://www.admire-eurohpc.eu). # +# # +# This file is part of scord. # +# # +# scord is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# scord is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with scord. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# Run a command as a user +# Usage: run_as +# Example: +# run_as root mkdir /root/test +# run_as root "mkdir /root/test && chown root:root /root/test" +function run_as { + local username=$1 + shift + + if [[ -z "$*" ]]; then + echo "No command specified" + return 1 + fi + + if ! id -u "$username" &>/dev/null; then + echo "User '$username' does not exist" + return 1 + fi + + if [[ "$USER" == "$username" ]]; then + echo "Already running as user '$username'" + ${SHELL} -c "$*" + return 0 + fi + + if su "$username" -c "$*"; then + return 0 + fi + + echo "Failed to run '$*' as user '$username'" + return 1 +} + +# Get the list of hosts associated with a list of hostnames +# Usage: get_hostlist +# Example: +# declare hl +# get_hostlist hl tux1,tux3,tux4,snoo1,snoo2 +# echo "$hl" # tux[1,3-4],snoo[1-2] +function get_hostlist { + + if [[ -z "$1" ]]; then + echo "No output variable specified" + return 1 + fi + + if [[ -z "$2" ]]; then + echo "No hostlist specified" + return 1 + fi + + local -n rv=$1 + rv=$(scontrol show hostlist "$2") +} + +# Get the list of hostnames associated with a hostlist +# Usage: get_nodelist +# Example: +# declare -a hn +# get_nodelist hn tux[1,3-4],snoo[1-2] +# echo "${hn[0]}" # tux1 +# echo "${hn[@]}" # tux1 tux3 tux4 snoo1 snoo2 +function get_nodelist { + + if [[ -z "$1" ]]; then + echo "No output array specified" + return 1 + fi + + if [[ -z "$2" ]]; then + echo "No hostlist specified" + return 1 + fi + + local -n rv=$1 + # shellcheck disable=SC2034 + readarray -t rv < <(scontrol show hostnames "$2") +} + +# Get the list of IP addresses associated with a hostname +# Usage: get_addrs +# Example: +# declare -a addrs +# get_addrs addrs tux1 +# echo "${addrs[0]}" # 192.18.0.7 +function get_addrs { + + if [[ -z "$1" ]]; then + echo >&2 "No output array specified" + return 1 + fi + + if [[ -z "$2" ]]; then + echo 
>&2 "No hostname specified" + return 1 + fi + + local db="ahosts" + + if [[ -n "$3" ]]; then + case "$3" in + v4 | V4) + db+="v4" + ;; + v6 | V6) + db+="v6" + ;; + "") ;; + + *) + echo >&2 "Invalid address family requested: $3" + return 1 + ;; + + esac + fi + + local -n out=$1 + local hostname=$2 + + # shellcheck disable=SC2034 + readarray -t out < <(getent $db "$hostname") + return 0 +} + +# shellcheck disable=SC2034 +export SCORDCTL_PROGRAM="@SCORD_CTL_BIN@" +export SCORDCTL_PROTO="@SCORD_TRANSPORT_PROTOCOL@" +export SCORDCTL_PORT="@SCORD_CTL_BIND_PORT@" diff --git a/plugins/slurm/scord_epilog.sh.in b/plugins/slurm/scord_epilog.sh.in new file mode 100755 index 0000000000000000000000000000000000000000..b45902a155da84e6c0d22be425a4af28e7a3c4ff --- /dev/null +++ b/plugins/slurm/scord_epilog.sh.in @@ -0,0 +1,83 @@ +#!/bin/bash +################################################################################ +# Copyright 2022-2023, Inria, France. # +# Copyright 2023, Barcelona Supercomputing Center (BSC), Spain. # +# All rights reserved. # +# # +# This software was partially supported by the EuroHPC-funded project ADMIRE # +# (Project ID: 956748, https://www.admire-eurohpc.eu). # +# # +# This file is part of scord. # +# # +# scord is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# scord is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with scord. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +source @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/slurm/scord_common.sh + +# determine the temporary directory to use for the epilog logs +if [[ -n "$TMPDIR" ]]; then + EPILOG_TMPDIR="$TMPDIR" +else + EPILOG_TMPDIR="/tmp" +fi + +# redirect stdout and stderr to a log file in $EPILOG_TMPDIR +exec &>"$EPILOG_TMPDIR/scord_epilog.$SLURM_JOB_ID.log" +# print out all commands +set -x +# print out the value of all variables +env + +# if no ADMIRE CLI options were specified, we don't need to do anything +if ! compgen -v SPANK__SLURM_SPANK_OPTION_admire_cli_; then + echo "SCORD SLURM plugin not requested. Exiting." + exit 0 +fi + +# find out some information about the job and where we are running +HOSTNAME=$(hostname -s) +declare -a hostnames +get_nodelist hostnames "$SLURM_NODELIST" + +# create a temporary directory for the job and redirect both stdout and stderr +# to a log file within it +WORKDIR="$EPILOG_TMPDIR/$SLURM_JOB_USER/$SLURM_JOBID" +if [ ! -d "$WORKDIR" ]; then + run_as "$SLURM_JOB_USER" mkdir -p "$WORKDIR" +fi + +# now that we have a specific working directory, move the previous log file +# into $WORKDIR so that we have all messages in one place (since the file is +# still open by the shell, the move operation will not affect where the +# messages are written) +mv "$EPILOG_TMPDIR/scord_epilog.$SLURM_JOB_ID.log" "$WORKDIR/scord_epilog.log" + +if ((${#hostnames[@]} == 0)); then + echo "No hostnames found for job $SLURM_JOB_ID. Weird." 
+ exit 0 +fi + +# only run on the first node of the allocation (scord-ctl will always be +# started on the first node of the allocation) +if [[ "$HOSTNAME" != "${hostnames[0]}" ]]; then + exit 0 +fi + +echo "Shutting down adhoc controller for job $SLURM_JOB_ID (user: $SLURM_JOB_USER)" +PIDFILE="$EPILOG_TMPDIR/$SLURM_JOB_USER/$SLURM_JOBID/scord-ctl.pid" +if [[ -f "$PIDFILE" ]]; then + kill -TERM "$(<"$PIDFILE")" +fi diff --git a/plugins/slurm/scord_prolog.sh.in b/plugins/slurm/scord_prolog.sh.in new file mode 100755 index 0000000000000000000000000000000000000000..fb4a57eed843f6dd1e6d639132628c3c4e69b353 --- /dev/null +++ b/plugins/slurm/scord_prolog.sh.in @@ -0,0 +1,123 @@ +#!/bin/bash +################################################################################ +# Copyright 2022-2023, Inria, France. # +# Copyright 2023, Barcelona Supercomputing Center (BSC), Spain. # +# All rights reserved. # +# # +# This software was partially supported by the EuroHPC-funded project ADMIRE # +# (Project ID: 956748, https://www.admire-eurohpc.eu). # +# # +# This file is part of scord. # +# # +# scord is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# scord is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with scord. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + + +# This is a prolog script for SLURM that starts the SCORD adhoc controller +# for the job. It is meant to be used with the SCORD SLURM plugin. 
+# The script is executed as the user that submitted the job. The script +# creates a temporary directory for the job and starts the adhoc controller +# in the background. The PID of the adhoc controller is stored in a file +# in the temporary directory. + +source @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/slurm/scord_common.sh + +# determine the temporary directory to use for the prolog logs +if [[ -n "$TMPDIR" ]]; then + PROLOG_TMPDIR="$TMPDIR" +else + PROLOG_TMPDIR="/tmp" +fi + +# redirect stdout and stderr to a log file in $PROLOG_TMPDIR +exec &>"$PROLOG_TMPDIR/scord_prolog.$SLURM_JOB_ID.log" +# print out all commands +set -x +# print out the value of all variables +env + +# if no ADMIRE CLI options were specified, we don't need to do anything +if ! compgen -v SPANK__SLURM_SPANK_OPTION_admire_cli_; then + echo "SCORD SLURM plugin not requested. Exiting." + exit 0 +fi + +# find out some information about the job and where we are running +HOSTNAME=$(hostname -s) +declare -a hostnames +get_nodelist hostnames "$SLURM_NODELIST" + +# create a temporary directory for the job and redirect both stdout and stderr +# to a log file within it +WORKDIR="$PROLOG_TMPDIR/$SLURM_JOB_USER/$SLURM_JOBID" +if [ ! -d "$WORKDIR" ]; then + run_as "$SLURM_JOB_USER" mkdir -p "$WORKDIR" +fi + +if ((${#hostnames[@]} == 0)); then + echo "No hostnames found for job $SLURM_JOB_ID. Weird." + exit 0 +fi + +# only run on the first node of the allocation (scord-ctl will always be +# started on the first node of the allocation) +if [[ "$HOSTNAME" != "${hostnames[0]}" ]]; then + exit 0 +fi + +# find out the IP address of the first node of the allocation +declare -a addrs +if ! get_addrs addrs "$HOSTNAME" v4; then + echo "Error searching IP addresses for $HOSTNAME." + exit 1 +fi + +if ((${#addrs[@]} == 0)); then + echo "No addresses found." 
+ exit 1 +fi + +ADDRESS=$(echo "${addrs[@]}" | awk '{ print $1; exit }') + +# now that we have a specific working directory, move the previous log file +# into $WORKDIR so that we have all messages in one place (since the file is +# still open by the shell, the move operation will not affect where the +# messages are written) +mv "$PROLOG_TMPDIR/scord_prolog.$SLURM_JOB_ID.log" "$WORKDIR/scord_prolog.log" + +# start the adhoc controller in the background and store its PID in a file +echo "Starting adhoc controller for job $SLURM_JOB_ID (user: $SLURM_JOB_USER)" +run_as "$SLURM_JOB_USER" \ + "$SCORDCTL_PROGRAM" \ + --listen "$SCORDCTL_PROTO://$ADDRESS:$SCORDCTL_PORT" \ + --output "$WORKDIR/scord-ctl.log" \ + --pidfile "$WORKDIR/scord-ctl.pid" \ + '&' + +# TODO: this doesn't work for background processes +# shellcheck disable=SC2181 +if [[ $? -ne 0 ]]; then + echo "Failed to start adhoc controller" + exit 1 +fi + +# give some time to ensure that the PID file has been created +sleep 0.5s +PID=$(<"$WORKDIR/scord-ctl.pid") + +echo "Adhoc controller started successfully (PID: $PID)" + +exit 0 diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c new file mode 100644 index 0000000000000000000000000000000000000000..d47900cc2f35acd7b7aec6e6b934436c3aa529c8 --- /dev/null +++ b/plugins/slurm/slurmadmcli.c @@ -0,0 +1,682 @@ +/****************************************************************************** + * Copyright 2022-2023, Inria, France. + * Copyright 2023, Barcelona Supercomputing Center (BSC), Spain. + * All rights reserved. + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the Lesser GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the Lesser GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: LGPL-3.0-or-later + *****************************************************************************/ + +#include +#include /* SIZE_MAX, uint32_t, etc. */ +#include /* strtoul, getenv, reallocarray */ +#include /* strchr, strncmp, strncpy */ +#include +#include + +#include +#include "defaults.h" +#include "utils.h" + +/** + * Slurm SPANK plugin to handle the ADMIRE adhoc storage CLI. Options are + * forwarded to scord on srun, salloc and sbatch. See the struct spank_option + * for the list of options. + * + * Notes: + * - --adm-adhoc-context-id will be silently truncated to ADHOCID_LEN + * characters, including NULL byte + **/ + +#define ADHOCID_LEN 64 +#define INT32_STR_LEN 16 /* 16 chars are enough to fit an int32 in decimal */ + +#define TAG_NNODES 0 +#define TAG_ADHOC_TYPE 1 +#define TAG_ADHOC_OVERLAP 2 +#define TAG_ADHOC_EXCLUSIVE 3 +#define TAG_ADHOC_DEDICATED 4 +#define TAG_ADHOC_REMOTE 5 +#define TAG_DATASET_INPUT 6 +#define TAG_DATASET_OUTPUT 7 +#define TAG_DATASET_EXPECT_OUTPUT 8 +#define TAG_DATASET_INOUT 9 + +// clang-format off +SPANK_PLUGIN (admire-cli, 1) +// clang-format on + +static int scord_flag = 0; + +/* scord adhoc options */ +static long adhoc_nnodes = 0; +static long adhoc_walltime = 0; +static ADM_adhoc_mode_t adhoc_mode = ADM_ADHOC_MODE_IN_JOB_SHARED; +static ADM_adhoc_storage_type_t adhoc_type = 0; +static char adhoc_id[ADHOCID_LEN] = {0}; + +/* server-related options */ +typedef struct { + const char* addr; + const char* proto; + int port; + const char* prog; + const char* tmpdir; +} scord_server_info_t; + +typedef struct { + scord_server_info_t 
scord_info; + scord_server_info_t scordctl_info; +} scord_plugin_config_t; + + +static scord_plugin_config_t default_cfg = { + .scord_info = {.addr = SCORD_SERVER_DEFAULT, + .proto = SCORD_PROTO_DEFAULT, + .port = SCORD_PORT_DEFAULT, + .prog = NULL, + .tmpdir = NULL}, + .scordctl_info = {.addr = NULL, + .proto = SCORDCTL_PROTO_DEFAULT, + .port = SCORDCTL_PORT_DEFAULT, + .prog = SCORDCTL_PROG_DEFAULT, + .tmpdir = SCORDCTL_TMPDIR_DEFAULT}}; + +static int +process_opts(int tag, const char* optarg, int remote); + +struct spank_option spank_opts[] = { + { + "adm-adhoc", "type", + "Deploy an ad-hoc storage of type `type` for this job. " + "Supported ad-hoc storages are: gekkofs, expand, hercules, and " + "dataclay. By default, it implies `--adm-adhoc-overlap`, but " + "this behavior can be modified with the " + "`--adm-adhoc-exclusive` or `--adm-adhoc-dedicated flags`.", + 1, /* option takes an argument */ + TAG_ADHOC_TYPE, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-adhoc-overlap", NULL, + "Deploy the requested ad-hoc storage on the same nodes as the " + "compute nodes, but request ad-hoc nodes to BE SHARED " + "with the application. The number of nodes assigned to the " + "ad-hoc storage CAN be specified with the " + "`--adm-adhoc-nodes` option.", + 0, /* option takes an argument */ + TAG_ADHOC_OVERLAP, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-adhoc-exclusive", NULL, + "Deploy the requested ad-hoc storage on the same nodes as the " + "compute nodes, but request ad-hoc nodes to NOT BE SHARED " + "with the application. 
The number of nodes assigned to the " + "ad-hoc storage MUST be specified with the " + "`--adm-adhoc-nodes` option.", + 0, /* option takes an argument */ + TAG_ADHOC_EXCLUSIVE, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-adhoc-dedicated", NULL, + "Deploy the requested ad-hoc storage service will be deployed " + "in an independent job allocation and all the nodes in this " + "allocation will be available for it. A specific `adhoc-id` " + "will be generated for it and will be returned to the user " + "so that other jobs can refer to this deployed ad-hoc storage " + "service. In this mode, the resources assigned to the ad-hoc " + "storage service can be controlled with the normal Slurm " + "options.", + 0, /* option takes an argument */ + TAG_ADHOC_DEDICATED, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-adhoc-remote", "adhoc-id", + "Use an independent ad-hoc storage already running in its own" + "allocation. The service must have been previously deployed " + "with the `--adm-adhoc-exclusive` option.", + 1, /* option takes an argument */ + TAG_ADHOC_REMOTE, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-adhoc-nodes", "nnodes", + "Dedicate `nnodes` to the ad-hoc storage service. Only " + "valid if paired with `--adm-adhoc-overlap` and " + "`--adm-adhoc-exclusive`. Ignored otherwise.", + 1, /* option takes an argument */ + TAG_NNODES, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-input", "dataset-routing", + "Define datasets that should be transferred between the PFS " + "and the ad-hoc storage service. The `dataset-routing` is " + "defined as `ORIGIN-TIER:PATH TARGET-TIER:PATH`. 
For example," + "to transfer the file `input000.dat` from the Lustre PFS to " + "the an on-demand GekkoFS ad-hoc storage service, the option " + "could be specified in the following manner: " + " \"lustre:/input.dat gekkofs:/input.dat\"", + 1, /* option takes an argument */ + TAG_DATASET_INPUT, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-output", "dataset-routing", + "Define datasets that should be automatically transferred " + "between the ad-hoc storage system and the PFS. The ad-hoc " + "storage will guarantee that the dataset is not transferred " + "while there are processes accessing the file. The datasets " + "will be transferred before the job allocation finishes if at " + "all possible, but no hard guarantees are made.", + 1, /* option takes an argument */ + TAG_DATASET_OUTPUT, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-expect-output", "dataset-routing", + "Define datasets that are expected to be generated by the " + "application. 
When using this option, the application itself " + "MUST use the programmatic APIs defined in `scord-user.h`to " + "explicitly request the transfer of the datasets.", + 1, /* option takes an argument */ + TAG_DATASET_EXPECT_OUTPUT, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + { + "adm-expect-inout", "dataset-routing", + "Define the datasets that should be transferred INTO " + "the ad-hoc storage AND BACK when finished.", + 1, /* option takes an argument */ + TAG_DATASET_INOUT, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ + }, + SPANK_OPTIONS_TABLE_END}; + +int +process_opts(int tag, const char* optarg, int remote) { + (void) remote; + + slurm_debug("%s: %s() called", plugin_name, __func__); + + /* srun & sbatch/salloc */ + spank_context_t sctx = spank_context(); + if(sctx != S_CTX_LOCAL && sctx != S_CTX_ALLOCATOR && sctx != S_CTX_REMOTE) + return 0; + + /* if we're here some scord options were passed to the Slurm CLI */ + scord_flag = 1; + + switch(tag) { + case TAG_NNODES: { + char* endptr; + errno = 0; + + adhoc_nnodes = strtol(optarg, &endptr, 0); + if(errno != 0 || endptr == optarg || *endptr != '\0' || + adhoc_nnodes <= 0) { + return -1; + } + + return 0; + } + + case TAG_ADHOC_TYPE: + if(!strncmp(optarg, "gekkofs", strlen("gekkofs"))) { + adhoc_type = ADM_ADHOC_STORAGE_GEKKOFS; + return 0; + } + + if(!strncmp(optarg, "expand", strlen("expand"))) { + adhoc_type = ADM_ADHOC_STORAGE_EXPAND; + return 0; + } + + if(!strncmp(optarg, "hercules", strlen("hercules"))) { + adhoc_type = ADM_ADHOC_STORAGE_HERCULES; + return 0; + } + + if(!strncmp(optarg, "dataclay", strlen("dataclay"))) { + adhoc_type = ADM_ADHOC_STORAGE_DATACLAY; + return 0; + } + + return -1; + + case TAG_ADHOC_EXCLUSIVE: + adhoc_mode = ADM_ADHOC_MODE_IN_JOB_DEDICATED; + return 0; + + case TAG_ADHOC_DEDICATED: + adhoc_mode = ADM_ADHOC_MODE_SEPARATE_NEW; + return 0; + + case TAG_ADHOC_REMOTE: + adhoc_mode = ADM_ADHOC_MODE_SEPARATE_EXISTING; + 
strncpy(adhoc_id, optarg, ADHOCID_LEN - 1); + adhoc_id[ADHOCID_LEN - 1] = '\0'; + return 0; + + default: + return -1; + } +} + +static int +process_config(int ac, char** av, scord_plugin_config_t* cfg) { + + typedef struct { + const char* name; + size_t len; + enum { TYPE_INT, TYPE_STR } type; + void* value; + } scord_option_t; + +#define EXPAND_SCORD_OPT(opt_name, type, ptr) \ + { opt_name, strlen(opt_name), type, ptr } + + const scord_option_t scord_options[] = { + EXPAND_SCORD_OPT("scord_addr", TYPE_STR, &cfg->scord_info.addr), + EXPAND_SCORD_OPT("scord_proto", TYPE_STR, &cfg->scord_info.proto), + EXPAND_SCORD_OPT("scordctl_prog", TYPE_STR, + &cfg->scordctl_info.prog), + EXPAND_SCORD_OPT("scordctl_port", TYPE_INT, + &cfg->scordctl_info.port), + EXPAND_SCORD_OPT("scordctl_tmpdir", TYPE_STR, + &cfg->scordctl_info.tmpdir), + }; + +#undef EXPAND_SCORD_OPT + + for(int i = 0; i < ac; i++) { + + bool invalid_opt = true; + + for(uint j = 0; j < sizeof(scord_options) / sizeof(scord_option_t); + j++) { + + scord_option_t opt_desc = scord_options[j]; + + if(!strncmp(av[i], opt_desc.name, opt_desc.len)) { + + switch(opt_desc.type) { + case TYPE_INT: { + char* endptr; + int val = (int) strtol(av[i] + opt_desc.len + 1, + &endptr, 10); + if(*endptr != '\0') { + slurm_error("%s: invalid option value: %s", + plugin_name, av[i]); + return -1; + } + + *(int*) opt_desc.value = val; + invalid_opt = false; + break; + } + + case TYPE_STR: + *(char**) opt_desc.value = av[i] + opt_desc.len + 1; + invalid_opt = false; + break; + + default: + slurm_error("%s: invalid option type: %d", plugin_name, + opt_desc.type); + return -1; + } + break; + } + } + + if(invalid_opt) { + slurm_error("%s: invalid option: %s", plugin_name, av[i]); + return -1; + } + } + + return 0; +} + +static int +scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, + uint32_t jobid) { + + int rc = 0; + int nnodes = 0; + + ADM_server_t scord_server = NULL; + ADM_node_t* nodes = NULL; + 
ADM_job_resources_t job_resources = NULL;
+    ADM_adhoc_resources_t adhoc_resources = NULL;
+    ADM_adhoc_context_t adhoc_ctx = NULL;
+    ADM_adhoc_storage_t adhoc_storage = NULL;
+    ADM_job_requirements_t scord_reqs = NULL;
+    ADM_job_t scord_job = NULL;
+    char* adhoc_path = NULL;
+
+    /* First determine the node on which to launch scord-ctl (typically the
+     * first node of the allocation) */
+    ADM_node_t ctl_node = scord_nodelist_get_node(nodelist, 0);
+    cfg.scordctl_info.addr = margo_address_create(
+            cfg.scordctl_info.proto, ADM_node_get_hostname(ctl_node),
+            cfg.scordctl_info.port);
+
+    if(!cfg.scordctl_info.addr) {
+        slurm_error("%s: failed to compute address scordctl server",
+                    plugin_name);
+        return -1;
+    }
+
+    slurm_debug("%s: %s: scord_info:", plugin_name, __func__);
+    slurm_debug("%s: %s: addr: \"%s\",", plugin_name, __func__,
+                cfg.scord_info.addr);
+    slurm_debug("%s: %s: proto: \"%s\",", plugin_name, __func__,
+                cfg.scord_info.proto);
+    slurm_debug("%s: %s: port: %d,", plugin_name, __func__,
+                cfg.scord_info.port);
+
+    slurm_debug("%s: %s: scordctl_info:", plugin_name, __func__);
+    slurm_debug("%s: %s: addr: \"%s\",", plugin_name, __func__,
+                cfg.scordctl_info.addr);
+    slurm_debug("%s: %s: proto: \"%s\",", plugin_name, __func__,
+                cfg.scordctl_info.proto);
+    slurm_debug("%s: %s: port: %d,", plugin_name, __func__,
+                cfg.scordctl_info.port);
+
+    /* Register the job with the scord server */
+    scord_server = ADM_server_create(cfg.scord_info.proto, cfg.scord_info.addr);
+    if(!scord_server) {
+        slurm_error("%s: scord server creation failed", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    nnodes = scord_nodelist_get_nodecount(nodelist);
+    if(nnodes <= 0) {
+        slurm_error("%s: wrong scord_nodelist count", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    nodes = scord_nodelist_get_nodes(nodelist);
+    if(!nodes) {
+        slurm_error("%s: wrong scord_nodelist_nodes", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    job_resources = ADM_job_resources_create(nodes, nnodes);
+    if(!job_resources) {
+        slurm_error("%s: job_resources creation failed", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    /* take the ADHOC_NNODES first nodes for the adhoc */
+    adhoc_resources = ADM_adhoc_resources_create(
+            nodes, adhoc_nnodes < nnodes ? adhoc_nnodes : nnodes);
+    if(!adhoc_resources) {
+        slurm_error("%s: adhoc_resources creation failed", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    adhoc_ctx = ADM_adhoc_context_create(cfg.scordctl_info.addr, adhoc_mode,
+                                         ADM_ADHOC_ACCESS_RDWR, adhoc_walltime,
+                                         false);
+    if(!adhoc_ctx) {
+        slurm_error("%s: adhoc_context creation failed", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    if(ADM_register_adhoc_storage(scord_server, "mystorage", adhoc_type,
+                                  adhoc_ctx, adhoc_resources,
+                                  &adhoc_storage) != ADM_SUCCESS) {
+        slurm_error("%s: adhoc_storage registration failed", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    /* no inputs or outputs */
+    scord_reqs = ADM_job_requirements_create(NULL, 0, NULL, 0, adhoc_storage);
+    if(!scord_reqs) {
+        slurm_error("%s: scord job_requirements creation", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    if(ADM_register_job(scord_server, job_resources, scord_reqs, jobid,
+                        &scord_job) != ADM_SUCCESS) {
+        slurm_error("%s: scord job registration failed", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+    if(ADM_deploy_adhoc_storage(scord_server, adhoc_storage, &adhoc_path) !=
+       ADM_SUCCESS) {
+        slurm_error("%s: adhoc storage deployment failed", plugin_name);
+        rc = -1;
+        goto end;
+    }
+
+end:
+    if(adhoc_path) {
+        free(adhoc_path);
+    }
+
+    /* NOTE(review): the freshly registered job is removed again during
+     * cleanup even on success — confirm this is intentional */
+    if(scord_job) {
+        ADM_remove_job(scord_server, scord_job);
+    }
+
+    if(scord_reqs) {
+        ADM_job_requirements_destroy(scord_reqs);
+    }
+
+    if(adhoc_storage) {
+        ADM_adhoc_storage_destroy(adhoc_storage);
+    }
+
+    if(adhoc_ctx) {
+        ADM_adhoc_context_destroy(adhoc_ctx);
+    }
+
+    if(adhoc_resources) {
+        ADM_adhoc_resources_destroy(adhoc_resources);
+    }
+
+    if(job_resources) {
+        ADM_job_resources_destroy(job_resources);
+    }
+
+    /* BUGFIX: `cfg` is received by value, so the address string allocated
+     * by margo_address_create() above was unreachable to the caller and
+     * leaked on every invocation. Release it here, after all ADM_* handles
+     * that referenced it have been destroyed (assumes
+     * ADM_adhoc_context_create() copies the string rather than taking
+     * ownership — TODO confirm against its contract). free(NULL) on the
+     * early-return path is impossible since we only get here after the
+     * NULL check. */
+    free((void*) cfg.scordctl_info.addr);
+
+    if(scord_server) {
+        
ADM_server_destroy(scord_server); + } + + return rc; +} + +/** + * Called just after plugins are loaded. In remote context, this is just after + * job step is initialized. This function is called before any plugin option + * processing. + * + * ┌-----------------------┐ + * | Command | Context | + * ├---------|-------------┤ + * | srun | S_CTX_LOCAL | + * | salloc | S_CTX_ALLOC | + * | sbatch | S_CTX_ALLOC | + * └-----------------------┘ + * + * Available in the following contexts: + * S_CTX_LOCAL (srun) + * S_CTX_ALLOCATOR (sbatch/salloc) + * S_CTX_REMOTE (slurmstepd) + * S_CTX_SLURMD (slurmd) + */ +int +slurm_spank_init(spank_t sp, int ac, char** av) { + (void) ac; + (void) av; + + spank_err_t rc = ESPANK_SUCCESS; + + spank_context_t sctx = spank_context(); + if(sctx == S_CTX_LOCAL || sctx == S_CTX_ALLOCATOR || sctx == S_CTX_REMOTE) { + + slurm_debug("%s: %s() registering options", plugin_name, __func__); + + /* register adm/scord options */ + struct spank_option* opt = &spank_opts[0]; + while(opt->name) { + rc = spank_option_register(sp, opt++); + } + } + + return rc == ESPANK_SUCCESS ? 0 : -1; +} + +/** + * Called in local context only after all options have been processed. + * This is called after the job ID and step IDs are available. This happens in + * `srun` after the allocation is made, but before tasks are launched. + * + * ┌-----------------------┐ + * | Command | Context | + * ├---------|-------------┤ + * | srun | S_CTX_LOCAL | + * └-----------------------┘ + * + * Available in the following contexts: + * S_CTX_LOCAL (srun) + */ +int +slurm_spank_local_user_init(spank_t sp, int ac, char** av) { + + (void) sp; + (void) ac; + (void) av; + + return 0; +} + +/** + * Called after privileges are temporarily dropped. 
(remote context only) + * + * ┌------------------------┐ + * | Command | Context | + * ├---------|--------------┤ + * | srun | S_CTX_REMOTE | + * | salloc | S_CTX_REMOTE | + * | sbatch | S_CTX_REMOTE | + * └------------------------┘ + * + * Available in the following contexts: + * S_CTX_REMOTE (slurmstepd) + */ +int +slurm_spank_user_init(spank_t sp, int ac, char** av) { + + (void) sp; + (void) ac; + (void) av; + + /* No ADMIRE options were passed to the job, nothing to do here */ + if(!scord_flag) { + return 0; + } + + /* Get relative for the node executing id. Job registration is only done + * by the node with ID 0 */ + spank_err_t rc; + uint32_t nodeid; + + if((rc = spank_get_item(sp, S_JOB_NODEID, &nodeid)) != ESPANK_SUCCESS) { + slurm_error("%s: failed to get node id: %s", plugin_name, + spank_strerror(rc)); + return -1; + } + + slurm_debug("%s: %s: node id: %d", plugin_name, __func__, nodeid); + + if(nodeid != 0) { + return 0; + } + + scord_plugin_config_t cfg = default_cfg; + + if(process_config(ac, av, &cfg) != 0) { + return -1; + } + + /* get job id */ + uint32_t jobid; + + if((rc = spank_get_item(sp, S_JOB_ID, &jobid)) != ESPANK_SUCCESS) { + slurm_error("%s: failed to get jobid: %s", plugin_name, + spank_strerror(rc)); + return -1; + } + + slurm_debug("%s: %s: job id: %d", plugin_name, __func__, jobid); + + /* list of job nodes */ + hostlist_t hostlist = get_slurm_hostlist(sp); + if(!hostlist) { + slurm_error("%s: failed to retrieve hostlist", plugin_name); + return -1; + } + + char buf[256]; + slurm_hostlist_ranged_string(hostlist, sizeof(buf), buf); + slurm_debug("%s: %s: hostlist: %s", plugin_name, __func__, buf); + + scord_nodelist_t nodelist = scord_nodelist_create(hostlist); + + int ec; + + if(!nodelist) { + slurm_error("%s: failed to create nodelist", plugin_name); + ec = -1; + goto cleanup; + } + + if((ec = scord_register_job(cfg, nodelist, jobid)) != 0) { + slurm_error("%s: failed to register job with scord", plugin_name); + ec = -1; + goto 
cleanup; + } + +cleanup: + if(cfg.scordctl_info.addr) { + free((void*) cfg.scordctl_info.addr); + } + + scord_nodelist_destroy(nodelist); + slurm_hostlist_destroy(hostlist); + + return ec; +} diff --git a/plugins/slurm/utils.c b/plugins/slurm/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..2107498c256b3f1c27fddec1c2921799de3abfe5 --- /dev/null +++ b/plugins/slurm/utils.c @@ -0,0 +1,246 @@ +/****************************************************************************** + * Copyright 2022-2023, Inria, France. + * Copyright 2023, Barcelona Supercomputing Center (BSC), Spain. + * All rights reserved. + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the Lesser GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the Lesser GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: LGPL-3.0-or-later + *****************************************************************************/ + +#include +#include +#include +#include +#include "utils.h" + +extern const char plugin_name[]; + +hostlist_t +get_slurm_hostlist(spank_t sp) { + + /* get list of nodes. /!\ at this point env SLURM_NODELIST is + set, but not SLURM_JOB_NODELIST! 
*/ + + char* nodelist = NULL; + + spank_context_t sctx = spank_context(); + + if(sctx != S_CTX_LOCAL && sctx != S_CTX_ALLOCATOR && sctx != S_CTX_REMOTE) { + return NULL; + } + + if(sctx == S_CTX_LOCAL || sctx == S_CTX_ALLOCATOR) { + nodelist = getenv("SLURM_NODELIST"); + + if(!nodelist) { + slurm_error("%s: failed to get SLURM_NODELIST", plugin_name); + return NULL; + } + } else { + + spank_err_t ec = ESPANK_SUCCESS; + int size = 256; + char* buffer = malloc(sizeof(char) * size); + + ec = spank_getenv(sp, "SLURM_NODELIST", buffer, size); + + if(ec != ESPANK_SUCCESS) { + slurm_error("%s: failed to get SLURM_NODELIST: %s", plugin_name, + spank_strerror(ec)); + return NULL; + } + + nodelist = buffer; + } + + slurm_debug("%s: SLURM_NODELIST=%s", plugin_name, nodelist); + + hostlist_t hl = NULL; + hl = slurm_hostlist_create(nodelist); + + if(!hl) { + slurm_error("%s: slurm_hostlist_create() failed", plugin_name); + return NULL; + } + + return hl; +} + +scord_nodelist_t +scord_nodelist_create(hostlist_t hostlist) { + + ADM_node_t* nodes = NULL; + char* host = NULL; + + /* get number of nodes */ + int n = slurm_hostlist_count(hostlist); + if(n <= 0) { + slurm_error("%s: slurm_hostlist_count() failed", plugin_name); + goto error; + } + + /* allocate array of ADM_node_t */ + nodes = calloc(n, sizeof(ADM_node_t)); + if(!nodes) { + slurm_error("%s: calloc() failed", plugin_name); + goto error; + } + + /* fill array of ADM_node_t */ + for(int i = 0; i < n; i++) { + host = slurm_hostlist_shift(hostlist); + if(!host) { + slurm_error("%s: slurm_hostlist_shift() failed", plugin_name); + goto error; + } + + nodes[i] = ADM_node_create(host, ADM_NODE_REGULAR); + + if(!nodes[i]) { + slurm_error("%s: ADM_node_create() failed", plugin_name); + goto error; + } + } + + scord_nodelist_t nodelist = calloc(1, sizeof(struct scord_nodelist)); + + if(!nodelist) { + slurm_error("%s: calloc() failed", plugin_name); + goto error; + } + + nodelist->nodes = nodes; + nodelist->nnodes = n; + + return 
nodelist;
+
+error:
+    if(nodes) {
+        for(int i = 0; i < n; i++) {
+            if(nodes[i]) {
+                ADM_node_destroy(nodes[i]);
+            }
+        }
+        free(nodes);
+    }
+
+    return NULL;
+}
+
+/* Number of nodes in the list, or -1 when `nodelist` is NULL. */
+int
+scord_nodelist_get_nodecount(scord_nodelist_t nodelist) {
+    return nodelist ? (int) nodelist->nnodes : -1;
+}
+
+/* Borrowed pointer to the internal node array (NULL when `nodelist` is
+ * NULL). */
+ADM_node_t*
+scord_nodelist_get_nodes(scord_nodelist_t nodelist) {
+    if(!nodelist) {
+        return NULL;
+    }
+    return nodelist->nodes;
+}
+
+/* Borrowed handle for the node at `index`, or NULL when out of range. */
+ADM_node_t
+scord_nodelist_get_node(scord_nodelist_t nodelist, int index) {
+    if(!nodelist || index < 0 || index >= nodelist->nnodes) {
+        return NULL;
+    }
+    return nodelist->nodes[index];
+}
+
+/* Destroy the node list and every node it owns. NULL-safe. */
+void
+scord_nodelist_destroy(scord_nodelist_t nodelist) {
+    if(nodelist) {
+        if(nodelist->nodes) {
+            for(ssize_t i = 0; i < nodelist->nnodes; i++) {
+                if(nodelist->nodes[i]) {
+                    ADM_node_destroy(nodelist->nodes[i]);
+                }
+            }
+            free(nodelist->nodes);
+        }
+        free(nodelist);
+    }
+}
+
+/* Resolve `hostname` to its first IPv4 address, written as a dotted quad
+ * into `buffer`, which must hold at least INET_ADDRSTRLEN bytes.
+ * Returns 0 on success or a getaddrinfo() error code. */
+int
+resolve_host(const char* hostname, char* buffer) {
+
+    struct addrinfo hints, *result;
+    int rv;
+
+    memset(&hints, 0, sizeof(hints));
+    hints.ai_family = AF_INET;
+    hints.ai_socktype = SOCK_DGRAM;
+
+    if((rv = getaddrinfo(hostname, NULL, &hints, &result)) != 0) {
+        return rv;
+    }
+
+    // we only return the first AF_INET address
+    for(struct addrinfo* rp = result; rp != NULL; rp = rp->ai_next) {
+        switch(rp->ai_family) {
+            case AF_INET:
+                inet_ntop(AF_INET,
+                          &((struct sockaddr_in*) rp->ai_addr)->sin_addr,
+                          buffer, INET_ADDRSTRLEN);
+                freeaddrinfo(result);
+                return 0;
+
+            default:
+                continue;
+        }
+    }
+
+    freeaddrinfo(result);
+    return EAI_NONAME;
+}
+
+/* Build a malloc'd Margo address string `proto://ipv4:port` for `hostname`.
+ * With a NULL `protocol`, returns a plain copy of `hostname`. Returns NULL
+ * on error; the caller owns the returned string. */
+const char*
+margo_address_create(const char* protocol, const char* hostname, int port) {
+
+    const char sep[] = "://";
+
+    /* BUGFIX: check `hostname` before it is dereferenced — the original
+     * evaluated strlen(hostname) in the NULL-protocol branch first, so
+     * protocol == NULL && hostname == NULL crashed */
+    if(!hostname) {
+        return NULL;
+    }
+
+    if(!protocol) {
+        return strndup(hostname, strlen(hostname));
+    }
+
+    int rv;
+    char buffer[INET_ADDRSTRLEN];
+    if((rv = resolve_host(hostname, buffer)) != 0) {
+        slurm_error("%s: resolve_host() failed: %s", plugin_name,
+                    gai_strerror(rv));
+        return NULL;
+    }
+
+    size_t n = 
snprintf(NULL, 0, "%s%s%s:%d", protocol, sep, buffer, port); + char* addr = malloc(n + 1); + + if(!addr) { + return NULL; + } + + snprintf(addr, n + 1, "%s%s%s:%d", protocol, sep, buffer, port); + return addr; +} diff --git a/plugins/slurm/utils.h b/plugins/slurm/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..46ed86864cd50e4eca43e0678c9cd573b20464ca --- /dev/null +++ b/plugins/slurm/utils.h @@ -0,0 +1,59 @@ +/****************************************************************************** + * Copyright 2022-2023, Inria, France. + * Copyright 2023, Barcelona Supercomputing Center (BSC), Spain. + * All rights reserved. + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the Lesser GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the Lesser GNU General Public License + * along with scord. If not, see . 
+ * + * SPDX-License-Identifier: LGPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_SLURM_PLUGIN_UTILS_H +#define SCORD_SLURM_PLUGIN_UTILS_H + +#include +#include + +hostlist_t +get_slurm_hostlist(spank_t sp); + +typedef struct scord_nodelist { + ADM_node_t* nodes; + ssize_t nnodes; +}* scord_nodelist_t; + +scord_nodelist_t +scord_nodelist_create(hostlist_t hostlist); + +int +scord_nodelist_get_nodecount(scord_nodelist_t nodelist); + +ADM_node_t* +scord_nodelist_get_nodes(scord_nodelist_t nodelist); + +ADM_node_t +scord_nodelist_get_node(scord_nodelist_t nodelist, int index); + +void +scord_nodelist_destroy(scord_nodelist_t nodelist); + +const char* +margo_address_create(const char* protocol, const char* hostname, int port); + +#endif // SCORD_SLURM_PLUGIN_UTILS_H diff --git a/src/scord-ctl/CMakeLists.txt b/src/scord-ctl/CMakeLists.txt index dbfb6a330875f4469afa390aa99cb234160f3ceb..7e97d3588142b741ffa138d4ba860a4005a887b4 100644 --- a/src/scord-ctl/CMakeLists.txt +++ b/src/scord-ctl/CMakeLists.txt @@ -44,4 +44,16 @@ target_link_libraries( libscord_cxx_types fmt::fmt CLI11::CLI11 ryml::ryml ) +set(SCORD_CTL_BIN "${CMAKE_INSTALL_FULL_BINDIR}/scord-ctl") + +define_property( + TARGET PROPERTY SCORD_CTL_BINARY + BRIEF_DOCS "Path to scord-ctl binary" + FULL_DOCS "Path to scord-ctl binary" +) + +set_target_properties( + scord-ctl PROPERTIES SCORD_CTL_BINARY ${CMAKE_INSTALL_FULL_BINDIR}/scord-ctl +) + install(TARGETS scord-ctl DESTINATION ${CMAKE_INSTALL_BINDIR})