From 5ee8ca303cd51df0352627e112bc3b2e1371777d Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 1 Feb 2024 16:10:11 +0100 Subject: [PATCH 1/2] Bugfixing root installation --- plugins/slurm/defaults.h.in | 2 +- plugins/slurm/scord_common.sh.in | 2 +- plugins/slurm/scord_prolog.sh.in | 8 ++++---- plugins/slurm/systemd/cargo@.service.in | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/plugins/slurm/defaults.h.in b/plugins/slurm/defaults.h.in index 9de23bc6..7dc3b7c3 100644 --- a/plugins/slurm/defaults.h.in +++ b/plugins/slurm/defaults.h.in @@ -39,7 +39,7 @@ #define SCORDCTL_TMPDIR_DEFAULT "/tmp" #define CARGO_PROG_DEFAULT "@CARGO_PROGRAM@" #define CARGO_PROTO_DEFAULT SCORD_PROTO_DEFAULT -#define CARGO_PORT_DEFAULT 62000 +#define CARGO_PORT_DEFAULT @CARGO_BIND_PORT@ // clang-format on diff --git a/plugins/slurm/scord_common.sh.in b/plugins/slurm/scord_common.sh.in index f67242b3..ed4eff01 100644 --- a/plugins/slurm/scord_common.sh.in +++ b/plugins/slurm/scord_common.sh.in @@ -183,4 +183,4 @@ export SCORD_QUERY_PROGRAM="@SCORD_QUERY_PROGRAM@" export SCORDCTL_PROGRAM="@SCORDCTL_PROGRAM@" export SCORDCTL_PROTO="@SCORD_TRANSPORT_PROTOCOL@" export SCORDCTL_PORT="@SCORD_CTL_BIND_PORT@" -export CARGO_PORT="@CARGO_PORT@" +export CARGO_PORT="@CARGO_BIND_PORT@" diff --git a/plugins/slurm/scord_prolog.sh.in b/plugins/slurm/scord_prolog.sh.in index 8389cf14..d67e0ece 100755 --- a/plugins/slurm/scord_prolog.sh.in +++ b/plugins/slurm/scord_prolog.sh.in @@ -143,7 +143,7 @@ fi # stored (note that $HOME is not set when this prolog script is being executed). # shellcheck disable=SC2016 USER_HOME=$(run_as "$SLURM_JOB_USER" echo '$HOME') -USER_CONFIG_DIRECTORY="${XDG_CONFIG_HOME:-$USER_HOME/.config}" +USER_CONFIG_DIRECTORY="$USER_HOME/.config" CARGO_CONFIG_DIRECTORY="$USER_CONFIG_DIRECTORY/cargo" SYSTEMD_USER_DIRECTORY="$USER_CONFIG_DIRECTORY/systemd/user" @@ -189,16 +189,16 @@ CARGO_HOSTS=$hostnames_csv CARGO_NUM_NODES=$CARGO_NUM_NODES CARGO_ADDRESS=$CARGO_MASTER_ADDRESS EOT - +CUID=$(id -u $SLURM_JOB_USER) chown "$SLURM_JOB_USER":"$SLURM_JOB_GROUP" "$CARGO_CONFIG_FILE" -if ! run_as "$SLURM_JOB_USER" systemctl --user start "$CARGO_INSTANCE_NAME"; then +if ! run_as "$SLURM_JOB_USER" DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/$CUID/bus systemctl --user start "$CARGO_INSTANCE_NAME"; then exit 1 fi sleep 1s -if ! run_as "$SLURM_JOB_USER" systemctl --user is-active --quiet "$CARGO_INSTANCE_NAME"; then +if ! run_as "$SLURM_JOB_USER" DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/$CUID/bus systemctl --user is-active --quiet "$CARGO_INSTANCE_NAME"; then echo "Cargo data stager failed to start" exit 1 fi diff --git a/plugins/slurm/systemd/cargo@.service.in b/plugins/slurm/systemd/cargo@.service.in index a04872b8..b0bcb46b 100644 --- a/plugins/slurm/systemd/cargo@.service.in +++ b/plugins/slurm/systemd/cargo@.service.in @@ -3,7 +3,7 @@ Description=Cargo parallel data stager [Service] Type=simple -EnvironmentFile=%S/cargo/%I.cfg +EnvironmentFile=%h/.config/cargo/%I.cfg ExecStart=@CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/slurm/cargoctl start -s ${CARGO_ADDRESS} -H ${CARGO_HOSTS} -n ${CARGO_NUM_NODES} ExecStop=@CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/slurm/cargoctl stop -s ${CARGO_ADDRESS} Restart=no -- GitLab From 08da45c375d02283cbfc8d3ec57a16a0103d70ed Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Mon, 5 Feb 2024 07:34:16 +0100 Subject: [PATCH 2/2] Changed Thallium sleep to sleep to remove CPU hog --- src/scord/rpc_server.cpp | 3 ++- src/scord/rpc_server.hpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/scord/rpc_server.cpp b/src/scord/rpc_server.cpp index 10bb1e41..1223d115 100644 --- a/src/scord/rpc_server.cpp +++ b/src/scord/rpc_server.cpp @@ -881,7 +881,8 @@ rpc_server::scheduler_update() { std::vector> return_set; const auto threshold = 0.1f; while(!m_shutting_down) { - thallium::thread::self().sleep(m_network_engine, 500); + sleep(1); + //thallium::thread::self().sleep(m_network_engine, 500); m_transfer_manager.lock(); const auto transfer = m_transfer_manager.transfer(); std::vector v_ids; diff --git a/src/scord/rpc_server.hpp b/src/scord/rpc_server.hpp index e4257081..92322266 100644 --- a/src/scord/rpc_server.hpp +++ b/src/scord/rpc_server.hpp @@ -114,8 +114,8 @@ private: adhoc_storage_manager m_adhoc_manager; pfs_storage_manager m_pfs_manager; transfer_manager m_transfer_manager; - - // Dedicated execution stream for the MPI listener ULT + + // Dedicated execution stream for the Scheduler listener ULT thallium::managed m_scheduler_ess; // ULT for the MPI listener thallium::managed m_scheduler_ult; -- GitLab