From 2676e81d8230db573b26781e235160e5611cd63e Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Mon, 1 Apr 2024 16:07:31 +0200 Subject: [PATCH 1/9] SLURM 23 changes hostlist_t signature, we need a * now --- plugins/slurm/slurmadmcli.c | 2 +- plugins/slurm/utils.c | 6 +++--- plugins/slurm/utils.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c index 61d10078..aad632c4 100644 --- a/plugins/slurm/slurmadmcli.c +++ b/plugins/slurm/slurmadmcli.c @@ -740,7 +740,7 @@ slurm_spank_user_init(spank_t sp, int ac, char** av) { slurm_debug("%s: %s: job id: %d", plugin_name, __func__, jobid); /* list of job nodes */ - hostlist_t hostlist = get_slurm_hostlist(sp); + hostlist_t*hostlist = get_slurm_hostlist(sp); if(!hostlist) { slurm_error("%s: failed to retrieve hostlist", plugin_name); return -1; diff --git a/plugins/slurm/utils.c b/plugins/slurm/utils.c index 2107498c..cd06c733 100644 --- a/plugins/slurm/utils.c +++ b/plugins/slurm/utils.c @@ -32,7 +32,7 @@ extern const char plugin_name[]; -hostlist_t +hostlist_t * get_slurm_hostlist(spank_t sp) { /* get list of nodes. /!\ at this point env SLURM_NODELIST is @@ -72,7 +72,7 @@ get_slurm_hostlist(spank_t sp) { slurm_debug("%s: SLURM_NODELIST=%s", plugin_name, nodelist); - hostlist_t hl = NULL; + hostlist_t * hl = NULL; hl = slurm_hostlist_create(nodelist); if(!hl) { @@ -84,7 +84,7 @@ get_slurm_hostlist(spank_t sp) { } scord_nodelist_t -scord_nodelist_create(hostlist_t hostlist) { +scord_nodelist_create(hostlist_t* hostlist) { ADM_node_t* nodes = NULL; char* host = NULL; diff --git a/plugins/slurm/utils.h b/plugins/slurm/utils.h index 46ed8686..3405c528 100644 --- a/plugins/slurm/utils.h +++ b/plugins/slurm/utils.h @@ -30,7 +30,7 @@ #include #include -hostlist_t +hostlist_t * get_slurm_hostlist(spank_t sp); typedef struct scord_nodelist { @@ -39,7 +39,7 @@ typedef struct scord_nodelist { }* scord_nodelist_t; scord_nodelist_t -scord_nodelist_create(hostlist_t hostlist); +scord_nodelist_create(hostlist_t* hostlist); int scord_nodelist_get_nodecount(scord_nodelist_t nodelist); -- GitLab From fe41a711da3fefad2c2afa7f8f7b836f0abaabce Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 10 Apr 2024 13:15:14 +0200 Subject: [PATCH 2/9] Solved some issues with systemd --- plugins/adhoc_services.d/gekkofs.sh | 6 +++--- plugins/slurm/scord_epilog.sh.in | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/plugins/adhoc_services.d/gekkofs.sh b/plugins/adhoc_services.d/gekkofs.sh index be65b8f2..57fed5af 100644 --- a/plugins/adhoc_services.d/gekkofs.sh +++ b/plugins/adhoc_services.d/gekkofs.sh @@ -15,8 +15,8 @@ if [ "$1" == "start" ]; then datadir=$7 mountdir=$9 unset SLURM_CPU_BIND SLURM_CPU_BIND_LIST SLURM_CPU_BIND_TYPE SLURM_CPU_BIND_VERBOSE - srun -N $num_nodes -n $num_nodes --oversubscribe --cpus-per-task=1 --mem-per-cpu=1 --export=ALL bash -c "mkdir -p $mountdir; mkdir -p $datadir" - srun -N $num_nodes -n $num_nodes --oversubscribe --cpus-per-task=4 --mem-per-cpu=1 --export=ALL bash -c "gkfs_daemon --rootdir $datadir --mountdir $mountdir" & + srun -N $num_nodes -n $num_nodes --oversubscribe --overlap --cpus-per-task=1 --mem-per-cpu=1 --export=ALL bash -c "mkdir -p $mountdir; mkdir -p $datadir" + srun -N $num_nodes -n $num_nodes --oversubscribe --overlap --cpus-per-task=4 --mem-per-cpu=1 --export=ALL bash -c "gkfs_daemon --rootdir $datadir --mountdir $mountdir" & sleep 4 elif [ "$1" == "stop" ]; then echo "Stopping GEKKOFS" @@ -28,7 +28,7 @@ elif [ "$1" == "stop" ]; then exit 0 fi unset SLURM_CPU_BIND SLURM_CPU_BIND_LIST SLURM_CPU_BIND_TYPE SLURM_CPU_BIND_VERBOSE - srun -N $num_nodes -n $num_nodes --oversubscribe --cpus-per-task=1 --mem-per-cpu=1 pkill -9 gkfs_daemon + srun -N $num_nodes -n $num_nodes --overlap --oversubscribe --cpus-per-task=1 --mem-per-cpu=1 --export=ALL bash -c "pkill -9 gkfs_daemon" elif [ "$1" == "expand" ]; then echo "Expand command" elif [ "$1" == "shrink" ]; then diff --git a/plugins/slurm/scord_epilog.sh.in b/plugins/slurm/scord_epilog.sh.in index afb047de..516d7a8b 100755 --- a/plugins/slurm/scord_epilog.sh.in +++ b/plugins/slurm/scord_epilog.sh.in @@ -108,8 +108,8 @@ CARGO_CONFIG_FILE=$CONFIG_DIRECTORY/$CARGO_ID.cfg CARGO_SERVICE_NAME=$(systemd-escape --template cargo@.service "$CARGO_ID") echo "Shutting down Cargo data stager for job $SLURM_JOB_ID (user: $SLURM_JOB_USER)" - -if ! run_as "$SLURM_JOB_USER" systemctl --user stop "$CARGO_SERVICE_NAME"; then +CUID=$(id -u $SLURM_JOB_USER) +if ! run_as "$SLURM_JOB_USER" DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/$CUID/bus systemctl --user stop "$CARGO_SERVICE_NAME"; then exit 1 fi -- GitLab From ef504e457d18ee5e8ef36b18cc0ec916377fb561 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 10 Apr 2024 21:12:42 +0200 Subject: [PATCH 3/9] added adhoc uuid to query --- cli/scord_query.cpp | 3 ++- src/lib/scord/types.hpp | 11 +++++++++-- src/scord/rpc_server.cpp | 4 ++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cli/scord_query.cpp b/cli/scord_query.cpp index d0b9b44f..9dfec28f 100644 --- a/cli/scord_query.cpp +++ b/cli/scord_query.cpp @@ -85,8 +85,9 @@ main(int argc, char* argv[]) { fmt::print(stdout, "Job metadata:\n" " adhoc_controller: {}\n" + " adhoc_uuid: {}\n" " io_procs: {}\n", - info.adhoc_controller_address(), info.io_procs()); + info.adhoc_controller_address(), info.uuid(), info.io_procs()); } catch(const std::exception& ex) { fmt::print(stderr, "Error: {}\n", ex.what()); diff --git a/src/lib/scord/types.hpp b/src/lib/scord/types.hpp index 009240b6..7a666848 100644 --- a/src/lib/scord/types.hpp +++ b/src/lib/scord/types.hpp @@ -493,9 +493,9 @@ class job_info { public: job_info() = default; - explicit job_info(std::string adhoc_controller_address, + explicit job_info(std::string adhoc_controller_address,std::string uuid, std::uint32_t procs_for_io) - : m_adhoc_address(std::move(adhoc_controller_address)), + : m_adhoc_address(std::move(adhoc_controller_address)), m_uuid(std::move(uuid)), m_procs_for_io(procs_for_io) {} constexpr std::string const& @@ -512,16 +512,23 @@ public: return m_procs_for_io; } + constexpr std::string const& + uuid() const { + return m_uuid; + } + private: friend class cereal::access; template void serialize(Archive& ar) { ar & m_adhoc_address; + ar & m_uuid; ar & m_procs_for_io; } std::string m_adhoc_address; + std::string m_uuid; std::uint32_t m_procs_for_io; }; diff --git a/src/scord/rpc_server.cpp b/src/scord/rpc_server.cpp index da3027a3..4684b809 100644 --- a/src/scord/rpc_server.cpp +++ b/src/scord/rpc_server.cpp @@ -164,9 +164,13 @@ rpc_server::query(const network::request& req, slurm_job_id job_id) { return tl::make_unexpected( error_code::no_resources); } + + + return job_info{ job_metadata_ptr->adhoc_storage_metadata() ->controller_address(), + job_metadata_ptr->adhoc_storage_metadata()->uuid(), job_metadata_ptr->io_procs()}; }); -- GitLab From 01ac95c79192997d53157286b665299bf4fb8e18 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 11 Apr 2024 14:38:44 +0200 Subject: [PATCH 4/9] Solved issue with gekkofs type in rpc --- cli/scord_query.cpp | 6 +++-- plugins/slurm/slurmadmcli.c | 53 ++++++++++++++++++++++++++++++++++--- src/scord/rpc_server.cpp | 2 +- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/cli/scord_query.cpp b/cli/scord_query.cpp index 9dfec28f..b706579d 100644 --- a/cli/scord_query.cpp +++ b/cli/scord_query.cpp @@ -31,6 +31,7 @@ struct query_config { std::string progname; std::string server_address; std::uint32_t job_id{}; + bool verbose{}; }; query_config @@ -47,6 +48,7 @@ parse_command_line(int argc, char* argv[]) { ->required(); app.add_option("job_id", cfg.job_id, "Job ID")->required(); + app.add_option("-v, --verbose ", cfg.verbose, "Enable verbose output"); try { app.parse(argc, argv); return cfg; @@ -81,14 +83,14 @@ main(int argc, char* argv[]) { scord::job_info info = scord::query(scord::server{protocol, address}, cfg.job_id); - + if (cfg.verbose) fmt::print(stdout, "Job metadata:\n" " adhoc_controller: {}\n" " adhoc_uuid: {}\n" " io_procs: {}\n", info.adhoc_controller_address(), info.uuid(), info.io_procs()); - + else fmt::print(stdout,"{}\n", info.uuid()); } catch(const std::exception& ex) { fmt::print(stderr, "Error: {}\n", ex.what()); return EXIT_FAILURE; diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c index aad632c4..b460d844 100644 --- a/plugins/slurm/slurmadmcli.c +++ b/plugins/slurm/slurmadmcli.c @@ -33,6 +33,8 @@ #include #include +#include +#include #include "defaults.h" #include "utils.h" @@ -436,8 +438,8 @@ process_config(int ac, char** av, scord_plugin_config_t* cfg) { } static int -scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, - uint32_t jobid) { +scord_register_job(spank_t sp, scord_plugin_config_t cfg, + scord_nodelist_t nodelist, uint32_t jobid) { int rc = 0; int nnodes = 0; @@ -450,6 +452,7 @@ scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, ADM_adhoc_storage_t adhoc_storage = NULL; ADM_job_requirements_t scord_reqs = NULL; ADM_job_t scord_job = NULL; + ADM_transfer_t transfer = NULL; char* adhoc_path = NULL; /* First determine the node on which to launch scord-ctl (typically the @@ -578,6 +581,48 @@ scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, goto end; } + // define the environment variables for the job + switch(adhoc_type) { + case ADM_ADHOC_STORAGE_GEKKOFS: + spank_setenv(sp, "ADHOC_TYPE", "gekkofs", 1); + + spank_setenv(sp, "LIBGKFS_HOSTS_FILE", "/tmp/gekkofs/gkfs_hosts.txt", 1); + break; + case ADM_ADHOC_STORAGE_EXPAND: + spank_setenv(sp, "ADHOC_TYPE", "expand", 1); + break; + case ADM_ADHOC_STORAGE_DATACLAY: + spank_setenv(sp, "ADHOC_TYPE", "dataclay", 1); + break; + case ADM_ADHOC_STORAGE_HERCULES: + spank_setenv(sp, "ADHOC_TYPE", "hercules", 1); + break; + + } + spank_setenv(sp, "ADHOC_PATH", adhoc_path, 1); + + + // divide input_datasets into sources and targets + ADM_dataset_t * sources = malloc((input_datasets_count) * sizeof(ADM_dataset_t)); + ADM_dataset_t * targets = malloc((input_datasets_count) * sizeof(ADM_dataset_t));; + + for (unsigned int i = 0; i < input_datasets_count; i++) { + //ADM_dataset_route_list_t r_inputs; + sources[i] = scord_reqs->r_inputs->l_routes[i].d_src; + targets[i] = scord_reqs->r_inputs->l_routes[i].d_dst; + } + + if (ADM_transfer_datasets(scord_server, scord_job, sources, + input_datasets_count, + targets, + input_datasets_count, 0, 0, + ADM_MAPPING_ONE_TO_ONE, &transfer)!= + ADM_SUCCESS) { + slurm_error("%s: adhoc storage transfer failed", plugin_name); + rc = -1; + goto end; + } + end: if(adhoc_path) { free(adhoc_path); @@ -740,7 +785,7 @@ slurm_spank_user_init(spank_t sp, int ac, char** av) { slurm_debug("%s: %s: job id: %d", plugin_name, __func__, jobid); /* list of job nodes */ - hostlist_t*hostlist = get_slurm_hostlist(sp); + hostlist_t* hostlist = get_slurm_hostlist(sp); if(!hostlist) { slurm_error("%s: failed to retrieve hostlist", plugin_name); return -1; @@ -760,7 +805,7 @@ slurm_spank_user_init(spank_t sp, int ac, char** av) { goto cleanup; } - if((ec = scord_register_job(cfg, nodelist, jobid)) != 0) { + if((ec = scord_register_job(sp, cfg, nodelist, jobid)) != 0) { slurm_error("%s: failed to register job with scord", plugin_name); ec = -1; goto cleanup; diff --git a/src/scord/rpc_server.cpp b/src/scord/rpc_server.cpp index 4684b809..eca36f9e 100644 --- a/src/scord/rpc_server.cpp +++ b/src/scord/rpc_server.cpp @@ -52,7 +52,7 @@ dataset_process(std::string id) { type = cargo::dataset::type::parallel; } else if(id.find("gekkofs:") != std::string::npos) { id = id.substr(strlen("gekkofs:")); - type = cargo::dataset::type::posix; + type = cargo::dataset::type::gekkofs; } else if(id.find("hercules:") != std::string::npos) { id = id.substr(strlen("hercules:")); type = cargo::dataset::type::hercules; -- GitLab From 6152a5f2b67c1acb20f85b7023000e974ec7e268 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 11 Apr 2024 19:16:12 +0200 Subject: [PATCH 5/9] Stage-Out --- plugins/slurm/slurmadmcli.c | 407 ++++++++++++++++++++---- plugins/slurm/systemd/cargo@.service.in | 2 +- src/lib/c_wrapper.cpp | 34 +- src/lib/scord/scord.h | 2 +- 4 files changed, 375 insertions(+), 70 deletions(-) diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c index b460d844..70076867 100644 --- a/plugins/slurm/slurmadmcli.c +++ b/plugins/slurm/slurmadmcli.c @@ -38,6 +38,12 @@ #include "defaults.h" #include "utils.h" +#include +#include +#include +#include +#include + /** * Slurm SPANK plugin to handle the ADMIRE adhoc storage CLI. Options are * forwarded to scord on srun, salloc and sbatch. See the struct spank_option @@ -436,7 +442,16 @@ process_config(int ac, char** av, scord_plugin_config_t* cfg) { return 0; } - +ADM_server_t scord_server = NULL; +ADM_node_t* nodes = NULL; +ADM_job_resources_t job_resources = NULL; +ADM_adhoc_resources_t adhoc_resources = NULL; +ADM_adhoc_context_t adhoc_ctx = NULL; +ADM_adhoc_storage_t adhoc_storage = NULL; +ADM_job_requirements_t scord_reqs = NULL; +ADM_job_t scord_job = NULL; +ADM_transfer_t transfer = NULL; +char* adhoc_path = NULL; static int scord_register_job(spank_t sp, scord_plugin_config_t cfg, scord_nodelist_t nodelist, uint32_t jobid) { @@ -444,16 +459,16 @@ scord_register_job(spank_t sp, scord_plugin_config_t cfg, int rc = 0; int nnodes = 0; - ADM_server_t scord_server = NULL; - ADM_node_t* nodes = NULL; - ADM_job_resources_t job_resources = NULL; - ADM_adhoc_resources_t adhoc_resources = NULL; - ADM_adhoc_context_t adhoc_ctx = NULL; - ADM_adhoc_storage_t adhoc_storage = NULL; - ADM_job_requirements_t scord_reqs = NULL; - ADM_job_t scord_job = NULL; - ADM_transfer_t transfer = NULL; - char* adhoc_path = NULL; + /* ADM_server_t scord_server = NULL; + ADM_node_t* nodes = NULL; + ADM_job_resources_t job_resources = NULL; + ADM_adhoc_resources_t adhoc_resources = NULL; + ADM_adhoc_context_t adhoc_ctx = NULL; + ADM_adhoc_storage_t adhoc_storage = NULL; + ADM_job_requirements_t scord_reqs = NULL; + ADM_job_t scord_job = NULL; + ADM_transfer_t transfer = NULL; + char* adhoc_path = NULL; */ /* First determine the node on which to launch scord-ctl (typically the * first node of the allocation) */ @@ -585,8 +600,9 @@ scord_register_job(spank_t sp, scord_plugin_config_t cfg, switch(adhoc_type) { case ADM_ADHOC_STORAGE_GEKKOFS: spank_setenv(sp, "ADHOC_TYPE", "gekkofs", 1); - - spank_setenv(sp, "LIBGKFS_HOSTS_FILE", "/tmp/gekkofs/gkfs_hosts.txt", 1); + + spank_setenv(sp, "LIBGKFS_HOSTS_FILE", + "/tmp/gekkofs/gkfs_hosts.txt", 1); break; case ADM_ADHOC_STORAGE_EXPAND: spank_setenv(sp, "ADHOC_TYPE", "expand", 1); @@ -597,65 +613,69 @@ scord_register_job(spank_t sp, scord_plugin_config_t cfg, case ADM_ADHOC_STORAGE_HERCULES: spank_setenv(sp, "ADHOC_TYPE", "hercules", 1); break; - } spank_setenv(sp, "ADHOC_PATH", adhoc_path, 1); - - // divide input_datasets into sources and targets - ADM_dataset_t * sources = malloc((input_datasets_count) * sizeof(ADM_dataset_t)); - ADM_dataset_t * targets = malloc((input_datasets_count) * sizeof(ADM_dataset_t));; - - for (unsigned int i = 0; i < input_datasets_count; i++) { - //ADM_dataset_route_list_t r_inputs; - sources[i] = scord_reqs->r_inputs->l_routes[i].d_src; - targets[i] = scord_reqs->r_inputs->l_routes[i].d_dst; - } - - if (ADM_transfer_datasets(scord_server, scord_job, sources, - input_datasets_count, - targets, - input_datasets_count, 0, 0, - ADM_MAPPING_ONE_TO_ONE, &transfer)!= - ADM_SUCCESS) { - slurm_error("%s: adhoc storage transfer failed", plugin_name); - rc = -1; - goto end; - } - -end: - if(adhoc_path) { - free(adhoc_path); - } - - if(scord_job) { - ADM_job_destroy(scord_job); - } - - if(scord_reqs) { - ADM_job_requirements_destroy(scord_reqs); - } - - if(adhoc_storage) { - ADM_adhoc_storage_destroy(adhoc_storage); - } - - if(adhoc_ctx) { - ADM_adhoc_context_destroy(adhoc_ctx); - } - - if(adhoc_resources) { - ADM_adhoc_resources_destroy(adhoc_resources); - } - - if(job_resources) { - ADM_job_resources_destroy(job_resources); + if(input_datasets_count > 0) { + // divide input_datasets into sources and targets + ADM_dataset_t* sources = + malloc((input_datasets_count) * sizeof(ADM_dataset_t)); + ADM_dataset_t* targets = + malloc((input_datasets_count) * sizeof(ADM_dataset_t)); + ; + + for(unsigned int i = 0; i < input_datasets_count; i++) { + // ADM_dataset_route_list_t r_inputs; + sources[i] = scord_reqs->r_inputs->l_routes[i].d_src; + targets[i] = scord_reqs->r_inputs->l_routes[i].d_dst; + } + // Unfortunaly we have to sleep or cargo will not find the instance up. + sleep(5); + + if(ADM_transfer_datasets( + scord_server, scord_job, sources, input_datasets_count, + targets, input_datasets_count, 0, 0, ADM_MAPPING_ONE_TO_ONE, + &transfer, true) != ADM_SUCCESS) { + slurm_error("%s: adhoc storage transfer failed", plugin_name); + rc = -1; + goto end; + } } - if(scord_server) { - ADM_server_destroy(scord_server); - } +end: + /* if(adhoc_path) { + free(adhoc_path); + } + + if(scord_job) { + ADM_job_destroy(scord_job); + } + + if(scord_reqs) { + ADM_job_requirements_destroy(scord_reqs); + } + + if(adhoc_storage) { + ADM_adhoc_storage_destroy(adhoc_storage); + } + + if(adhoc_ctx) { + ADM_adhoc_context_destroy(adhoc_ctx); + } + + if(adhoc_resources) { + ADM_adhoc_resources_destroy(adhoc_resources); + } + + if(job_resources) { + ADM_job_resources_destroy(job_resources); + } + + if(scord_server) { + ADM_server_destroy(scord_server); + } + */ return rc; } @@ -821,3 +841,258 @@ cleanup: return ec; } + + +void +remove_dir_content(const char* path) { + struct dirent* de; + char fname[300]; + DIR* dr = opendir(path); + if(dr == NULL) { + return; + } + while((de = readdir(dr)) != NULL) { + int ret = -1; + struct stat statbuf; + sprintf(fname, "%s/%s", path, de->d_name); + if(!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) + continue; + if(!stat(fname, &statbuf)) { + if(S_ISDIR(statbuf.st_mode)) { + + ret = unlinkat(dirfd(dr), fname, AT_REMOVEDIR); + if(ret != 0) { + remove_dir_content(fname); + ret = unlinkat(dirfd(dr), fname, AT_REMOVEDIR); + } + } else { + + unlink(fname); + } + } + } + closedir(dr); +} + + +static int +scord_unregister_job(spank_t sp, scord_plugin_config_t cfg, + scord_nodelist_t nodelist, uint32_t jobid) { + (void) sp; + (void) jobid; + int rc = 0; + + /* First determine the node on which to launch scord-ctl (typically the + * first node of the allocation) */ + ADM_node_t ctl_node = scord_nodelist_get_node(nodelist, 0); + cfg.scordctl_info.addr = margo_address_create( + cfg.scordctl_info.proto, ADM_node_get_hostname(ctl_node), + cfg.scordctl_info.port); + + if(!cfg.scordctl_info.addr) { + slurm_error("%s: failed to compute address scordctl server", + plugin_name); + return -1; + } + + /* The Cargo master will also typically reside on the first node of the + * allocation */ + cfg.cargo_info.addr = margo_address_create(cfg.cargo_info.proto, + ADM_node_get_hostname(ctl_node), + cfg.cargo_info.port); + + slurm_debug("%s: %s: scord_info:", plugin_name, __func__); + slurm_debug("%s: %s: addr: \"%s\",", plugin_name, __func__, + cfg.scord_info.addr); + slurm_debug("%s: %s: proto: \"%s\",", plugin_name, __func__, + cfg.scord_info.proto); + slurm_debug("%s: %s: port: %d,", plugin_name, __func__, + cfg.scord_info.port); + + slurm_debug("%s: %s: scordctl_info:", plugin_name, __func__); + slurm_debug("%s: %s: addr: \"%s\",", plugin_name, __func__, + cfg.scordctl_info.addr); + slurm_debug("%s: %s: proto: \"%s\",", plugin_name, __func__, + cfg.scordctl_info.proto); + slurm_debug("%s: %s: port: %d,", plugin_name, __func__, + cfg.scordctl_info.port); + slurm_debug("%s: %s: cargo_info:", plugin_name, __func__); + slurm_debug("%s: %s: addr: \"%s\",", plugin_name, __func__, + cfg.cargo_info.addr); + slurm_debug("%s: %s: proto: \"%s\",", plugin_name, __func__, + cfg.cargo_info.proto); + slurm_debug("%s: %s: port: %d,", plugin_name, __func__, + cfg.cargo_info.port); + + + // Step 1 : Stage-out + + // divide input_datasets into sources and targets + if(output_datasets_count > 0) { + ADM_dataset_t* sources = + malloc((output_datasets_count) * sizeof(ADM_dataset_t)); + ADM_dataset_t* targets = + malloc((output_datasets_count) * sizeof(ADM_dataset_t)); + ; + + for(unsigned int i = 0; i < output_datasets_count; i++) { + // ADM_dataset_route_list_t r_inputs; + sources[i] = scord_reqs->r_outputs->l_routes[i].d_src; + targets[i] = scord_reqs->r_outputs->l_routes[i].d_dst; + } + + + if(ADM_transfer_datasets( + scord_server, scord_job, sources, output_datasets_count, + targets, output_datasets_count, 0, 0, ADM_MAPPING_ONE_TO_ONE, + &transfer, true) != ADM_SUCCESS) { + slurm_error("%s: adhoc storage transfer failed", plugin_name); + rc = -1; + goto end; + } + } + + // remove_adhoc_storage + + ADM_remove_adhoc_storage(scord_server, adhoc_storage); + // remove all the files (this should be done on all the nodes.. TODO) + remove_dir_content(adhoc_path); + + // remove job + ADM_remove_job(scord_server, scord_job); + +end: + if(adhoc_path) { + free(adhoc_path); + } + + if(scord_job) { + ADM_job_destroy(scord_job); + } + + if(scord_reqs) { + ADM_job_requirements_destroy(scord_reqs); + } + + if(adhoc_storage) { + ADM_adhoc_storage_destroy(adhoc_storage); + } + + if(adhoc_ctx) { + ADM_adhoc_context_destroy(adhoc_ctx); + } + + if(adhoc_resources) { + ADM_adhoc_resources_destroy(adhoc_resources); + } + + if(job_resources) { + ADM_job_resources_destroy(job_resources); + } + + if(scord_server) { + ADM_server_destroy(scord_server); + } + + return rc; +} + + +int +slurm_spank_exit(spank_t sp, int ac, char** av) { + + (void) sp; + (void) ac; + (void) av; + + + spank_err_t rc = ESPANK_SUCCESS; + + // spank_context_t sctx = spank_context(); + + + // slurm_debug("%s: %s() registering options", plugin_name, __func__); + + /* register adm/scord options */ + // struct spank_option* opt = &spank_opts[0]; + // while(opt->name) { + // rc = spank_option_register(sp, opt++); + // } + + + /* No ADMIRE options were passed to the job, nothing to do here */ + if(!scord_flag) { + return 0; + } + + /* Get relative for the node executing id. Job registration is only done + * by the node with ID 0 */ + + uint32_t nodeid; + + if((rc = spank_get_item(sp, S_JOB_NODEID, &nodeid)) != ESPANK_SUCCESS) { + slurm_error("%s: failed to get node id: %s", plugin_name, + spank_strerror(rc)); + return -1; + } + + slurm_info("%s: %s: node id: %d", plugin_name, __func__, nodeid); + + if(nodeid != 0) { + return 0; + } + + scord_plugin_config_t cfg = default_cfg; + + if(process_config(ac, av, &cfg) != 0) { + return -1; + } + + /* get job id */ + uint32_t jobid; + + if((rc = spank_get_item(sp, S_JOB_ID, &jobid)) != ESPANK_SUCCESS) { + slurm_info("%s: failed to get jobid: %s", plugin_name, + spank_strerror(rc)); + return -1; + } + + slurm_info("%s: %s: job id: %d", plugin_name, __func__, jobid); + + /* list of job nodes */ + hostlist_t* hostlist = get_slurm_hostlist(sp); + if(!hostlist) { + slurm_info("%s: failed to retrieve hostlist", plugin_name); + return -1; + } + + char buf[256]; + slurm_hostlist_ranged_string(hostlist, sizeof(buf), buf); + slurm_info("%s: %s: hostlist: %s", plugin_name, __func__, buf); + + scord_nodelist_t nodelist = scord_nodelist_create(hostlist); + + int ec; + + if(!nodelist) { + slurm_info("%s: failed to create nodelist", plugin_name); + ec = -1; + goto cleanup; + } + + + // We get here, do stage-out and clean up + + scord_unregister_job(sp, cfg, nodelist, jobid); + + +cleanup: + if(cfg.scordctl_info.addr) { + free((void*) cfg.scordctl_info.addr); + } + + scord_nodelist_destroy(nodelist); + slurm_hostlist_destroy(hostlist); + + return ec; +} \ No newline at end of file diff --git a/plugins/slurm/systemd/cargo@.service.in b/plugins/slurm/systemd/cargo@.service.in index b0bcb46b..0270b957 100644 --- a/plugins/slurm/systemd/cargo@.service.in +++ b/plugins/slurm/systemd/cargo@.service.in @@ -7,5 +7,5 @@ EnvironmentFile=%h/.config/cargo/%I.cfg ExecStart=@CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/slurm/cargoctl start -s ${CARGO_ADDRESS} -H ${CARGO_HOSTS} -n ${CARGO_NUM_NODES} ExecStop=@CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/slurm/cargoctl stop -s ${CARGO_ADDRESS} Restart=no -PrivateTmp=true +PrivateTmp=false NoNewPrivileges=true diff --git a/src/lib/c_wrapper.cpp b/src/lib/c_wrapper.cpp index 1b9f797a..29cd475b 100644 --- a/src/lib/c_wrapper.cpp +++ b/src/lib/c_wrapper.cpp @@ -227,8 +227,8 @@ ADM_transfer_datasets(ADM_server_t server, ADM_job_t job, ADM_dataset_t sources[], size_t sources_len, ADM_dataset_t targets[], size_t targets_len, ADM_qos_limit_t limits[], size_t limits_len, - ADM_transfer_mapping_t mapping, - ADM_transfer_t* transfer) { + ADM_transfer_mapping_t mapping, ADM_transfer_t* transfer, + bool wait = false) { const auto rv = scord::detail::transfer_datasets( scord::server{server}, scord::job{job}, @@ -241,6 +241,36 @@ ADM_transfer_datasets(ADM_server_t server, ADM_job_t job, } *transfer = static_cast(rv.value()); + if(wait) { + auto rv_wait = scord::detail::query_transfer( + scord::server{server}, scord::job{job}, + scord::transfer{rv.value()}); + if(!rv_wait) { + if(rv_wait.error().value() == scord::error_code::no_such_entity) + { + return ADM_SUCCESS; + } + else + return rv_wait.error(); + } + auto status = rv_wait.value().status(); + + while(status == scord::transfer_state::type::running or + status == scord::transfer_state::type::queued) { + sleep(5); + rv_wait = scord::detail::query_transfer( + scord::server{server}, scord::job{job}, + scord::transfer{rv.value()}); + if(!rv_wait) { + if(rv_wait.error().value() == scord::error_code::no_such_entity) { + return ADM_SUCCESS; + } + else + return rv_wait.error(); + } + status = rv_wait.value().status(); + } + } return ADM_SUCCESS; } diff --git a/src/lib/scord/scord.h b/src/lib/scord/scord.h index d3c75164..b45effc8 100644 --- a/src/lib/scord/scord.h +++ b/src/lib/scord/scord.h @@ -249,7 +249,7 @@ ADM_transfer_datasets(ADM_server_t server, ADM_job_t job, ADM_dataset_t sources[], size_t sources_len, ADM_dataset_t targets[], size_t targets_len, ADM_qos_limit_t limits[], size_t limits_len, - ADM_transfer_mapping_t mapping, ADM_transfer_t* transfer); + ADM_transfer_mapping_t mapping, ADM_transfer_t* transfer, bool wait); /** * Sets the obtained bw for the transfer operation -- GitLab From 9ed9349656a7a9bb720965521bb3c085b090ed95 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Fri, 12 Apr 2024 08:06:19 +0200 Subject: [PATCH 6/9] Solving root installation defines --- plugins/adhoc_services.d/gekkofs.sh | 24 ++++++++++++++++-------- plugins/slurm/scord_prolog.sh.in | 19 +++++++++++++++---- plugins/slurm/slurmadmcli.c | 8 ++++---- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/plugins/adhoc_services.d/gekkofs.sh b/plugins/adhoc_services.d/gekkofs.sh index 57fed5af..e6a4631f 100644 --- a/plugins/adhoc_services.d/gekkofs.sh +++ b/plugins/adhoc_services.d/gekkofs.sh @@ -1,34 +1,42 @@ #!/usr/bin/bash echo "GEKKOFS Script Called" $HOSTNAME $SLURM_JOBID - +# If GKFS_DAEMON is not defined then define it here +if [ -z "$GKFS_DAEMON" ]; then + GKFS_DAEMON=/home/rnou/iodeps/bin/gkfs_daemon +fi +# If LIBGKFS_HOSTS_FILE is not defined then define it here +if [ -z "$LIBGKFS_HOSTS_FILE" ]; then + LIBGKFS_HOSTS_FILE=/tmp/gekkofs/gkfs_hosts.txt +fi if [ "$1" == "start" ]; then echo "Starting GEKKOFS" nodes=$3 num_nodes=$(echo $nodes | awk -F, '{print NF}') - # If num_nodes is greater than 40, we are on the testing environment - if [ $num_nodes -gt 40 ]; then + # If num_nodes is 50, we are on the testing environment + if [ $num_nodes -eq 50 ]; then exit 0 fi workdir=$5 datadir=$7 mountdir=$9 unset SLURM_CPU_BIND SLURM_CPU_BIND_LIST SLURM_CPU_BIND_TYPE SLURM_CPU_BIND_VERBOSE - srun -N $num_nodes -n $num_nodes --oversubscribe --overlap --cpus-per-task=1 --mem-per-cpu=1 --export=ALL bash -c "mkdir -p $mountdir; mkdir -p $datadir" - srun -N $num_nodes -n $num_nodes --oversubscribe --overlap --cpus-per-task=4 --mem-per-cpu=1 --export=ALL bash -c "gkfs_daemon --rootdir $datadir --mountdir $mountdir" & + + srun -N $num_nodes -n $num_nodes --oversubscribe --overlap --cpus-per-task=1 --mem-per-cpu=1 --export=ALL /usr/bin/bash -c "mkdir -p $mountdir; mkdir -p $datadir" + srun -N $num_nodes -n $num_nodes --oversubscribe --overlap --cpus-per-task=1 --mem-per-cpu=1 --export=ALL /usr/bin/bash -c "$GKFS_DAEMON --rootdir $datadir --mountdir $mountdir -H $LIBGKFS_HOSTS_FILE" & sleep 4 elif [ "$1" == "stop" ]; then echo "Stopping GEKKOFS" nodes=$3 num_nodes=$(echo $nodes | awk -F, '{print NF}') - # If num_nodes is greater than 40, we are on the testing environment - if [ $num_nodes -gt 40 ]; then + # If num_nodes is 50, we are on the testing environment + if [ $num_nodes -eq 50 ]; then exit 0 fi unset SLURM_CPU_BIND SLURM_CPU_BIND_LIST SLURM_CPU_BIND_TYPE SLURM_CPU_BIND_VERBOSE - srun -N $num_nodes -n $num_nodes --overlap --oversubscribe --cpus-per-task=1 --mem-per-cpu=1 --export=ALL bash -c "pkill -9 gkfs_daemon" + srun -N $num_nodes -n $num_nodes --overlap --oversubscribe --cpus-per-task=1 --mem-per-cpu=1 --export=ALL /usr/bin/bash -c "pkill -9 gkfs_daemon" elif [ "$1" == "expand" ]; then echo "Expand command" elif [ "$1" == "shrink" ]; then diff --git a/plugins/slurm/scord_prolog.sh.in b/plugins/slurm/scord_prolog.sh.in index d67e0ece..6c187671 100755 --- a/plugins/slurm/scord_prolog.sh.in +++ b/plugins/slurm/scord_prolog.sh.in @@ -176,11 +176,21 @@ CARGO_ID=$(echo "cargo_$SLURM_JOB_ID.$SLURM_JOB_UID" | sha256sum | awk '{ print CARGO_CONFIG_FILE=$CARGO_CONFIG_DIRECTORY/$CARGO_ID.cfg CARGO_MASTER_ADDRESS="$SCORDCTL_PROTO://$ADDRESS:$CARGO_PORT" CARGO_INSTANCE_NAME=$(systemd-escape --template cargo@.service "$CARGO_ID") +# This will fail always as we do not have the job registered in this moment + +#if ! CARGO_NUM_NODES=$(@SCORD_QUERY_PROGRAM@ -s @SCORD_SERVICE_ADDRESS@ "$SLURM_JOB_ID" | grep io_procs | awk '{ print $2 }'); then +# echo "Failed to determine the number of I/O processes for job $SLURM_JOB_ID" +#else +CARGO_NUM_NODES=${#hostnames[@]} +#fi +# If LIBGKFS_HOSTS_FILE is nor defined then do it +if [ -z "$LIBGKFS_HOSTS_FILE" ]; then + LIBGKFS_HOSTS_FILE=/tmp/gekkofs/gkfs_hosts.txt +fi -if ! CARGO_NUM_NODES=$(@SCORD_QUERY_PROGRAM@ -s @SCORD_SERVICE_ADDRESS@ "$SLURM_JOB_ID" | grep io_procs | awk '{ print $2 }'); then - echo "Failed to determine the number of I/O processes for job $SLURM_JOB_ID" -else - CARGO_NUM_NODES=${#hostnames[@]} +# if number of CARGO_NUM_NODES is below 2, use 2, they will be colocated +if [ $CARGO_NUM_NODES -lt 2 ]; then + CARGO_NUM_NODES=2 fi cat <>"$CARGO_CONFIG_FILE" @@ -188,6 +198,7 @@ CARGO_ID=$CARGO_ID CARGO_HOSTS=$hostnames_csv CARGO_NUM_NODES=$CARGO_NUM_NODES CARGO_ADDRESS=$CARGO_MASTER_ADDRESS +LIBGKFS_HOSTS_FILE=$LIBGKFS_HOSTS_FILE EOT CUID=$(id -u $SLURM_JOB_USER) chown "$SLURM_JOB_USER":"$SLURM_JOB_GROUP" "$CARGO_CONFIG_FILE" diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c index 70076867..d109bfea 100644 --- a/plugins/slurm/slurmadmcli.c +++ b/plugins/slurm/slurmadmcli.c @@ -953,11 +953,11 @@ scord_unregister_job(spank_t sp, scord_plugin_config_t cfg, } // remove_adhoc_storage - - ADM_remove_adhoc_storage(scord_server, adhoc_storage); - // remove all the files (this should be done on all the nodes.. TODO) + ADM_terminate_adhoc_storage(scord_server, adhoc_storage); + // ADM_remove_adhoc_storage(scord_server, adhoc_storage); + // remove all the files (this should be done on all the nodes.. TODO) remove_dir_content(adhoc_path); - + rmdir(adhoc_path); // remove job ADM_remove_job(scord_server, scord_job); -- GitLab From e1b77d53fbec24f83805b0a3fccfb0b65981f491 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Fri, 12 Apr 2024 10:43:35 +0200 Subject: [PATCH 7/9] Adapt to SLURM version (hostlist pointer) --- examples/c/ADM_cancel_transfer.c | 2 +- examples/c/ADM_get_transfer_priority.c | 2 +- examples/c/ADM_link_transfer_to_data_operation.c | 2 +- examples/c/ADM_set_transfer_priority.c | 2 +- examples/c/ADM_transfer_datasets.c | 2 +- plugins/slurm/slurmadmcli.c | 9 +++++++-- plugins/slurm/utils.c | 6 +++--- plugins/slurm/utils.h | 10 +++++++--- 8 files changed, 22 insertions(+), 13 deletions(-) diff --git a/examples/c/ADM_cancel_transfer.c b/examples/c/ADM_cancel_transfer.c index 76fff6aa..60c07e00 100644 --- a/examples/c/ADM_cancel_transfer.c +++ b/examples/c/ADM_cancel_transfer.c @@ -117,7 +117,7 @@ main(int argc, char* argv[]) { ADM_transfer_t tx; ret = ADM_transfer_datasets(server, job, sources, sources_len, targets, - targets_len, limits, limits_len, mapping, &tx); + targets_len, limits, limits_len, mapping, &tx, false); if(ret != ADM_SUCCESS) { fprintf(stderr, diff --git a/examples/c/ADM_get_transfer_priority.c b/examples/c/ADM_get_transfer_priority.c index 0442740e..bdaded3d 100644 --- a/examples/c/ADM_get_transfer_priority.c +++ b/examples/c/ADM_get_transfer_priority.c @@ -117,7 +117,7 @@ main(int argc, char* argv[]) { ADM_transfer_t tx; ret = ADM_transfer_datasets(server, job, sources, sources_len, targets, - targets_len, limits, limits_len, mapping, &tx); + targets_len, limits, limits_len, mapping, &tx, false); if(ret != ADM_SUCCESS) { fprintf(stderr, diff --git a/examples/c/ADM_link_transfer_to_data_operation.c b/examples/c/ADM_link_transfer_to_data_operation.c index cbadc2a6..c784fbfd 100644 --- a/examples/c/ADM_link_transfer_to_data_operation.c +++ b/examples/c/ADM_link_transfer_to_data_operation.c @@ -121,7 +121,7 @@ main(int argc, char* argv[]) { ADM_transfer_t tx; ret = ADM_transfer_datasets(server, job, sources, sources_len, targets, - targets_len, limits, limits_len, mapping, &tx); + targets_len, limits, limits_len, mapping, &tx, false); if(ret != ADM_SUCCESS) { diff --git a/examples/c/ADM_set_transfer_priority.c b/examples/c/ADM_set_transfer_priority.c index 3d4bb8dc..5164a9ba 100644 --- a/examples/c/ADM_set_transfer_priority.c +++ b/examples/c/ADM_set_transfer_priority.c @@ -117,7 +117,7 @@ main(int argc, char* argv[]) { ADM_transfer_t tx; ret = ADM_transfer_datasets(server, job, sources, sources_len, targets, - targets_len, limits, limits_len, mapping, &tx); + targets_len, limits, limits_len, mapping, &tx, false); if(ret != ADM_SUCCESS) { fprintf(stderr, diff --git a/examples/c/ADM_transfer_datasets.c b/examples/c/ADM_transfer_datasets.c index e1bc1d80..4086574e 100644 --- a/examples/c/ADM_transfer_datasets.c +++ b/examples/c/ADM_transfer_datasets.c @@ -117,7 +117,7 @@ main(int argc, char* argv[]) { ADM_transfer_t tx; ret = ADM_transfer_datasets(server, job, sources, NSOURCES, targets, - NTARGETS, limits, NLIMITS, mapping, &tx); + NTARGETS, limits, NLIMITS, mapping, &tx, false); if(ret != ADM_SUCCESS) { fprintf(stderr, diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c index d109bfea..30a2b8b0 100644 --- a/plugins/slurm/slurmadmcli.c +++ b/plugins/slurm/slurmadmcli.c @@ -44,6 +44,11 @@ #include #include +#if SLURM_VERSION_NUMBER > SLURM_VERSION_NUM(23,0,0) +#define POINTER * +#else +#define POINTER +#endif /** * Slurm SPANK plugin to handle the ADMIRE adhoc storage CLI. Options are * forwarded to scord on srun, salloc and sbatch. See the struct spank_option @@ -805,7 +810,7 @@ slurm_spank_user_init(spank_t sp, int ac, char** av) { slurm_debug("%s: %s: job id: %d", plugin_name, __func__, jobid); /* list of job nodes */ - hostlist_t* hostlist = get_slurm_hostlist(sp); + hostlist_t POINTER hostlist = get_slurm_hostlist(sp); if(!hostlist) { slurm_error("%s: failed to retrieve hostlist", plugin_name); return -1; @@ -1060,7 +1065,7 @@ slurm_spank_exit(spank_t sp, int ac, char** av) { slurm_info("%s: %s: job id: %d", plugin_name, __func__, jobid); /* list of job nodes */ - hostlist_t* hostlist = get_slurm_hostlist(sp); + hostlist_t POINTER hostlist = get_slurm_hostlist(sp); if(!hostlist) { slurm_info("%s: failed to retrieve hostlist", plugin_name); return -1; diff --git a/plugins/slurm/utils.c b/plugins/slurm/utils.c index cd06c733..0e428f0c 100644 --- a/plugins/slurm/utils.c +++ b/plugins/slurm/utils.c @@ -32,7 +32,7 @@ extern const char plugin_name[]; -hostlist_t * +hostlist_t POINTER get_slurm_hostlist(spank_t sp) { /* get list of nodes. /!\ at this point env SLURM_NODELIST is @@ -72,7 +72,7 @@ get_slurm_hostlist(spank_t sp) { slurm_debug("%s: SLURM_NODELIST=%s", plugin_name, nodelist); - hostlist_t * hl = NULL; + hostlist_t POINTER hl = NULL; hl = slurm_hostlist_create(nodelist); if(!hl) { @@ -84,7 +84,7 @@ get_slurm_hostlist(spank_t sp) { } scord_nodelist_t -scord_nodelist_create(hostlist_t* hostlist) { +scord_nodelist_create(hostlist_t POINTER hostlist) { ADM_node_t* nodes = NULL; char* host = NULL; diff --git a/plugins/slurm/utils.h b/plugins/slurm/utils.h index 3405c528..022683c4 100644 --- a/plugins/slurm/utils.h +++ b/plugins/slurm/utils.h @@ -29,8 +29,12 @@ #include #include - -hostlist_t * +#if SLURM_VERSION_NUMBER > SLURM_VERSION_NUM(23,0,0) +#define POINTER * +#else +#define POINTER +#endif +hostlist_t POINTER get_slurm_hostlist(spank_t sp); typedef struct scord_nodelist { @@ -39,7 +43,7 @@ typedef struct scord_nodelist { }* scord_nodelist_t; scord_nodelist_t -scord_nodelist_create(hostlist_t* hostlist); +scord_nodelist_create(hostlist_t POINTER hostlist); int scord_nodelist_get_nodecount(scord_nodelist_t nodelist); -- GitLab From 28f445a40d732deb6882199ca2d190d424de74d3 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Tue, 30 Apr 2024 11:44:48 +0200 Subject: [PATCH 8/9] 0.3.6 Release --- CMakeLists.txt | 4 ++-- COPYRIGHT_NOTICE | 2 +- plugins/slurm/slurmadmcli.c | 3 ++- spack/packages/scord/package.py | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cd6304d..fa400f92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,7 @@ cmake_minimum_required(VERSION 3.19) project( scord - VERSION 0.3.4 + VERSION 0.3.6 LANGUAGES C CXX ) @@ -312,7 +312,7 @@ find_package(RedisPlusPlus 1.3.3 REQUIRED) ### Cargo: required for transferring datasets between storage tiers message(STATUS "[${PROJECT_NAME}] Checking for Cargo") -find_package(Cargo 0.3.1 REQUIRED) +find_package(Cargo 0.3.6 REQUIRED) message(STATUS "[${PROJECT_NAME}] Checking for Hiredis") find_package(hiredis REQUIRED) diff --git a/COPYRIGHT_NOTICE b/COPYRIGHT_NOTICE index a3f50b7b..502aa0a7 100644 --- a/COPYRIGHT_NOTICE +++ b/COPYRIGHT_NOTICE @@ -1,5 +1,5 @@ /****************************************************************************** - * Copyright 2021-2022, Barcelona Supercomputing Center (BSC), Spain + * Copyright 2021-2024, Barcelona Supercomputing Center (BSC), Spain * * This software was partially supported by the EuroHPC-funded project ADMIRE * (Project ID: 956748, https://www.admire-eurohpc.eu). diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c index 30a2b8b0..7a73dcec 100644 --- a/plugins/slurm/slurmadmcli.c +++ b/plugins/slurm/slurmadmcli.c @@ -1032,7 +1032,8 @@ slurm_spank_exit(spank_t sp, int ac, char** av) { /* Get relative for the node executing id. Job registration is only done * by the node with ID 0 */ - + spank_context_t sctx = spank_context(); + if(sctx != S_CTX_REMOTE) return 0; uint32_t nodeid; if((rc = spank_get_item(sp, S_JOB_NODEID, &nodeid)) != ESPANK_SUCCESS) { diff --git a/spack/packages/scord/package.py b/spack/packages/scord/package.py index 279967eb..e82c2a31 100644 --- a/spack/packages/scord/package.py +++ b/spack/packages/scord/package.py @@ -35,7 +35,7 @@ class Scord(CMakePackage): homepage = "https://storage.bsc.es/gitlab/eu/admire/io-scheduler" url = ("https://storage.bsc.es/gitlab/eu/admire/io-scheduler/-/archive/" - "v0.3.4/io-scheduler-v0.3.4.tar.gz") + "v0.3.6/io-scheduler-v0.3.6.tar.gz") git = "https://storage.bsc.es/gitlab/eu/admire/io-scheduler.git" maintainers("alberto-miranda") @@ -57,6 +57,7 @@ class Scord(CMakePackage): version("0.3.3", sha256="a8b5a8d05858bee91b9675ca6c929f4c16b5b2562f4e6a8dba3ce0aacb721f48") version("0.3.4", sha256="e5e6a46d174db266e1caa2689cd17d88a7dc0623429c5efba20a374383f54a12") + version("0.3.6") # build variants variant('build_type', default='Release', -- GitLab From 360fe5b6a3c4d8a76568cad1c5275da9452ef22c Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 6 Jun 2024 16:14:02 +0200 Subject: [PATCH 9/9] added none --- src/scord/rpc_server.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/scord/rpc_server.cpp b/src/scord/rpc_server.cpp index eca36f9e..7f91a0af 100644 --- a/src/scord/rpc_server.cpp +++ b/src/scord/rpc_server.cpp @@ -62,8 +62,11 @@ dataset_process(std::string id) { } else if(id.find("dataclay:") != std::string::npos) { id = id.substr(strlen("dataclay:")); type = cargo::dataset::type::dataclay; - } else + } else if(id.find("posix:") != std::string::npos) { + id = id.substr(strlen("posix:")); type = cargo::dataset::type::posix; + } + else type = cargo::dataset::type::none; return cargo::dataset{id, type}; } @@ -850,7 +853,7 @@ rpc_server::transfer_datasets(const network::request& req, scord::job_id job_id, std::transform(targets.cbegin(), targets.cend(), std::back_inserter(outputs), [](const auto& tgt) { return ::dataset_process(tgt.id()); }); - + const auto cargo_tx = cargo::transfer_datasets(srv, inputs, outputs); // Register the transfer into the `tranfer_manager`. -- GitLab