diff --git a/CMakeLists.txt b/CMakeLists.txt index 86501d1b6095e3d1be570753e0545b6b7110a660..622c1c165b2b729c2ff805db6b219e749c1d939c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,7 +157,7 @@ set(SCORD_CTL_BIND_PORT ) message(STATUS "[${PROJECT_NAME}] server bind port: ${SCORD_CTL_BIND_PORT}") -set(CARGO_PORT +set(CARGO_BIND_PORT "62000" CACHE STRING "Define the port through which we should communicate with Cargo" @@ -303,7 +303,7 @@ find_package(RedisPlusPlus 1.3.3 REQUIRED) ### Cargo: required for transferring datasets between storage tiers message(STATUS "[${PROJECT_NAME}] Checking for Cargo") -find_package(Cargo 0.2.0 REQUIRED) +find_package(Cargo 0.3.1 REQUIRED) # ############################################################################## diff --git a/etc/scord-ctl.conf.in b/etc/scord-ctl.conf.in index c0c12f7cb21355f6ce2e98040ab9271f9e0abc54..6cadeeb7b723e427482d467f88f8a2986992faa0 100644 --- a/etc/scord-ctl.conf.in +++ b/etc/scord-ctl.conf.in @@ -35,7 +35,145 @@ config: command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/gekkofs.sh stop --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/gekkofs.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/gekkofs.sh + shrink + --hosts {ADHOC_NODES} + dataclay: + # The default working directory for adhoc instances of this type + working_directory: /tmp/dataclay + startup: + # Specific environment variables that should be set for the adhoc + # instance. These will be merged with the environment variables + # already set by Slurm. + environment: + VAR0: value0 + VAR1: value1 + # The command that `scord-ctl` will use to start an adhoc instance of + # this type. 
The following variables are supported that will be + # automatically replaced by scord-ctl if found between curly braces: + # * ADHOC_NODES: A comma separated list of valid job hostnames that + # can be used to start the adhoc instance. + # * ADHOC_DIRECTORY: A unique working directory for each specific + # adhoc instance. This directory will be created by scord-ctl under + # `working_directory` and automatically removed after the adhoc + # instance has been shut down. + # * ADHOC_ID: - A unique ID for the adhoc instance. + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/dataclay.sh + start + --hosts {ADHOC_NODES} + --workdir {ADHOC_DIRECTORY} + --datadir {ADHOC_DIRECTORY}/data + --mountdir {ADHOC_DIRECTORY}/mnt + shutdown: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/dataclay.sh + stop + --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/dataclay.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/dataclay.sh + shrink + --hosts {ADHOC_NODES} + + expand: + # The default working directory for adhoc instances of this type + working_directory: /tmp/expand + startup: + # Specific environment variables that should be set for the adhoc + # instance. These will be merged with the environment variables + # already set by Slurm. + environment: + VAR0: value0 + VAR1: value1 + # The command that `scord-ctl` will use to start an adhoc instance of + # this type. The following variables are supported that will be + # automatically replaced by scord-ctl if found between curly braces: + # * ADHOC_NODES: A comma separated list of valid job hostnames that + # can be used to start the adhoc instance. + # * ADHOC_DIRECTORY: A unique working directory for each specific + # adhoc instance. 
This directory will be created by scord-ctl under + # `working_directory` and automatically removed after the adhoc + # instance has been shut down. + # * ADHOC_ID: - A unique ID for the adhoc instance. + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/expand.sh + start + --hosts {ADHOC_NODES} + --workdir {ADHOC_DIRECTORY} + --datadir {ADHOC_DIRECTORY}/data + --mountdir {ADHOC_DIRECTORY}/mnt + shutdown: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/expand.sh + stop + --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/expand.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/expand.sh + shrink + --hosts {ADHOC_NODES} + hercules: + # The default working directory for adhoc instances of this type + working_directory: /tmp/hercules + startup: + # Specific environment variables that should be set for the adhoc + # instance. These will be merged with the environment variables + # already set by Slurm. + environment: + VAR0: value0 + VAR1: value1 + # The command that `scord-ctl` will use to start an adhoc instance of + # this type. The following variables are supported that will be + # automatically replaced by scord-ctl if found between curly braces: + # * ADHOC_NODES: A comma separated list of valid job hostnames that + # can be used to start the adhoc instance. + # * ADHOC_DIRECTORY: A unique working directory for each specific + # adhoc instance. This directory will be created by scord-ctl under + # `working_directory` and automatically removed after the adhoc + # instance has been shut down. + # * ADHOC_ID: - A unique ID for the adhoc instance. 
+ command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/hercules.sh + start + --hosts {ADHOC_NODES} + --workdir {ADHOC_DIRECTORY} + --datadir {ADHOC_DIRECTORY}/data + --mountdir {ADHOC_DIRECTORY}/mnt + shutdown: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/hercules.sh + stop + --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/hercules.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: @CMAKE_INSTALL_FULL_DATADIR@/@PROJECT_NAME@/adhoc_services.d/hercules.sh + shrink + --hosts {ADHOC_NODES} # default storage tiers made available to applications storage: diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5959af25fad05ef32bf53e112f0dff73c4185da0..6d47717c64a486920f5343507d043723e9374ce4 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -36,6 +36,9 @@ if(SCORD_BUILD_TESTS) set(SCORD_ADDRESS_STRING ${SCORD_TRANSPORT_PROTOCOL}://${SCORD_BIND_ADDRESS}:${SCORD_BIND_PORT}) + set(DATA_STAGER_ADDRESS_STRING + ${SCORD_TRANSPORT_PROTOCOL}://${SCORD_BIND_ADDRESS}:${CARGO_BIND_PORT}) + add_test(start_scord_daemon ${CMAKE_SOURCE_DIR}/scripts/runner.sh start scord.pid ${CMAKE_BINARY_DIR}/src/scord/scord -f -c ${CMAKE_CURRENT_BINARY_DIR}/scord.conf @@ -73,6 +76,20 @@ if(SCORD_BUILD_TESTS) set_tests_properties(stop_scord_ctl PROPERTIES FIXTURES_CLEANUP scord_ctl) + + add_test(start_cargo + ${CMAKE_SOURCE_DIR}/scripts/runner.sh start cargo.pid + mpirun --allow-run-as-root -n 2 ${CARGO_BIN_INSTALL_DIR}/cargo -l ${DATA_STAGER_ADDRESS_STRING} -o ${TEST_DIRECTORY}/cargo.log + ) + set_tests_properties(start_cargo + PROPERTIES FIXTURES_SETUP cargo) + + add_test(stop_cargo + ${CMAKE_SOURCE_DIR}/scripts/runner.sh stop TERM cargo.pid) + + set_tests_properties(stop_cargo + PROPERTIES FIXTURES_CLEANUP cargo) + endif() add_subdirectory(c) diff --git a/examples/c/ADM_cancel_transfer.c 
b/examples/c/ADM_cancel_transfer.c index 77bb2a0b2ecf5c768390990dce1225f6b274653f..76fff6aa861168298082a063e9c8961625c51f9a 100644 --- a/examples/c/ADM_cancel_transfer.c +++ b/examples/c/ADM_cancel_transfer.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,10 +51,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -69,8 +70,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; @@ -143,7 +145,8 @@ main(int argc, 
char* argv[]) { cleanup: ADM_remove_job(server, job); ADM_server_destroy(server); - destroy_datasets(inputs, NINPUTS); - destroy_datasets(outputs, NOUTPUTS); + destroy_routes(inputs, NINPUTS); + destroy_routes(outputs, NOUTPUTS); + destroy_routes(expected_outputs, NEXPOUTPUTS); exit(exit_status); } diff --git a/examples/c/ADM_connect_data_operation.c b/examples/c/ADM_connect_data_operation.c index 8ae8f55ea0a3ce493f1eca9288ae299f85f285f5..f860dd8030794384556bb8bb9e36c372239c3f68 100644 --- a/examples/c/ADM_connect_data_operation.c +++ b/examples/c/ADM_connect_data_operation.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,18 +51,23 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_adhoc_resources_t adhoc_resources = ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES); assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = 
"adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { assert(job_resources); ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; @@ -108,8 +110,8 @@ main(int argc, char* argv[]) { exit_status = EXIT_SUCCESS; bool should_stream = false; - ret = ADM_connect_data_operation(server, job, inputs[0], outputs[0], - should_stream); + ret = ADM_connect_data_operation(server, job, /*inputs[0]*/ NULL, + /*outputs[0]*/ NULL, should_stream); if(ret != ADM_SUCCESS) { @@ -125,6 +127,7 @@ main(int argc, char* argv[]) { "successfully\n"); cleanup: + /* for(int i = 0; i < NINPUTS; ++i) { ADM_dataset_destroy(inputs[i]); } @@ -132,6 +135,7 @@ cleanup: for(int i = 0; i < NOUTPUTS; ++i) { ADM_dataset_destroy(outputs[i]); } + */ ADM_remove_job(server, job); ADM_server_destroy(server); diff --git a/examples/c/ADM_define_data_operation.c b/examples/c/ADM_define_data_operation.c index 04a151c497c4e88b95c434b7a4b6506458e11a2e..6f08e8b88eb29b6dae7a73c7966f9664bedbf9a3 100644 --- a/examples/c/ADM_define_data_operation.c +++ b/examples/c/ADM_define_data_operation.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -56,10 +52,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); 
+ ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -70,8 +71,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -91,7 +92,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_deploy_adhoc_storage.c b/examples/c/ADM_deploy_adhoc_storage.c index b5317eb193e9e283915cff495e2a5fef29ca8a8e..fdc5b3312f56df58a87cc7458b6838f9e1449354 100644 --- a/examples/c/ADM_deploy_adhoc_storage.c +++ b/examples/c/ADM_deploy_adhoc_storage.c @@ -27,10 +27,6 @@ #include #include "common.h" -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -38,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -82,9 +79,9 @@ main(int argc, char* argv[]) { } // 3. 
the adhoc storage execution context - adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); if(adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing adhoc context\n"); @@ -115,9 +112,9 @@ main(int argc, char* argv[]) { // system, let's prepare a new execution context for the adhoc // storage system - new_adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 200, false); + new_adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 200, false); if(new_adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing new adhoc context\n"); diff --git a/examples/c/ADM_finalize_data_operation.c b/examples/c/ADM_finalize_data_operation.c index 89c421ea1c78e51ab66f1141a0a4e570c283236e..2605cc69c2272e19a48193110ab9d441fd2061cf 100644 --- a/examples/c/ADM_finalize_data_operation.c +++ b/examples/c/ADM_finalize_data_operation.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,18 +51,23 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + 
ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_adhoc_resources_t adhoc_resources = ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES); assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { assert(job_resources); ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_get_pending_transfers.c b/examples/c/ADM_get_pending_transfers.c index 6789136cf19e5018d395e82067fe8df395f80feb..7f9e49d3d7d73a4a3d1ea759ab19471d0efadec2 100644 --- a/examples/c/ADM_get_pending_transfers.c +++ b/examples/c/ADM_get_pending_transfers.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,10 +51,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = 
prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -69,8 +70,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_get_qos_constraints.c b/examples/c/ADM_get_qos_constraints.c index be8498ee58579ce6312c60101a29f936146d1cc3..f3e0b8f81278ac2de87d3dfee603f928acc4635d 100644 --- a/examples/c/ADM_get_qos_constraints.c +++ b/examples/c/ADM_get_qos_constraints.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,18 +51,23 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); 
- ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_adhoc_resources_t adhoc_resources = ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES); assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { assert(job_resources); ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_get_statistics.c b/examples/c/ADM_get_statistics.c index 4f0c7db51a95a18898b27b71aa4eea33039328f5..5ce8a715aea320a5ee40b0e3796fef1f9467104e 100644 --- a/examples/c/ADM_get_statistics.c +++ b/examples/c/ADM_get_statistics.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,18 +51,23 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - 
ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_adhoc_resources_t adhoc_resources = ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES); assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { assert(job_resources); ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_get_transfer_priority.c b/examples/c/ADM_get_transfer_priority.c index 5a5957bdeb46646c39045ec455a06d4b3ecd4008..0442740eb548e0eade2e21d5138656cb5d6d52b7 100644 --- a/examples/c/ADM_get_transfer_priority.c +++ b/examples/c/ADM_get_transfer_priority.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,10 +51,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); 
assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -69,8 +70,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_link_transfer_to_data_operation.c b/examples/c/ADM_link_transfer_to_data_operation.c index bfab8b6b1098e511859f9b2567d430fbe01ecb4e..cbadc2a6877ce2cfc1f65fcb54791357ac9b50a2 100644 --- a/examples/c/ADM_link_transfer_to_data_operation.c +++ b/examples/c/ADM_link_transfer_to_data_operation.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,18 +51,23 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + 
ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_adhoc_resources_t adhoc_resources = ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES); assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { assert(job_resources); ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_register_adhoc_storage.c b/examples/c/ADM_register_adhoc_storage.c index 0d2ff17e3ac23b331683feec6be8c819dd9b9fec..106251a0a2614664682e8cc6d839ee858009ed5d 100644 --- a/examples/c/ADM_register_adhoc_storage.c +++ b/examples/c/ADM_register_adhoc_storage.c @@ -27,10 +27,6 @@ #include #include "common.h" -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -38,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -78,9 +75,9 @@ main(int argc, char* argv[]) { } // 3. 
define the adhoc execution context - adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); if(adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing adhoc context\n"); diff --git a/examples/c/ADM_register_job.c b/examples/c/ADM_register_job.c index e4be41538f28117bcaec7e0ad95911a76039e14d..e20a3fae29ca808211d1ea79117787af80d41529 100644 --- a/examples/c/ADM_register_job.c +++ b/examples/c/ADM_register_job.c @@ -27,11 +27,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -39,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -63,8 +59,9 @@ main(int argc, char* argv[]) { ADM_job_resources_t job_resources = NULL; ADM_job_requirements_t reqs = NULL; uint64_t slurm_job_id = 42; - ADM_dataset_t* inputs = NULL; - ADM_dataset_t* outputs = NULL; + ADM_dataset_route_t* inputs = NULL; + ADM_dataset_route_t* outputs = NULL; + ADM_dataset_route_t* expected_outputs = NULL; // Let's prepare all the information required by the API calls. // ADM_register_job() often requires an adhoc storage to have been @@ -88,9 +85,9 @@ main(int argc, char* argv[]) { } // 3. 
the adhoc storage execution context - adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); if(adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing adhoc context\n"); @@ -137,21 +134,29 @@ main(int argc, char* argv[]) { } // 2. the job's requirements - inputs = prepare_datasets("input-dataset-%d", NINPUTS); + inputs = prepare_routes("%s-input-dataset-%d", NINPUTS); if(inputs == NULL) { fprintf(stderr, "Fatal error preparing input datasets\n"); goto cleanup; } - outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + outputs = prepare_routes("%s-output-dataset-%d", NOUTPUTS); if(outputs == NULL) { fprintf(stderr, "Fatal error preparing output datasets\n"); goto cleanup; } + expected_outputs = prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + + if(expected_outputs == NULL) { + fprintf(stderr, "Fatal error preparing expected output datasets\n"); + goto cleanup; + } + if((reqs = ADM_job_requirements_create(inputs, NINPUTS, outputs, NOUTPUTS, + expected_outputs, NEXPOUTPUTS, adhoc_storage)) == NULL) { fprintf(stderr, "ADM_job_requirements_create() failed"); goto cleanup; @@ -191,9 +196,10 @@ cleanup: ADM_server_destroy(server); ADM_job_requirements_destroy(reqs); - destroy_datasets(outputs, NOUTPUTS); - destroy_datasets(inputs, NINPUTS); - ADM_job_resources_destroy(job_resources); + destroy_routes(inputs, NINPUTS); + destroy_routes(outputs, NOUTPUTS); + destroy_routes(expected_outputs, NEXPOUTPUTS); + ADM_job_resources_destroy(job_resources); destroy_nodes(job_nodes, NJOB_NODES); ADM_adhoc_context_destroy(adhoc_ctx); diff --git a/examples/c/ADM_remove_adhoc_storage.c b/examples/c/ADM_remove_adhoc_storage.c index 17d8cf78292b611b48f94fbe0a2ed2566e740c1b..797f1d564f9e8b75d7cc519bca79ed9a1056a8be 100644 --- 
a/examples/c/ADM_remove_adhoc_storage.c +++ b/examples/c/ADM_remove_adhoc_storage.c @@ -27,10 +27,6 @@ #include #include "common.h" -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -38,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -80,9 +77,9 @@ main(int argc, char* argv[]) { } // 3. the adhoc storage execution context - adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); if(adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing adhoc context\n"); diff --git a/examples/c/ADM_remove_job.c b/examples/c/ADM_remove_job.c index ddf7a2d6f2064df0241c056c767b485e16ba55dc..08c8d076aac704e547319b93bfc42305d2368952 100644 --- a/examples/c/ADM_remove_job.c +++ b/examples/c/ADM_remove_job.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,10 +51,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* 
expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -69,8 +70,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_set_dataset_information.c b/examples/c/ADM_set_dataset_information.c index 548befd80a4773c717dcff6645cf35455784dee3..3c5b77caee51b3ac68f5a51f5db6155b28dc5401 100644 --- a/examples/c/ADM_set_dataset_information.c +++ b/examples/c/ADM_set_dataset_information.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,18 +51,23 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", 
NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_adhoc_resources_t adhoc_resources = ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES); assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { assert(job_resources); ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_set_io_resources.c b/examples/c/ADM_set_io_resources.c index cdf07a3bbefc20c5f840cb60522bf980ad37ee77..de5547fdb502559bd72fc366dd229ffd58165bcd 100644 --- a/examples/c/ADM_set_io_resources.c +++ b/examples/c/ADM_set_io_resources.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,18 +51,23 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", 
NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_adhoc_resources_t adhoc_resources = ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES); assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { assert(job_resources); ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_set_qos_constraints.c b/examples/c/ADM_set_qos_constraints.c index a7f07bc11165cafae34ad9b1fff07328e0019b15..f5929ae824801090be06bf1ad0339d3b80ed5025 100644 --- a/examples/c/ADM_set_qos_constraints.c +++ b/examples/c/ADM_set_qos_constraints.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,10 +51,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + 
prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -69,8 +70,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_set_transfer_priority.c b/examples/c/ADM_set_transfer_priority.c index 8ba26be5909a38059d6f4e5377b8016b8b645af8..3d4bb8dc67e9e3bcd5e17adc89ccf19168b152d7 100644 --- a/examples/c/ADM_set_transfer_priority.c +++ b/examples/c/ADM_set_transfer_priority.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,10 +51,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); 
+ ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -69,8 +70,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +91,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_terminate_adhoc_storage.c b/examples/c/ADM_terminate_adhoc_storage.c index dafd05e84a27dab0871aa0cca46ab14b52b92761..fa58bda37762311e6f72408dc1ec713111692130 100644 --- a/examples/c/ADM_terminate_adhoc_storage.c +++ b/examples/c/ADM_terminate_adhoc_storage.c @@ -27,10 +27,6 @@ #include #include "common.h" -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -38,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -82,9 +79,9 @@ main(int argc, char* argv[]) { } // 3. 
the adhoc storage execution context - adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); if(adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing adhoc context\n"); @@ -115,9 +112,9 @@ main(int argc, char* argv[]) { // system, let's prepare a new execution context for the adhoc // storage system - new_adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 200, false); + new_adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 200, false); if(new_adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing new adhoc context\n"); diff --git a/examples/c/ADM_transfer_datasets.c b/examples/c/ADM_transfer_datasets.c index 343c603e223ae667938eed9b86808488ec65d82a..e1bc1d8004a68e00d059aa01641eef60d308336b 100644 --- a/examples/c/ADM_transfer_datasets.c +++ b/examples/c/ADM_transfer_datasets.c @@ -28,14 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 -#define NSOURCES 5 -#define NTARGETS 5 -#define NLIMITS 3 - int main(int argc, char* argv[]) { @@ -43,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -58,10 +51,15 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); assert(inputs); - ADM_dataset_t* outputs = 
prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); + assert(expected_outputs); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -72,8 +70,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -93,7 +91,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; diff --git a/examples/c/ADM_transfer_datasets_user.c b/examples/c/ADM_transfer_datasets_user.c index 303567547c944c587a8357faee629ad0eeab7648..3d95903f989dad7625b70ba11ad8ba40cc39a50c 100644 --- a/examples/c/ADM_transfer_datasets_user.c +++ b/examples/c/ADM_transfer_datasets_user.c @@ -35,6 +35,7 @@ main(int argc, char** argv) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; diff --git a/examples/c/ADM_update_adhoc_storage.c b/examples/c/ADM_update_adhoc_storage.c index 823f5504ef32b92dffcc1ae6f4489effbcb41e6a..dfe2bccf86d822a74662bc25acb94e7b85d388b1 100644 --- a/examples/c/ADM_update_adhoc_storage.c +++ b/examples/c/ADM_update_adhoc_storage.c @@ -27,11 +27,6 @@ #include #include "common.h" -#define NADHOC_NODES 25 -#define N_NEW_ADHOC_NODES 10 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -39,6 +34,7 @@ 
main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -84,9 +80,9 @@ main(int argc, char* argv[]) { } // 3. the adhoc storage execution context - adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); if(adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing adhoc context\n"); @@ -124,9 +120,9 @@ main(int argc, char* argv[]) { goto cleanup; } - new_adhoc_ctx = ADM_adhoc_context_create(cli_args.controller_address, - ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 200, false); + new_adhoc_ctx = ADM_adhoc_context_create( + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 200, false); if(new_adhoc_ctx == NULL) { fprintf(stderr, "Fatal error preparing new adhoc context\n"); diff --git a/examples/c/ADM_update_job.c b/examples/c/ADM_update_job.c index 25ca673525c4455c0438cbfa9498bb6b9c6cf5d7..401e76d4033bc910c91951564443aaa3dc19d869 100644 --- a/examples/c/ADM_update_job.c +++ b/examples/c/ADM_update_job.c @@ -28,11 +28,6 @@ #include #include "common.h" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -40,6 +35,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; cli_args_t cli_args; @@ -55,10 +51,14 @@ main(int argc, char* argv[]) { assert(job_nodes); ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES); assert(adhoc_nodes); - ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS); + ADM_dataset_route_t* inputs = + prepare_routes("%s-input-dataset-%d", NINPUTS); 
assert(inputs); - ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS); + ADM_dataset_route_t* outputs = + prepare_routes("%s-output-dataset-%d", NOUTPUTS); assert(outputs); + ADM_dataset_route_t* expected_outputs = + prepare_routes("%s-exp-output-dataset-%d", NEXPOUTPUTS); ADM_job_resources_t job_resources = ADM_job_resources_create(job_nodes, NJOB_NODES); @@ -69,8 +69,8 @@ main(int argc, char* argv[]) { assert(adhoc_resources); ADM_adhoc_context_t ctx = ADM_adhoc_context_create( - cli_args.controller_address, ADM_ADHOC_MODE_SEPARATE_NEW, - ADM_ADHOC_ACCESS_RDWR, 100, false); + cli_args.controller_address, cli_args.data_stager_address, + ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 100, false); assert(ctx); const char* name = "adhoc_storage_42"; @@ -90,7 +90,8 @@ main(int argc, char* argv[]) { } ADM_job_requirements_t reqs = ADM_job_requirements_create( - inputs, NINPUTS, outputs, NOUTPUTS, adhoc_storage); + inputs, NINPUTS, outputs, NOUTPUTS, expected_outputs, NEXPOUTPUTS, + adhoc_storage); assert(reqs); uint64_t slurm_job_id = 42; @@ -128,14 +129,9 @@ main(int argc, char* argv[]) { cleanup: - for(int i = 0; i < NINPUTS; ++i) { - ADM_dataset_destroy(inputs[i]); - } - - for(int i = 0; i < NOUTPUTS; ++i) { - ADM_dataset_destroy(outputs[i]); - } - + destroy_routes(inputs, NINPUTS); + destroy_routes(outputs, NOUTPUTS); + destroy_routes(expected_outputs, NEXPOUTPUTS); ADM_remove_job(server, job); ADM_server_destroy(server); exit(exit_status); diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index 8e793a7b292bdb3a456afeb35249af6ad193d479..070a57133acdc04b2eeb69c469a7eefc9c2ed398 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -83,9 +83,10 @@ if(SCORD_BUILD_TESTS) add_test(run_${TEST_NAME} ${example} ${SCORD_ADDRESS_STRING} - ${SCORD_CTL_ADDRESS_STRING}) + ${SCORD_CTL_ADDRESS_STRING} + ${DATA_STAGER_ADDRESS_STRING}) set_tests_properties(run_${TEST_NAME} - PROPERTIES FIXTURES_REQUIRED 
"scord_daemon;scord_ctl" + PROPERTIES FIXTURES_REQUIRED "scord_daemon;scord_ctl;cargo" ENVIRONMENT "${TEST_ENV}") add_test(validate_${TEST_NAME} diff --git a/examples/c/common.c b/examples/c/common.c index b19935986c31a73fde8798e65a0b0d2e660fa8a0..d5431c5899a8ea1b238e591f2ae2f6f6c8ac781f 100644 --- a/examples/c/common.c +++ b/examples/c/common.c @@ -18,16 +18,23 @@ process_args(int argc, char* argv[], test_info_t test_info, cli_args_t* args) { ++required_args; } - if(argc != required_args) { + if(test_info.requires_data_stager) { + ++required_args; + } + + /* We accept more arguments than required */ + if(argc < required_args) { fprintf(stderr, "ERROR: missing arguments\n"); - fprintf(stderr, "Usage: %s%s%s\n", test_info.name, + fprintf(stderr, "Usage: %s%s%s%s\n", test_info.name, test_info.requires_server ? " " : "", - test_info.requires_controller ? " " : ""); + test_info.requires_controller ? " " : "", + test_info.requires_data_stager ? " " : ""); return -1; } args->server_address = test_info.requires_server ? argv[1] : NULL; args->controller_address = test_info.requires_controller ? argv[2] : NULL; + args->data_stager_address = test_info.requires_data_stager? 
argv[3] : NULL; return 0; } @@ -109,6 +116,52 @@ destroy_datasets(ADM_dataset_t datasets[], size_t n) { free(datasets); } +ADM_dataset_route_t* +prepare_routes(const char* pattern, size_t n) { + + ADM_dataset_route_t* routes = calloc(n, sizeof(ADM_dataset_route_t)); + + if(!routes) { + return NULL; + } + + for(size_t i = 0; i < n; ++i) { + size_t len = snprintf(NULL, 0, pattern, "XXX", i); + char* id = (char*) alloca(len + 1); + snprintf(id, len + 1, pattern, "src", i); + ADM_dataset_t src = ADM_dataset_create(id); + snprintf(id, len + 1, pattern, "dst", i); + ADM_dataset_t dst = ADM_dataset_create(id); + + if(!src || !dst) { + return NULL; + } + + routes[i] = ADM_dataset_route_create(src, dst); + if(!routes[i]) { + return NULL; + } + } + + return routes; +} + +void +destroy_routes(ADM_dataset_route_t routes[], size_t n) { + + if(!routes) { + return; + } + + for(size_t i = 0; i < n; ++i) { + if(routes[i]) { + ADM_dataset_route_destroy(routes[i]); + } + } + + free(routes); +} + ADM_qos_limit_t* prepare_qos_limits(size_t n) { diff --git a/examples/c/common.h b/examples/c/common.h index 53d472113ab6c0e1d831319f0979adb99fd4001a..08ed9a8c784fd121bf18eb7f627a3bec2bcb229a 100644 --- a/examples/c/common.h +++ b/examples/c/common.h @@ -3,6 +3,16 @@ #include +#define NJOB_NODES 50 +#define NADHOC_NODES 25 +#define N_NEW_ADHOC_NODES 10 +#define NINPUTS 10 +#define NOUTPUTS 5 +#define NEXPOUTPUTS 1 +#define NSOURCES 5 +#define NTARGETS 5 +#define NLIMITS 3 + #define TESTNAME \ (__builtin_strrchr(__FILE__, '/') ? 
__builtin_strrchr(__FILE__, '/') + 1 \ : __FILE__) @@ -11,11 +21,13 @@ typedef struct { const char* name; bool requires_server; bool requires_controller; + bool requires_data_stager; } test_info_t; typedef struct { const char* server_address; const char* controller_address; + const char* data_stager_address; } cli_args_t; int @@ -33,6 +45,12 @@ prepare_datasets(const char* pattern, size_t n); void destroy_datasets(ADM_dataset_t datasets[], size_t n); +ADM_dataset_route_t* +prepare_routes(const char* pattern, size_t n); + +void +destroy_routes(ADM_dataset_route_t routes[], size_t n); + ADM_qos_limit_t* prepare_qos_limits(size_t n); diff --git a/examples/cxx/ADM_cancel_transfer.cpp b/examples/cxx/ADM_cancel_transfer.cpp index d2791147037f6c826c404c9f8e470d93b8a81a8e..7ed4a55c3a36e3e9c37b505065f196d3a13eaf4d 100644 --- a/examples/cxx/ADM_cancel_transfer.cpp +++ b/examples/cxx/ADM_cancel_transfer.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_connect_data_operation.cpp b/examples/cxx/ADM_connect_data_operation.cpp index d3bc9ac1d63c15841e4cea875d81ff3b7a5d1176..74499a8deb53f492e4cebbf71206c7aa26191585 100644 --- a/examples/cxx/ADM_connect_data_operation.cpp +++ b/examples/cxx/ADM_connect_data_operation.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_define_data_operation.cpp b/examples/cxx/ADM_define_data_operation.cpp index e34a8d05568000ab5243c7f1e2c980b756825aef..097a8c00926639f750bbbd3576d9e2778cacee94 100644 --- a/examples/cxx/ADM_define_data_operation.cpp +++ b/examples/cxx/ADM_define_data_operation.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = 
TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_deploy_adhoc_storage.cpp b/examples/cxx/ADM_deploy_adhoc_storage.cpp index 93030a2706abe9862c864a96f7a36c191b583ef0..0944ee1a694436215482e5a4fcb6a36c0210713c 100644 --- a/examples/cxx/ADM_deploy_adhoc_storage.cpp +++ b/examples/cxx/ADM_deploy_adhoc_storage.cpp @@ -38,6 +38,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -51,8 +52,11 @@ main(int argc, char* argv[]) { std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; try { diff --git a/examples/cxx/ADM_finalize_data_operation.cpp b/examples/cxx/ADM_finalize_data_operation.cpp index a9148a199b83d1d589412b3a1f612b0bac99a588..acc18a1978b87b745e785435e66e567088ce4895 100644 --- a/examples/cxx/ADM_finalize_data_operation.cpp +++ b/examples/cxx/ADM_finalize_data_operation.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_get_pending_transfers.cpp b/examples/cxx/ADM_get_pending_transfers.cpp index 60f7d9eb7976f33614b0d2ad16798050016204ed..e5bd97e072cce04fbf4a36bb0037de78b5e21b5f 100644 --- a/examples/cxx/ADM_get_pending_transfers.cpp +++ b/examples/cxx/ADM_get_pending_transfers.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { 
.name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_get_qos_constraints.cpp b/examples/cxx/ADM_get_qos_constraints.cpp index 389ac2b199d14312b396c109f156eb1e8bf2ad26..04d3c62e01a3cdf2bdf7c4db442a7c931cb80a09 100644 --- a/examples/cxx/ADM_get_qos_constraints.cpp +++ b/examples/cxx/ADM_get_qos_constraints.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_get_statistics.cpp b/examples/cxx/ADM_get_statistics.cpp index a19e506b061a2dd7b21ec8ea9c3f909ad9e06294..15a12c95ff66e4cd9d2e53f76861500c3ead083d 100644 --- a/examples/cxx/ADM_get_statistics.cpp +++ b/examples/cxx/ADM_get_statistics.cpp @@ -29,11 +29,10 @@ int main(int argc, char* argv[]) { - test_info test_info{ - .name = TESTNAME, - .requires_server = true, - .requires_controller = true, - }; + test_info test_info{.name = TESTNAME, + .requires_server = true, + .requires_controller = true, + .requires_data_stager = true}; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_get_transfer_priority.cpp b/examples/cxx/ADM_get_transfer_priority.cpp index 983c39c18bed25e01368f5744a79f8f91b78c45c..6db261c9ad038cba2fcab7a48165c2dab9606dcc 100644 --- a/examples/cxx/ADM_get_transfer_priority.cpp +++ b/examples/cxx/ADM_get_transfer_priority.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_link_transfer_to_data_operation.cpp b/examples/cxx/ADM_link_transfer_to_data_operation.cpp index 
2227afd1e9ef3095e524c8d65252c0cbd9c863dd..a8256cc2e65886c4bd2197fc20a7fd984155b075 100644 --- a/examples/cxx/ADM_link_transfer_to_data_operation.cpp +++ b/examples/cxx/ADM_link_transfer_to_data_operation.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_ping.cpp b/examples/cxx/ADM_ping.cpp index ef796074bc7d6f9ba4aad2dbf59d2394157421de..abc6259613dc656221b243975f2e2f0bb34f1319 100644 --- a/examples/cxx/ADM_ping.cpp +++ b/examples/cxx/ADM_ping.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = false, + .requires_data_stager = false, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_register_adhoc_storage.cpp b/examples/cxx/ADM_register_adhoc_storage.cpp index 0dfd0f9ba7ccfea5e2de05ec45105d0c67ebbb5b..d2c07d40f67ec12317881d77e015a54ebc012789 100644 --- a/examples/cxx/ADM_register_adhoc_storage.cpp +++ b/examples/cxx/ADM_register_adhoc_storage.cpp @@ -38,6 +38,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -51,8 +52,11 @@ main(int argc, char* argv[]) { std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; try { diff --git a/examples/cxx/ADM_register_job.cpp b/examples/cxx/ADM_register_job.cpp index 
55ac84d6570dc2739a12d9ce07846cdbfcc78e26..2da8de734b33a6695a02bf785c93e84e6d33760e 100644 --- a/examples/cxx/ADM_register_job.cpp +++ b/examples/cxx/ADM_register_job.cpp @@ -38,6 +38,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -46,14 +47,19 @@ main(int argc, char* argv[]) { const auto job_nodes = prepare_nodes(NJOB_NODES); const auto adhoc_nodes = prepare_nodes(NADHOC_NODES); - const auto inputs = prepare_datasets("input-dataset-{}", NINPUTS); - const auto outputs = prepare_datasets("output-dataset-{}", NOUTPUTS); + const auto inputs = prepare_routes("{}-input-dataset-{}", NINPUTS); + const auto outputs = prepare_routes("{}-output-dataset-{}", NOUTPUTS); + const auto expected_outputs = + prepare_routes("{}-exp-output-dataset-{}", NEXPOUTPUTS); std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; try { @@ -62,7 +68,8 @@ main(int argc, char* argv[]) { server, name, scord::adhoc_storage::type::gekkofs, adhoc_storage_ctx, adhoc_resources); - scord::job::requirements reqs(inputs, outputs, adhoc_storage); + scord::job::requirements reqs(inputs, outputs, expected_outputs, + adhoc_storage); [[maybe_unused]] const auto job = scord::register_job( server, scord::job::resources{job_nodes}, reqs, 0); diff --git a/examples/cxx/ADM_register_pfs_storage.cpp b/examples/cxx/ADM_register_pfs_storage.cpp index 1078262c91a9233a35a53175064961d7fe7f28dd..54ac2f0092139a6561eff10baf874d9b23a20324 100644 --- a/examples/cxx/ADM_register_pfs_storage.cpp +++ 
b/examples/cxx/ADM_register_pfs_storage.cpp @@ -34,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = false, + .requires_data_stager = false, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_remove_adhoc_storage.cpp b/examples/cxx/ADM_remove_adhoc_storage.cpp index aa496ee6fd002006a39fa7ce4626b5a698b85980..05d9d8b0bf2ca54b5ca52460b82badf4deaf531f 100644 --- a/examples/cxx/ADM_remove_adhoc_storage.cpp +++ b/examples/cxx/ADM_remove_adhoc_storage.cpp @@ -38,6 +38,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -51,8 +52,11 @@ main(int argc, char* argv[]) { std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; try { diff --git a/examples/cxx/ADM_remove_job.cpp b/examples/cxx/ADM_remove_job.cpp index 4a3ae71cdf7b09ad908520267e879a989ae1a6b6..408a16ef27a91997503d44152a9818281404afd2 100644 --- a/examples/cxx/ADM_remove_job.cpp +++ b/examples/cxx/ADM_remove_job.cpp @@ -26,11 +26,6 @@ #include #include "common.hpp" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -38,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -46,14 +42,19 @@ main(int argc, char* argv[]) { const auto job_nodes = prepare_nodes(NJOB_NODES); const auto 
adhoc_nodes = prepare_nodes(NADHOC_NODES); - const auto inputs = prepare_datasets("input-dataset-{}", NINPUTS); - const auto outputs = prepare_datasets("output-dataset-{}", NOUTPUTS); + const auto inputs = prepare_routes("{}-input-dataset-{}", NINPUTS); + const auto outputs = prepare_routes("{}-output-dataset-{}", NOUTPUTS); + const auto expected_outputs = + prepare_routes("{}-exp-output-dataset-{}", NEXPOUTPUTS); std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; @@ -63,7 +64,8 @@ main(int argc, char* argv[]) { server, name, scord::adhoc_storage::type::gekkofs, adhoc_storage_ctx, adhoc_resources); - scord::job::requirements reqs(inputs, outputs, adhoc_storage); + scord::job::requirements reqs(inputs, outputs, expected_outputs, + adhoc_storage); [[maybe_unused]] const auto job = scord::register_job( server, scord::job::resources{job_nodes}, reqs, 0); diff --git a/examples/cxx/ADM_remove_pfs_storage.cpp b/examples/cxx/ADM_remove_pfs_storage.cpp index 8e5bd527ea2a0de334092552c4b4b3616a11f082..21bcfddf4d309275daa622ec42a57b02c5cf292f 100644 --- a/examples/cxx/ADM_remove_pfs_storage.cpp +++ b/examples/cxx/ADM_remove_pfs_storage.cpp @@ -34,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = false, + .requires_data_stager = false, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_set_dataset_information.cpp b/examples/cxx/ADM_set_dataset_information.cpp index 863e677424d113350f2eaeb9f5b4c7a95ab5a1e6..7ddc6c0d4641e41210060ce3f0a0f40f85faec02 100644 --- a/examples/cxx/ADM_set_dataset_information.cpp +++ 
b/examples/cxx/ADM_set_dataset_information.cpp @@ -34,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_set_io_resources.cpp b/examples/cxx/ADM_set_io_resources.cpp index 425b4642379614b73bd0115e6669b2c64fdec12e..17acf0ae9e2eed45826e1cfb0ae4eea9a85c5a23 100644 --- a/examples/cxx/ADM_set_io_resources.cpp +++ b/examples/cxx/ADM_set_io_resources.cpp @@ -34,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_set_qos_constraints.cpp b/examples/cxx/ADM_set_qos_constraints.cpp index d487b57603e81a9adc07f0bd3a248aea628c9c2f..55e6efb04eda1ace73e28f9370067d88fbb936d1 100644 --- a/examples/cxx/ADM_set_qos_constraints.cpp +++ b/examples/cxx/ADM_set_qos_constraints.cpp @@ -33,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_set_transfer_priority.cpp b/examples/cxx/ADM_set_transfer_priority.cpp index 39819b478966842685c233044fc90d3c84d48b09..86416f1c06264db81850f13093d7b34fd20f8cd1 100644 --- a/examples/cxx/ADM_set_transfer_priority.cpp +++ b/examples/cxx/ADM_set_transfer_priority.cpp @@ -34,6 +34,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/ADM_terminate_adhoc_storage.cpp b/examples/cxx/ADM_terminate_adhoc_storage.cpp index e7b26594d6ba785851e54a85f941daecffc16eb9..737e1d4fbf3b766786fe15ccb557c2105c7b2bbf 100644 --- 
a/examples/cxx/ADM_terminate_adhoc_storage.cpp +++ b/examples/cxx/ADM_terminate_adhoc_storage.cpp @@ -38,6 +38,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -51,8 +52,11 @@ main(int argc, char* argv[]) { std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; try { diff --git a/examples/cxx/ADM_transfer_datasets.cpp b/examples/cxx/ADM_transfer_datasets.cpp index 2b4e6f02c6373f7f404c305a449b97bdb328ba57..1028658c880a016ab6676618323d6bafd38b4269 100644 --- a/examples/cxx/ADM_transfer_datasets.cpp +++ b/examples/cxx/ADM_transfer_datasets.cpp @@ -41,6 +41,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -49,8 +50,10 @@ main(int argc, char* argv[]) { const auto job_nodes = prepare_nodes(NJOB_NODES); const auto adhoc_nodes = prepare_nodes(NADHOC_NODES); - const auto inputs = prepare_datasets("input-dataset-{}", NINPUTS); - const auto outputs = prepare_datasets("output-dataset-{}", NOUTPUTS); + const auto inputs = prepare_routes("{}-input-dataset-{}", NINPUTS); + const auto outputs = prepare_routes("{}-output-dataset-{}", NOUTPUTS); + const auto expected_outputs = + prepare_routes("{}-exp-output-dataset-{}", NEXPOUTPUTS); const auto sources = prepare_datasets("source-dataset-{}", NSOURCES); const auto targets = prepare_datasets("target-dataset-{}", NTARGETS); @@ -60,8 +63,11 @@ main(int argc, char* argv[]) { 
std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; try { @@ -69,7 +75,8 @@ main(int argc, char* argv[]) { server, name, scord::adhoc_storage::type::gekkofs, adhoc_storage_ctx, adhoc_resources); - scord::job::requirements reqs(inputs, outputs, adhoc_storage); + scord::job::requirements reqs(inputs, outputs, expected_outputs, + adhoc_storage); const auto job = scord::register_job( server, scord::job::resources{job_nodes}, reqs, 0); diff --git a/examples/cxx/ADM_update_adhoc_storage.cpp b/examples/cxx/ADM_update_adhoc_storage.cpp index dbb2ac93c1c3fa9be59fed26ae1c89e38db0c3ce..a7b88deb0b3d1ee0c43b34c43a3e2629f6198d55 100644 --- a/examples/cxx/ADM_update_adhoc_storage.cpp +++ b/examples/cxx/ADM_update_adhoc_storage.cpp @@ -37,6 +37,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -51,8 +52,11 @@ main(int argc, char* argv[]) { std::string name = "adhoc_storage_42"; const auto adhoc_storage_ctx = scord::adhoc_storage::ctx{ cli_args.controller_address, + cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, - scord::adhoc_storage::access_type::read_write, 100, false}; + scord::adhoc_storage::access_type::read_write, + 100, + false}; const auto adhoc_resources = scord::adhoc_storage::resources{adhoc_nodes}; const auto new_adhoc_resources = diff --git a/examples/cxx/ADM_update_job.cpp b/examples/cxx/ADM_update_job.cpp index 1ee22d589cfbcf9573c5d315aa44344478f00e8e..72533de8ba11277d0fb26778789ff30dff0d0e36 100644 --- 
a/examples/cxx/ADM_update_job.cpp +++ b/examples/cxx/ADM_update_job.cpp @@ -26,11 +26,6 @@ #include #include "common.hpp" -#define NJOB_NODES 50 -#define NADHOC_NODES 25 -#define NINPUTS 10 -#define NOUTPUTS 5 - int main(int argc, char* argv[]) { @@ -38,6 +33,7 @@ main(int argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = true, + .requires_data_stager = true, }; const auto cli_args = process_args(argc, argv, test_info); @@ -47,18 +43,21 @@ main(int argc, char* argv[]) { const auto job_nodes = prepare_nodes(NJOB_NODES); const auto new_job_nodes = prepare_nodes(NJOB_NODES * 2); const auto adhoc_nodes = prepare_nodes(NADHOC_NODES); - const auto inputs = prepare_datasets("input-dataset-{}", NINPUTS); - const auto outputs = prepare_datasets("output-dataset-{}", NOUTPUTS); + const auto inputs = prepare_routes("{}-input-dataset-{}", NINPUTS); + const auto outputs = prepare_routes("{}-output-dataset-{}", NOUTPUTS); + const auto expected_outputs = + prepare_routes("{}-exp-output-dataset-{}", NEXPOUTPUTS); const auto gkfs_storage = scord::register_adhoc_storage( server, "foobar", scord::adhoc_storage::type::gekkofs, scord::adhoc_storage::ctx{ - cli_args.controller_address, + cli_args.controller_address, cli_args.data_stager_address, scord::adhoc_storage::execution_mode::separate_new, scord::adhoc_storage::access_type::read_write, 100, false}, scord::adhoc_storage::resources{adhoc_nodes}); - scord::job::requirements reqs{inputs, outputs, gkfs_storage}; + scord::job::requirements reqs{inputs, outputs, expected_outputs, + gkfs_storage}; const auto new_inputs = prepare_datasets("input-new-dataset-{}", NINPUTS); const auto new_outputs = diff --git a/examples/cxx/ADM_update_pfs_storage.cpp b/examples/cxx/ADM_update_pfs_storage.cpp index 019fe1104fa0ffb466822cacb1db9dcca6a8168b..699d0ea9a238d584b1578a4f97cfe6ebb2ebc67c 100644 --- a/examples/cxx/ADM_update_pfs_storage.cpp +++ b/examples/cxx/ADM_update_pfs_storage.cpp @@ -34,6 +34,7 @@ main(int 
argc, char* argv[]) { .name = TESTNAME, .requires_server = true, .requires_controller = false, + .requires_data_stager = false, }; const auto cli_args = process_args(argc, argv, test_info); diff --git a/examples/cxx/CMakeLists.txt b/examples/cxx/CMakeLists.txt index cadabb2ed61d26ed7b8e24246125601d4aa9b915..86580f1f903e150e16b8ff55e332fe26a85879fd 100644 --- a/examples/cxx/CMakeLists.txt +++ b/examples/cxx/CMakeLists.txt @@ -74,7 +74,8 @@ if(SCORD_BUILD_TESTS) add_test(run_${TEST_NAME} ${example} ${SCORD_ADDRESS_STRING} - ${SCORD_CTL_ADDRESS_STRING}) + ${SCORD_CTL_ADDRESS_STRING} + ${DATA_STAGER_ADDRESS_STRING}) set_tests_properties(run_${TEST_NAME} PROPERTIES FIXTURES_REQUIRED "scord_daemon;scord_ctl" ENVIRONMENT "${TEST_ENV}") diff --git a/examples/cxx/common.cpp b/examples/cxx/common.cpp index cf83a8a90369d9b5145576297ac78f0870c8bf36..bc290c1f5861e1725308548ba3af273304a3af86 100644 --- a/examples/cxx/common.cpp +++ b/examples/cxx/common.cpp @@ -15,17 +15,24 @@ process_args(int argc, char* argv[], const test_info& test_info) { ++required_args; } + if(test_info.requires_data_stager) { + ++required_args; + } + if(argc != required_args) { fmt::print(stderr, "ERROR: missing arguments\n"); - fmt::print(stderr, "Usage: {}{}{}\n", test_info.name, + fmt::print(stderr, "Usage: {}{}{}{}\n", test_info.name, test_info.requires_server ? " " : "", - test_info.requires_controller ? " " - : ""); + test_info.requires_controller ? " " : "", + test_info.requires_data_stager ? " " + : ""); exit(EXIT_FAILURE); } return cli_args{test_info.requires_server ? std::string{argv[1]} : ""s, - test_info.requires_controller ? std::string{argv[2]} : ""s}; + test_info.requires_controller ? std::string{argv[2]} : ""s, + test_info.requires_data_stager ? 
std::string{argv[3]} + : ""s}; } std::vector @@ -50,6 +57,19 @@ prepare_datasets(const std::string& pattern, size_t n) { return datasets; } +std::vector +prepare_routes(const std::string& pattern, size_t n) { + std::vector routes; + routes.reserve(n); + for(size_t i = 0; i < n; ++i) { + routes.emplace_back( + scord::dataset{fmt::format(fmt::runtime(pattern), "src", i)}, + scord::dataset{fmt::format(fmt::runtime(pattern), "dst", i)}); + } + + return routes; +} + std::vector prepare_qos_limits(size_t n) { diff --git a/examples/cxx/common.hpp b/examples/cxx/common.hpp index 374ad2644fff45251062ba13f54644f12fff9754..4cc0f836f058a7a4d96549727ee257ceedf5a316 100644 --- a/examples/cxx/common.hpp +++ b/examples/cxx/common.hpp @@ -4,6 +4,12 @@ #include #include +#define NJOB_NODES 50 +#define NADHOC_NODES 25 +#define NINPUTS 10 +#define NOUTPUTS 5 +#define NEXPOUTPUTS 1 + #define TESTNAME \ (__builtin_strrchr(__FILE__, '/') ? __builtin_strrchr(__FILE__, '/') + 1 \ : __FILE__) @@ -12,11 +18,13 @@ struct test_info { std::string name; bool requires_server; bool requires_controller; + bool requires_data_stager; }; struct cli_args { std::string server_address; std::string controller_address; + std::string data_stager_address; }; cli_args @@ -28,6 +36,9 @@ prepare_nodes(size_t n); std::vector prepare_datasets(const std::string& pattern, size_t n); +std::vector +prepare_routes(const std::string& pattern, size_t n); + std::vector prepare_qos_limits(size_t n); diff --git a/examples/scord-ctl.conf.in b/examples/scord-ctl.conf.in index 58ba161613c2c98ac2311648a8babf31865c1452..83a02a429c8d14ae1afa145d17d6ff4006d659af 100644 --- a/examples/scord-ctl.conf.in +++ b/examples/scord-ctl.conf.in @@ -35,7 +35,145 @@ config: command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/gekkofs.sh stop --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/gekkofs.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: 
@CMAKE_BINARY_DIR@/plugins/adhoc_services.d/gekkofs.sh + shrink + --hosts {ADHOC_NODES} + dataclay: + # The default working directory for adhoc instances of this type + working_directory: /tmp/dataclay + startup: + # Specific environment variables that should be set for the adhoc + # instance. These will be merged with the environment variables + # already set by Slurm. + environment: + VAR0: value0 + VAR1: value1 + # The command that `scord-ctl` will use to start an adhoc instance of + # this type. The following variables are supported that will be + # automatically replaced by scord-ctl if found between curly braces: + # * ADHOC_NODES: A comma separated list of valid job hostnames that + # can be used to start the adhoc instance. + # * ADHOC_DIRECTORY: A unique working directory for each specific + # adhoc instance. This directory will be created by scord-ctl under + # `working_directory` and automatically removed after the adhoc + # instance has been shut down. + # * ADHOC_ID: - A unique ID for the adhoc instance. + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/dataclay.sh + start + --hosts {ADHOC_NODES} + --workdir {ADHOC_DIRECTORY} + --datadir {ADHOC_DIRECTORY}/data + --mountdir {ADHOC_DIRECTORY}/mnt + shutdown: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/dataclay.sh + stop + --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/dataclay.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/dataclay.sh + shrink + --hosts {ADHOC_NODES} + + expand: + # The default working directory for adhoc instances of this type + working_directory: /tmp/expand + startup: + # Specific environment variables that should be set for the adhoc + # instance. These will be merged with the environment variables + # already set by Slurm. 
+ environment: + VAR0: value0 + VAR1: value1 + # The command that `scord-ctl` will use to start an adhoc instance of + # this type. The following variables are supported that will be + # automatically replaced by scord-ctl if found between curly braces: + # * ADHOC_NODES: A comma separated list of valid job hostnames that + # can be used to start the adhoc instance. + # * ADHOC_DIRECTORY: A unique working directory for each specific + # adhoc instance. This directory will be created by scord-ctl under + # `working_directory` and automatically removed after the adhoc + # instance has been shut down. + # * ADHOC_ID: - A unique ID for the adhoc instance. + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/expand.sh + start + --hosts {ADHOC_NODES} + --workdir {ADHOC_DIRECTORY} + --datadir {ADHOC_DIRECTORY}/data + --mountdir {ADHOC_DIRECTORY}/mnt + shutdown: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/expand.sh + stop + --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/expand.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/expand.sh + shrink + --hosts {ADHOC_NODES} + hercules: + # The default working directory for adhoc instances of this type + working_directory: /tmp/hercules + startup: + # Specific environment variables that should be set for the adhoc + # instance. These will be merged with the environment variables + # already set by Slurm. + environment: + VAR0: value0 + VAR1: value1 + # The command that `scord-ctl` will use to start an adhoc instance of + # this type. The following variables are supported that will be + # automatically replaced by scord-ctl if found between curly braces: + # * ADHOC_NODES: A comma separated list of valid job hostnames that + # can be used to start the adhoc instance. + # * ADHOC_DIRECTORY: A unique working directory for each specific + # adhoc instance. 
This directory will be created by scord-ctl under + # `working_directory` and automatically removed after the adhoc + # instance has been shut down. + # * ADHOC_ID: - A unique ID for the adhoc instance. + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/hercules.sh + start + --hosts {ADHOC_NODES} + --workdir {ADHOC_DIRECTORY} + --datadir {ADHOC_DIRECTORY}/data + --mountdir {ADHOC_DIRECTORY}/mnt + shutdown: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/hercules.sh + stop + --workdir {ADHOC_DIRECTORY} + expand: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/hercules.sh + expand + --hosts {ADHOC_NODES} + + shrink: + environment: + command: @CMAKE_BINARY_DIR@/plugins/adhoc_services.d/hercules.sh + shrink + --hosts {ADHOC_NODES} # default storage tiers made available to applications storage: diff --git a/plugins/adhoc_services.d/CMakeLists.txt b/plugins/adhoc_services.d/CMakeLists.txt index 143a7e9d65eee259c61fa66d0593b2e0a84a19ba..2a5d03267cbb4e693b6d8d447772847b8ac61b4a 100644 --- a/plugins/adhoc_services.d/CMakeLists.txt +++ b/plugins/adhoc_services.d/CMakeLists.txt @@ -22,7 +22,10 @@ # SPDX-License-Identifier: GPL-3.0-or-later # ################################################################################ -list(APPEND ADHOC_SCRIPTS "${CMAKE_CURRENT_SOURCE_DIR}/gekkofs.sh") +list(APPEND ADHOC_SCRIPTS "${CMAKE_CURRENT_SOURCE_DIR}/gekkofs.sh" +"${CMAKE_CURRENT_SOURCE_DIR}/expand.sh" +"${CMAKE_CURRENT_SOURCE_DIR}/dataclay.sh" +"${CMAKE_CURRENT_SOURCE_DIR}/hercules.sh") # copy adhoc scripts to the build directory so that they can be used by tests foreach (ADHOC_SCRIPT ${ADHOC_SCRIPTS}) diff --git a/plugins/adhoc_services.d/dataclay.sh b/plugins/adhoc_services.d/dataclay.sh new file mode 100644 index 0000000000000000000000000000000000000000..0fdcf0910beedfa75021868b1239cad921f27110 --- /dev/null +++ b/plugins/adhoc_services.d/dataclay.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +exit 0 diff --git 
a/plugins/adhoc_services.d/expand.sh b/plugins/adhoc_services.d/expand.sh new file mode 100644 index 0000000000000000000000000000000000000000..0fdcf0910beedfa75021868b1239cad921f27110 --- /dev/null +++ b/plugins/adhoc_services.d/expand.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +exit 0 diff --git a/plugins/adhoc_services.d/gekkofs.sh b/plugins/adhoc_services.d/gekkofs.sh index 0fdcf0910beedfa75021868b1239cad921f27110..6544724eac3b9c7ac90e22678b9f045bf4269d7c 100644 --- a/plugins/adhoc_services.d/gekkofs.sh +++ b/plugins/adhoc_services.d/gekkofs.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash - +echo "GEKKOFS Script Called" exit 0 diff --git a/plugins/adhoc_services.d/hercules.sh b/plugins/adhoc_services.d/hercules.sh new file mode 100644 index 0000000000000000000000000000000000000000..0fdcf0910beedfa75021868b1239cad921f27110 --- /dev/null +++ b/plugins/adhoc_services.d/hercules.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +exit 0 diff --git a/plugins/slurm/defaults.h.in b/plugins/slurm/defaults.h.in index 0568e0990df4a4d11348b24e1f215257aa411cf9..9de23bc60bdbf9b9bacde3be859b1928c1cad93d 100644 --- a/plugins/slurm/defaults.h.in +++ b/plugins/slurm/defaults.h.in @@ -37,6 +37,9 @@ #define SCORDCTL_PROTO_DEFAULT SCORD_PROTO_DEFAULT #define SCORDCTL_PORT_DEFAULT @SCORD_CTL_BIND_PORT@ #define SCORDCTL_TMPDIR_DEFAULT "/tmp" +#define CARGO_PROG_DEFAULT "@CARGO_PROGRAM@" +#define CARGO_PROTO_DEFAULT SCORD_PROTO_DEFAULT +#define CARGO_PORT_DEFAULT 62000 // clang-format on diff --git a/plugins/slurm/slurmadmcli.c b/plugins/slurm/slurmadmcli.c index 204c084005da5898d05bc578015152f308d6ea49..61d10078627ca702b97ff695265cdfc11a9dbe80 100644 --- a/plugins/slurm/slurmadmcli.c +++ b/plugins/slurm/slurmadmcli.c @@ -32,6 +32,7 @@ #include #include +#include #include "defaults.h" #include "utils.h" @@ -48,16 +49,16 @@ #define ADHOCID_LEN 64 #define INT32_STR_LEN 16 /* 16 chars are enough to fit an int32 in decimal */ -#define TAG_NNODES 0 -#define TAG_ADHOC_TYPE 1 -#define TAG_ADHOC_OVERLAP 2 
-#define TAG_ADHOC_EXCLUSIVE 3 -#define TAG_ADHOC_DEDICATED 4 -#define TAG_ADHOC_REMOTE 5 -#define TAG_DATASET_INPUT 6 -#define TAG_DATASET_OUTPUT 7 -#define TAG_DATASET_EXPECT_OUTPUT 8 -#define TAG_DATASET_INOUT 9 +#define TAG_NNODES 0 +#define TAG_ADHOC_TYPE 1 +#define TAG_ADHOC_OVERLAP 2 +#define TAG_ADHOC_EXCLUSIVE 3 +#define TAG_ADHOC_DEDICATED 4 +#define TAG_ADHOC_REMOTE 5 +#define TAG_DATASET_INPUT 6 +#define TAG_DATASET_OUTPUT 7 +#define TAG_DATASET_EXPECTED_OUTPUT 8 +#define TAG_DATASET_EXPECTED_INOUT_DATASET 9 // clang-format off SPANK_PLUGIN (admire-cli, 1) @@ -71,6 +72,14 @@ static long adhoc_walltime = 0; static ADM_adhoc_mode_t adhoc_mode = ADM_ADHOC_MODE_IN_JOB_SHARED; static ADM_adhoc_storage_type_t adhoc_type = 0; static char adhoc_id[ADHOCID_LEN] = {0}; +ADM_dataset_route_t* input_datasets = NULL; +size_t input_datasets_count = 0; +ADM_dataset_route_t* output_datasets = NULL; +size_t output_datasets_count = 0; +ADM_dataset_route_t* expected_output_datasets = NULL; +size_t expected_output_datasets_count = 0; +ADM_dataset_route_t* expected_inout_datasets = NULL; +size_t expected_inout_datasets_count = 0; /* server-related options */ typedef struct { @@ -84,6 +93,7 @@ typedef struct { typedef struct { scord_server_info_t scord_info; scord_server_info_t scordctl_info; + scord_server_info_t cargo_info; } scord_plugin_config_t; @@ -97,7 +107,12 @@ static scord_plugin_config_t default_cfg = { .proto = SCORDCTL_PROTO_DEFAULT, .port = SCORDCTL_PORT_DEFAULT, .prog = SCORDCTL_PROG_DEFAULT, - .tmpdir = SCORDCTL_TMPDIR_DEFAULT}}; + .tmpdir = SCORDCTL_TMPDIR_DEFAULT}, + .cargo_info = {.addr = NULL, + .proto = CARGO_PROTO_DEFAULT, + .port = CARGO_PORT_DEFAULT, + .prog = CARGO_PROG_DEFAULT, + .tmpdir = NULL}}; static int process_opts(int tag, const char* optarg, int remote); @@ -169,9 +184,9 @@ struct spank_option spank_opts[] = { (spank_opt_cb_f) process_opts /* callback */ }, { - "adm-input", "dataset-routing", + "adm-input-datasets", 
"dataset-route[,dataset-route...]", "Define datasets that should be transferred between the PFS " - "and the ad-hoc storage service. The `dataset-routing` is " + "and the ad-hoc storage service. The `dataset-route` is " "defined as `ORIGIN-TIER:PATH TARGET-TIER:PATH`. For example," "to transfer the file `input000.dat` from the Lustre PFS to " "the an on-demand GekkoFS ad-hoc storage service, the option " @@ -182,7 +197,7 @@ struct spank_option spank_opts[] = { (spank_opt_cb_f) process_opts /* callback */ }, { - "adm-output", "dataset-routing", + "adm-output-datasets", "dataset-route[,dataset-route...]", "Define datasets that should be automatically transferred " "between the ad-hoc storage system and the PFS. The ad-hoc " "storage will guarantee that the dataset is not transferred " @@ -194,22 +209,24 @@ struct spank_option spank_opts[] = { (spank_opt_cb_f) process_opts /* callback */ }, { - "adm-expect-output", "dataset-routing", + "adm-expected-output-datasets", + "dataset-route[,dataset-route...]", "Define datasets that are expected to be generated by the " "application. 
When using this option, the application itself " "MUST use the programmatic APIs defined in `scord-user.h`to " "explicitly request the transfer of the datasets.", 1, /* option takes an argument */ - TAG_DATASET_EXPECT_OUTPUT, /* option tag */ + TAG_DATASET_EXPECTED_OUTPUT, /* option tag */ (spank_opt_cb_f) process_opts /* callback */ }, { - "adm-expect-inout", "dataset-routing", + "adm-expected-inout-datasets", + "dataset-route[,dataset-route...]", "Define the datasets that should be transferred INTO " "the ad-hoc storage AND BACK when finished.", - 1, /* option takes an argument */ - TAG_DATASET_INOUT, /* option tag */ - (spank_opt_cb_f) process_opts /* callback */ + 1, /* option takes an argument */ + TAG_DATASET_EXPECTED_INOUT_DATASET, /* option tag */ + (spank_opt_cb_f) process_opts /* callback */ }, SPANK_OPTIONS_TABLE_END}; @@ -217,7 +234,8 @@ int process_opts(int tag, const char* optarg, int remote) { (void) remote; - slurm_debug("%s: %s() called", plugin_name, __func__); + slurm_debug("%s: %s(tag: %d, optarg: %s, remote: %d) called", plugin_name, + __func__, tag, optarg, remote); /* srun & sbatch/salloc */ spank_context_t sctx = spank_context(); @@ -278,6 +296,62 @@ process_opts(int tag, const char* optarg, int remote) { adhoc_id[ADHOCID_LEN - 1] = '\0'; return 0; + case TAG_DATASET_INPUT: + if(input_datasets) { + free(input_datasets); + } + + if(scord_utils_parse_dataset_routes(optarg, &input_datasets, + &input_datasets_count) != + ADM_SUCCESS) { + slurm_error("%s: %s: failed to parse dataset route: %s", + plugin_name, __func__, optarg); + return -1; + } + return 0; + + case TAG_DATASET_OUTPUT: + if(output_datasets) { + free(output_datasets); + } + + if(scord_utils_parse_dataset_routes(optarg, &output_datasets, + &output_datasets_count) != + ADM_SUCCESS) { + slurm_error("%s: %s: failed to parse dataset route: %s", + plugin_name, __func__, optarg); + return -1; + } + return 0; + + case TAG_DATASET_EXPECTED_OUTPUT: + if(expected_output_datasets) { + 
free(expected_output_datasets); + } + + if(scord_utils_parse_dataset_routes( + optarg, &expected_output_datasets, + &expected_output_datasets_count) != ADM_SUCCESS) { + slurm_error("%s: %s: failed to parse dataset route: %s", + plugin_name, __func__, optarg); + return -1; + } + return 0; + + case TAG_DATASET_EXPECTED_INOUT_DATASET: + if(expected_inout_datasets) { + free(expected_inout_datasets); + } + + if(scord_utils_parse_dataset_routes( + optarg, &expected_inout_datasets, + &expected_inout_datasets_count) != ADM_SUCCESS) { + slurm_error("%s: %s: failed to parse dataset route: %s", + plugin_name, __func__, optarg); + return -1; + } + return 0; + default: return -1; } @@ -305,6 +379,8 @@ process_config(int ac, char** av, scord_plugin_config_t* cfg) { &cfg->scordctl_info.port), EXPAND_SCORD_OPT("scordctl_tmpdir", TYPE_STR, &cfg->scordctl_info.tmpdir), + EXPAND_SCORD_OPT("cargo_prog", TYPE_STR, &cfg->cargo_info.prog), + EXPAND_SCORD_OPT("cargo_port", TYPE_INT, &cfg->cargo_info.port), }; #undef EXPAND_SCORD_OPT @@ -389,6 +465,12 @@ scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, return -1; } + /* The Cargo master will also typically reside on the first node of the + * allocation */ + cfg.cargo_info.addr = margo_address_create(cfg.cargo_info.proto, + ADM_node_get_hostname(ctl_node), + cfg.cargo_info.port); + slurm_debug("%s: %s: scord_info:", plugin_name, __func__); slurm_debug("%s: %s: addr: \"%s\",", plugin_name, __func__, cfg.scord_info.addr); @@ -405,6 +487,14 @@ scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, slurm_debug("%s: %s: port: %d,", plugin_name, __func__, cfg.scordctl_info.port); + slurm_debug("%s: %s: cargo_info:", plugin_name, __func__); + slurm_debug("%s: %s: addr: \"%s\",", plugin_name, __func__, + cfg.cargo_info.addr); + slurm_debug("%s: %s: proto: \"%s\",", plugin_name, __func__, + cfg.cargo_info.proto); + slurm_debug("%s: %s: port: %d,", plugin_name, __func__, + cfg.cargo_info.port); + /* Register 
the job with the scord server */ scord_server = ADM_server_create(cfg.scord_info.proto, cfg.scord_info.addr); if(!scord_server) { @@ -443,9 +533,9 @@ scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, goto end; } - adhoc_ctx = ADM_adhoc_context_create(cfg.scordctl_info.addr, adhoc_mode, - ADM_ADHOC_ACCESS_RDWR, adhoc_walltime, - false); + adhoc_ctx = ADM_adhoc_context_create( + cfg.scordctl_info.addr, cfg.cargo_info.addr, adhoc_mode, + ADM_ADHOC_ACCESS_RDWR, adhoc_walltime, false); if(!adhoc_ctx) { slurm_error("%s: adhoc_context creation failed", plugin_name); rc = -1; @@ -460,8 +550,14 @@ scord_register_job(scord_plugin_config_t cfg, scord_nodelist_t nodelist, goto end; } + slurm_debug("Creating job requirements: %zu inputs, %zu outputs", + input_datasets_count, output_datasets_count); + /* no inputs or outputs */ - scord_reqs = ADM_job_requirements_create(NULL, 0, NULL, 0, adhoc_storage); + scord_reqs = ADM_job_requirements_create( + input_datasets, input_datasets_count, output_datasets, + output_datasets_count, expected_output_datasets, + expected_output_datasets_count, adhoc_storage); if(!scord_reqs) { slurm_error("%s: scord job_requirements creation", plugin_name); rc = -1; diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 086288087cbfd7ce92d5e00ccc71a52450977e9d..cc6b18107b3bb41216801d760b682809813693d0 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -85,12 +85,13 @@ add_library(libscord SHARED) target_sources( libscord - PUBLIC scord/scord.h scord/scord.hpp - PRIVATE libscord.cpp c_wrapper.cpp detail/impl.hpp detail/impl.cpp env.hpp + PUBLIC scord/scord.h scord/scord.hpp scord/types.hpp + PRIVATE libscord.cpp c_wrapper.cpp utils.cpp detail/impl.hpp detail/impl.cpp + env.hpp ) set(public_headers, "") -list(APPEND public_headers "scord/scord.h" "scord/scord.hpp") +list(APPEND public_headers "scord/scord.h" "scord/scord.hpp" "scord/types.hpp") set_target_properties(libscord PROPERTIES PUBLIC_HEADER 
"${public_headers}") diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 3ace0a2d16734bf6fa5aa5afe707f9ae894454a5..17c8c2c5e5b20e1d33e6387c68a316687b9b1708 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -234,7 +234,7 @@ register_adhoc_storage(const server& srv, const std::string& name, LOGGER_INFO("rpc {:<} body: {{name: {}, type: {}, adhoc_ctx: {}, " "adhoc_resources: {}}}", - rpc, name, type, ctx, resources); + rpc, std::quoted(name), type, ctx, resources); if(const auto& call_rv = endp.call(rpc.name(), name, type, ctx, resources); @@ -336,7 +336,7 @@ register_pfs_storage(const server& srv, const std::string& name, const auto& endp = lookup_rv.value(); LOGGER_INFO("rpc {:<} body: {{name: {}, type: {}, pfs_ctx: {}}}", rpc, - name, type, ctx); + std::quoted(name), type, ctx); if(const auto& call_rv = endp.call(rpc.name(), name, type, ctx); call_rv.has_value()) { diff --git a/src/lib/scord/types.h b/src/lib/scord/types.h index 633f849bb4fb2d33a68286782163474649c42621..e518cb8f3962f8cdc4053af98bbe83224973b3f8 100644 --- a/src/lib/scord/types.h +++ b/src/lib/scord/types.h @@ -108,12 +108,18 @@ typedef struct adm_job_requirements* ADM_job_requirements_t; /** A dataset */ typedef struct adm_dataset* ADM_dataset_t; +/** Routing information for a dataset */ +typedef struct adm_dataset_route* ADM_dataset_route_t; + /** Information about a dataset */ typedef struct adm_dataset_info* ADM_dataset_info_t; /** A list of datasets */ typedef struct adm_dataset_list* ADM_dataset_list_t; +/** A list of dataset routes */ +typedef struct adm_dataset_route_list* ADM_dataset_route_list_t; + /** A list of QoS limits */ typedef struct adm_qos_limit_list* ADM_qos_limit_list_t; @@ -360,12 +366,16 @@ ADM_job_resources_destroy(ADM_job_resources_t res); * @remark JOB_REQUIREMENTS created by this function need to be freed by calling * ADM_job_requirements_destroy(). 
* - * @param[in] inputs An array of DATASET_DESCRIPTORS describing the input + * @param[in] inputs An array of DATASET_ROUTES describing the input * information required by the job. - * @param[in] inputs_len The number of DATASET_DESCRIPTORS stored in inputs. - * @param[in] outputs An array of DATASET_DESCRIPTORS describing the output + * @param[in] inputs_len The number of DATASET_ROUTES stored in inputs. + * @param[in] outputs An array of DATASET_ROUTES describing the output * information generated by the job. - * @param[in] outputs_len The number of DATASET_DESCRIPTORS stored in outputs. + * @param[in] outputs_len The number of DATASET_ROUTES stored in outputs. + * @param[in] expected_outputs An array of DATASET_ROUTES describing the + * expected output information generated by the job. + * @param[in] expected_outputs_len The number of DATASET_ROUTES stored in + * expected_outputs. * @param[in] adhoc_storage An optional ADHOC_DESCRIPTOR describing the adhoc * storage system required by the job (can be set to NULL if no adhoc storage * system is required). @@ -373,8 +383,10 @@ ADM_job_resources_destroy(ADM_job_resources_t res); * failure. */ ADM_job_requirements_t -ADM_job_requirements_create(ADM_dataset_t inputs[], size_t inputs_len, - ADM_dataset_t outputs[], size_t outputs_len, +ADM_job_requirements_create(ADM_dataset_route_t inputs[], size_t inputs_len, + ADM_dataset_route_t outputs[], size_t outputs_len, + ADM_dataset_route_t expected_outputs[], + size_t expected_outputs_len, ADM_adhoc_storage_t adhoc_storage); /** @@ -425,6 +437,28 @@ ADM_dataset_create(const char* id); ADM_return_t ADM_dataset_destroy(ADM_dataset_t dataset); +/** + * Create a dataset route from a source and destination dataset. + * + * @remark Dataset routes need to be freed by calling + * ADM_dataset_route_destroy(). + * + * @param source The source dataset + * @param destination The destination dataset + * @return A valid ADM_dataset_route_t if successful or NULL in case of failure. 
+ */ +ADM_dataset_route_t +ADM_dataset_route_create(ADM_dataset_t source, ADM_dataset_t destination); + +/** + * Destroy a dataset route created by ADM_dataset_route_create(). + * + * @param route A valid ADM_dataset_route_t + * @return ADM_SUCCESS or corresponding ADM error code + */ +ADM_return_t +ADM_dataset_route_destroy(ADM_dataset_route_t route); + /** * Create a dataset from a user-provided id (e.g. a path for POSIX-like file * systems or key for key-value stores). @@ -536,6 +570,8 @@ ADM_adhoc_resources_destroy(ADM_adhoc_resources_t res); * * @param[in] ctl_address The address of the control node for the * adhoc storage system + * @param[in] stager_address The address of the data stager for the + * adhoc storage system * @param[in] exec_mode The adhoc storage system execution mode * @param[in] access_type The adhoc storage system execution type * @param[in] walltime The adhoc storage system walltime @@ -544,7 +580,8 @@ ADM_adhoc_resources_destroy(ADM_adhoc_resources_t res); * @return A valid ADM_ADHOC_CONTEXT if successful. NULL otherwise. 
*/ ADM_adhoc_context_t -ADM_adhoc_context_create(const char* ctl_address, ADM_adhoc_mode_t exec_mode, +ADM_adhoc_context_create(const char* ctl_address, const char* stager_address, + ADM_adhoc_mode_t exec_mode, ADM_adhoc_access_t access_type, uint32_t walltime, bool should_flush); diff --git a/src/lib/scord/types.hpp b/src/lib/scord/types.hpp index 4f34f36aadb4f2ea11aa0ab6ec015e7a83b6b995..d6e771d17ac219f47669b412794ee345db955370 100644 --- a/src/lib/scord/types.hpp +++ b/src/lib/scord/types.hpp @@ -51,6 +51,7 @@ struct error_code { static const error_code adhoc_dir_exists; static const error_code subprocess_error; static const error_code no_resources; + static const error_code timeout; static const error_code other; constexpr error_code() : m_value(ADM_SUCCESS) {} @@ -89,6 +90,7 @@ struct error_code { ADM_ERROR_CASE(ADM_EADHOC_DIR_CREATE_FAILED); ADM_ERROR_CASE(ADM_EADHOC_DIR_EXISTS); ADM_ERROR_CASE(ADM_ESUBPROCESS_ERROR); + ADM_ERROR_CASE(ADM_ETIMEOUT); ADM_ERROR_CASE(ADM_EOTHER); ADM_ERROR_DEFAULT_MSG("INVALID_ERROR_VALUE"); } @@ -123,6 +125,7 @@ constexpr error_code error_code::adhoc_dir_create_failed{ constexpr error_code error_code::adhoc_dir_exists{ADM_EADHOC_DIR_EXISTS}; constexpr error_code error_code::subprocess_error{ADM_ESUBPROCESS_ERROR}; constexpr error_code error_code::no_resources{ADM_ENO_RESOURCES}; +constexpr error_code error_code::timeout{ADM_ETIMEOUT}; constexpr error_code error_code::other{ADM_EOTHER}; using job_id = std::uint64_t; @@ -190,6 +193,7 @@ private: }; struct dataset; +struct dataset_route; struct adhoc_storage { @@ -236,14 +240,19 @@ struct adhoc_storage { ctx() = default; - ctx(std::string controller_address, execution_mode exec_mode, - access_type access_type, std::uint32_t walltime, bool should_flush); + ctx(std::string controller_address, std::string data_stager_address, + execution_mode exec_mode, access_type access_type, + std::uint32_t walltime, bool should_flush); explicit ctx(ADM_adhoc_context_t ctx); explicit operator 
ADM_adhoc_context_t() const; std::string const& controller_address() const; + + std::string const& + data_stager_address() const; + execution_mode exec_mode() const; enum access_type @@ -257,6 +266,7 @@ struct adhoc_storage { void serialize(Archive&& ar) { ar & m_controller_address; + ar & m_data_stager_address; ar & m_exec_mode; ar & m_access_type; ar & m_walltime; @@ -265,6 +275,7 @@ struct adhoc_storage { private: std::string m_controller_address; + std::string m_data_stager_address; execution_mode m_exec_mode; enum access_type m_access_type; std::uint32_t m_walltime; @@ -409,17 +420,21 @@ struct job { struct requirements { requirements(); - requirements(std::vector inputs, - std::vector outputs); - requirements(std::vector inputs, - std::vector outputs, + requirements(std::vector inputs, + std::vector outputs, + std::vector expected_outputs); + requirements(std::vector inputs, + std::vector outputs, + std::vector expected_outputs, scord::adhoc_storage adhoc_storage); explicit requirements(ADM_job_requirements_t reqs); - std::vector + std::vector const& inputs() const; - std::vector + std::vector const& outputs() const; + std::vector const& + expected_outputs() const; std::optional adhoc_storage() const; @@ -430,12 +445,14 @@ struct job { serialize(Archive& ar) { ar & m_inputs; ar & m_outputs; + ar & m_expected_outputs; ar & m_adhoc_storage; } private: - std::vector m_inputs; - std::vector m_outputs; + std::vector m_inputs; + std::vector m_outputs; + std::vector m_expected_outputs; std::optional m_adhoc_storage; }; @@ -656,6 +673,33 @@ private: std::unique_ptr m_pimpl; }; +struct dataset_route { + dataset_route(); + explicit dataset_route(scord::dataset src, scord::dataset dst); + explicit dataset_route(ADM_dataset_route_t route); + dataset_route(const dataset_route&) noexcept; + dataset_route(dataset_route&&) noexcept; + dataset_route& + operator=(const dataset_route&) noexcept; + dataset_route& + operator=(dataset_route&&) noexcept; + ~dataset_route(); + + 
scord::dataset const& + source() const; + + scord::dataset const& + destination() const; + + template + void + serialize(Archive& ar); + +private: + class impl; + std::unique_ptr m_pimpl; +}; + } // namespace scord @@ -719,6 +763,31 @@ struct fmt::formatter> } }; +template <> +struct fmt::formatter : formatter { + // parse is inherited from formatter. + template + auto + format(const scord::dataset_route& r, FormatContext& ctx) const { + const auto str = fmt::format("{{src: {}, dst: {}}}", r.source(), + r.destination()); + return formatter::format(str, ctx); + } +}; + +template <> +struct fmt::formatter> + : fmt::formatter { + // parse is inherited from formatter. + template + auto + format(const std::vector& v, + FormatContext& ctx) const { + const auto str = fmt::format("[{}]", fmt::join(v, ", ")); + return formatter::format(str, ctx); + } +}; + template <> struct fmt::formatter : fmt::formatter { // parse is inherited from formatter. @@ -923,11 +992,13 @@ struct fmt::formatter : formatter { template auto format(const scord::adhoc_storage::ctx& c, FormatContext& ctx) const { - return format_to(ctx.out(), - "{{controller: {}, execution_mode: {}, " - "access_type: {}, walltime: {}, should_flush: {}}}", - std::quoted(c.controller_address()), c.exec_mode(), - c.access_type(), c.walltime(), c.should_flush()); + return format_to( + ctx.out(), + "{{controller: {}, data_stager: {}, execution_mode: {}, " + "access_type: {}, walltime: {}, should_flush: {}}}", + std::quoted(c.controller_address()), + std::quoted(c.data_stager_address()), c.exec_mode(), + c.access_type(), c.walltime(), c.should_flush()); } }; @@ -1045,8 +1116,10 @@ struct fmt::formatter : formatter { auto format(const scord::job::requirements& r, FormatContext& ctx) const { return formatter::format( - fmt::format("{{inputs: {}, outputs: {}, adhoc_storage: {}}}", - r.inputs(), r.outputs(), r.adhoc_storage()), + fmt::format("{{inputs: {}, outputs: {}, " + "expected_outputs: {}, adhoc_storage: {}}}", + 
r.inputs(), r.outputs(), r.expected_outputs(), + r.adhoc_storage()), ctx); } }; diff --git a/src/lib/scord/utils.h b/src/lib/scord/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..bcdb134c873093699fc66f6043ed82dff7740045 --- /dev/null +++ b/src/lib/scord/utils.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of the scord API. + * + * The scord API is free software: you can redistribute it and/or modify + * it under the terms of the Lesser GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The scord API is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the Lesser GNU General Public License + * along with the scord API. If not, see . 
+ * + * SPDX-License-Identifier: LGPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_UTILS_H +#define SCORD_UTILS_H + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +ADM_return_t +scord_utils_parse_dataset_routes(const char* routes, + ADM_dataset_route_t** parsed_routes, + size_t* parsed_routes_count); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // SCORD_UTILS_H diff --git a/src/lib/types.c b/src/lib/types.c index ab439d33911b70bbb97f6b4d0fa82ecaf83d4882..1e4269095aa8075fe69ec924c02d52104ba46bbc 100644 --- a/src/lib/types.c +++ b/src/lib/types.c @@ -264,6 +264,79 @@ ADM_dataset_destroy(ADM_dataset_t dataset) { return ret; } +ADM_dataset_route_t +ADM_dataset_route_create(ADM_dataset_t source, ADM_dataset_t destination) { + + struct adm_dataset_route* adm_dataset_route = + (struct adm_dataset_route*) malloc( + sizeof(struct adm_dataset_route)); + + if(!adm_dataset_route) { + LOGGER_ERROR("Could not allocate ADM_dataset_route_t"); + return NULL; + } + + adm_dataset_route->d_src = ADM_dataset_create(source->d_id); + + if(!adm_dataset_route->d_src) { + LOGGER_ERROR("Could not allocate ADM_dataset_t"); + return NULL; + } + + adm_dataset_route->d_dst = ADM_dataset_create(destination->d_id); + + if(!adm_dataset_route->d_dst) { + LOGGER_ERROR("Could not allocate ADM_dataset_t"); + return NULL; + } + + return adm_dataset_route; +} + +ADM_dataset_route_t +ADM_dataset_route_copy(ADM_dataset_route_t dst, const ADM_dataset_route_t src) { + + if(!src || !dst) { + return NULL; + } + + // copy all primitive types + *dst = *src; + + // duplicate copy any pointer types + if(src->d_src) { + dst->d_src = ADM_dataset_create(src->d_src->d_id); + } + + if(src->d_dst) { + dst->d_dst = ADM_dataset_create(src->d_dst->d_id); + } + + return dst; +} + +ADM_return_t +ADM_dataset_route_destroy(ADM_dataset_route_t route) { + + ADM_return_t ret = ADM_SUCCESS; + + if(!route) { + LOGGER_ERROR("Invalid 
ADM_dataset_route_t"); + return ADM_EBADARGS; + } + + if(route->d_src) { + ADM_dataset_destroy(route->d_src); + } + + if(route->d_dst) { + ADM_dataset_destroy(route->d_dst); + } + /* This causes a double free */ + //free(route); + return ret; +} + ADM_qos_entity_t ADM_qos_entity_create(ADM_qos_scope_t scope, void* data) { @@ -448,6 +521,70 @@ ADM_dataset_list_destroy(ADM_dataset_list_t list) { return ret; } +ADM_dataset_route_list_t +ADM_dataset_route_list_create(ADM_dataset_route_t routes[], size_t length) { + + ADM_dataset_route_list_t p = (ADM_dataset_route_list_t) malloc(sizeof(*p)); + + if(!p) { + LOGGER_ERROR("Could not allocate ADM_dataset_route_list_t"); + return NULL; + } + + const char* error_msg = NULL; + + p->l_length = length; + p->l_routes = (struct adm_dataset_route*) calloc( + length, sizeof(struct adm_dataset_route)); + + if(!p->l_routes) { + error_msg = "Could not allocate ADM_dataset_route_list_t"; + goto cleanup_on_error; + } + + for(size_t i = 0; i < length; ++i) { + if(!ADM_dataset_route_copy(&p->l_routes[i], routes[i])) { + error_msg = "Could not allocate ADM_dataset_route_list_t"; + goto cleanup_on_error; + }; + } + + return p; + +cleanup_on_error: + if(p->l_routes) { + free(p->l_routes); + } + free(p); + + LOGGER_ERROR(error_msg); + + return NULL; +} + +ADM_return_t +ADM_dataset_route_list_destroy(ADM_dataset_route_list_t list) { + ADM_return_t ret = ADM_SUCCESS; + + if(!list) { + LOGGER_ERROR("Invalid ADM_dataset_route_list_t"); + return ADM_EBADARGS; + } + + // We cannot call ADM_dataset_route_destroy here because adm_dataset_routes + // are stored as a consecutive array in memory. Thus, we free + // the dataset route ids themselves and then the array. 
+ if(list->l_routes) { + for(size_t i = 0; i < list->l_length; ++i) { + ADM_dataset_route_destroy(&list->l_routes[i]); + } + free(list->l_routes); + } + + free(list); + return ret; +} + ADM_adhoc_storage_t ADM_adhoc_storage_create(const char* name, ADM_adhoc_storage_type_t type, uint64_t id, ADM_adhoc_context_t adhoc_ctx, @@ -676,7 +813,8 @@ ADM_data_operation_destroy(ADM_data_operation_t op) { } ADM_adhoc_context_t -ADM_adhoc_context_create(const char* ctl_address, ADM_adhoc_mode_t exec_mode, +ADM_adhoc_context_create(const char* ctl_address, const char* stager_address, + ADM_adhoc_mode_t exec_mode, ADM_adhoc_access_t access_type, uint32_t walltime, bool should_flush) { @@ -685,6 +823,11 @@ ADM_adhoc_context_create(const char* ctl_address, ADM_adhoc_mode_t exec_mode, return NULL; } + if(!stager_address) { + LOGGER_ERROR("The address to the stager cannot be NULL"); + return NULL; + } + struct adm_adhoc_context* adm_adhoc_context = (struct adm_adhoc_context*) malloc(sizeof(*adm_adhoc_context)); @@ -699,6 +842,11 @@ ADM_adhoc_context_create(const char* ctl_address, ADM_adhoc_mode_t exec_mode, (const char*) calloc(n + 1, sizeof(char)); strcpy((char*) adm_adhoc_context->c_ctl_address, ctl_address); + n = strlen(stager_address); + adm_adhoc_context->c_stager_address = + (const char*) calloc(n + 1, sizeof(char)); + strcpy((char*) adm_adhoc_context->c_stager_address, stager_address); + adm_adhoc_context->c_mode = exec_mode; adm_adhoc_context->c_access = access_type; adm_adhoc_context->c_walltime = walltime; @@ -810,8 +958,10 @@ ADM_job_resources_destroy(ADM_job_resources_t res) { ADM_job_requirements_t -ADM_job_requirements_create(ADM_dataset_t inputs[], size_t inputs_len, - ADM_dataset_t outputs[], size_t outputs_len, +ADM_job_requirements_create(ADM_dataset_route_t inputs[], size_t inputs_len, + ADM_dataset_route_t outputs[], size_t outputs_len, + ADM_dataset_route_t expected_outputs[], + size_t expected_outputs_len, ADM_adhoc_storage_t adhoc_storage) { struct 
adm_job_requirements* adm_job_reqs = @@ -823,26 +973,36 @@ ADM_job_requirements_create(ADM_dataset_t inputs[], size_t inputs_len, return NULL; } - ADM_dataset_list_t inputs_list = NULL; - ADM_dataset_list_t outputs_list = NULL; + ADM_dataset_route_list_t inputs_list = NULL; + ADM_dataset_route_list_t outputs_list = NULL; + ADM_dataset_route_list_t expected_outputs_list = NULL; const char* error_msg = NULL; - inputs_list = ADM_dataset_list_create(inputs, inputs_len); + inputs_list = ADM_dataset_route_list_create(inputs, inputs_len); if(!inputs_list) { error_msg = "Could not allocate ADM_job_requirements_t"; goto cleanup_on_error; } - outputs_list = ADM_dataset_list_create(outputs, outputs_len); + outputs_list = ADM_dataset_route_list_create(outputs, outputs_len); if(!outputs_list) { error_msg = "Could not allocate ADM_job_requirements_t"; goto cleanup_on_error; } + expected_outputs_list = ADM_dataset_route_list_create(expected_outputs, + expected_outputs_len); + + if(!expected_outputs_list) { + error_msg = "Could not allocate ADM_job_requirements_t"; + goto cleanup_on_error; + } + adm_job_reqs->r_inputs = inputs_list; adm_job_reqs->r_outputs = outputs_list; + adm_job_reqs->r_expected_outputs = expected_outputs_list; if(!adhoc_storage) { return adm_job_reqs; @@ -878,11 +1038,15 @@ ADM_job_requirements_destroy(ADM_job_requirements_t reqs) { } if(reqs->r_inputs) { - ADM_dataset_list_destroy(reqs->r_inputs); + ADM_dataset_route_list_destroy(reqs->r_inputs); } if(reqs->r_outputs) { - ADM_dataset_list_destroy(reqs->r_outputs); + ADM_dataset_route_list_destroy(reqs->r_outputs); + } + + if(reqs->r_expected_outputs) { + ADM_dataset_route_list_destroy(reqs->r_expected_outputs); } if(reqs->r_adhoc_storage) { diff --git a/src/lib/types.cpp b/src/lib/types.cpp index cb6782e8a7ed5ee8baa523737e57ea6facb92250..e51c2a08ee7e9dc6812fc96e54213718f106174a 100644 --- a/src/lib/types.cpp +++ b/src/lib/types.cpp @@ -232,14 +232,20 @@ private: job::requirements::requirements() = default; 
-job::requirements::requirements(std::vector inputs, - std::vector outputs) - : m_inputs(std::move(inputs)), m_outputs(std::move(outputs)) {} +job::requirements::requirements( + std::vector inputs, + std::vector outputs, + std::vector expected_outputs) + : m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), + m_expected_outputs(std::move(expected_outputs)) {} -job::requirements::requirements(std::vector inputs, - std::vector outputs, - scord::adhoc_storage adhoc_storage) +job::requirements::requirements( + std::vector inputs, + std::vector outputs, + std::vector expected_outputs, + scord::adhoc_storage adhoc_storage) : m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), + m_expected_outputs(std::move(expected_outputs)), m_adhoc_storage(std::move(adhoc_storage)) {} job::requirements::requirements(ADM_job_requirements_t reqs) { @@ -247,13 +253,23 @@ job::requirements::requirements(ADM_job_requirements_t reqs) { m_inputs.reserve(reqs->r_inputs->l_length); for(size_t i = 0; i < reqs->r_inputs->l_length; ++i) { - m_inputs.emplace_back(reqs->r_inputs->l_datasets[i].d_id); + m_inputs.emplace_back(dataset{reqs->r_inputs->l_routes[i].d_src}, + dataset{reqs->r_inputs->l_routes[i].d_dst}); } m_outputs.reserve(reqs->r_outputs->l_length); for(size_t i = 0; i < reqs->r_outputs->l_length; ++i) { - m_outputs.emplace_back(reqs->r_outputs->l_datasets[i].d_id); + m_outputs.emplace_back(dataset{reqs->r_inputs->l_routes[i].d_src}, + dataset{reqs->r_inputs->l_routes[i].d_dst}); + } + + m_expected_outputs.reserve(reqs->r_expected_outputs->l_length); + + for(size_t i = 0; i < reqs->r_expected_outputs->l_length; ++i) { + m_expected_outputs.emplace_back( + dataset{reqs->r_expected_outputs->l_routes[i].d_src}, + dataset{reqs->r_expected_outputs->l_routes[i].d_dst}); } if(reqs->r_adhoc_storage) { @@ -261,16 +277,21 @@ job::requirements::requirements(ADM_job_requirements_t reqs) { } } -std::vector +std::vector const& job::requirements::inputs() const { return m_inputs; } 
-std::vector +std::vector const& job::requirements::outputs() const { return m_outputs; } +std::vector const& +job::requirements::expected_outputs() const { + return m_expected_outputs; +} + std::optional job::requirements::adhoc_storage() const { return m_adhoc_storage; @@ -528,6 +549,110 @@ template void dataset::serialize( network::serialization::input_archive&); +class dataset_route::impl { +public: + impl() = default; + explicit impl(dataset src, dataset dst) + : m_source(std::move(src)), m_destination(std::move(dst)) {} + impl(const impl& rhs) = default; + impl(impl&& rhs) = default; + impl& + operator=(const impl& other) noexcept = default; + impl& + operator=(impl&&) noexcept = default; + ~impl() = default; + + dataset const& + source() const { + return m_source; + } + + dataset const& + destination() const { + return m_destination; + } + + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_source)); + ar(SCORD_SERIALIZATION_NVP(m_destination)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_source)); + ar(SCORD_SERIALIZATION_NVP(m_destination)); + } + +private: + dataset m_source; + dataset m_destination; +}; + +dataset_route::dataset_route() = default; + +dataset_route::dataset_route(dataset src, dataset dst) + : m_pimpl(std::make_unique(std::move(src), + std::move(dst))) {} + +dataset_route::dataset_route(ADM_dataset_route_t dataset_route) + : dataset_route::dataset_route(dataset{dataset_route->d_src}, + dataset{dataset_route->d_dst}) {} + +dataset_route::dataset_route(const dataset_route& other) noexcept + : m_pimpl(std::make_unique(*other.m_pimpl)) {} + +dataset_route::dataset_route(dataset_route&&) noexcept = default; + +dataset_route& +dataset_route::operator=(const dataset_route& other) noexcept { + this->m_pimpl = std::make_unique(*other.m_pimpl); + return *this; +} + +dataset_route& +dataset_route::operator=(dataset_route&&) noexcept = default; + +dataset_route::~dataset_route() = default; + 
+dataset const& +dataset_route::source() const { + return m_pimpl->source(); +} + +dataset const& +dataset_route::destination() const { + return m_pimpl->destination(); +} + +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +dataset_route::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +dataset_route::impl::save( + network::serialization::output_archive&) const; + +template void +dataset_route::impl::load( + network::serialization::input_archive&); + +template void +dataset_route::serialize( + network::serialization::output_archive&); + +template void +dataset_route::serialize( + network::serialization::input_archive&); + adhoc_storage::resources::resources(std::vector nodes) : m_nodes(std::move(nodes)) {} @@ -562,22 +687,24 @@ adhoc_storage::resources::nodes() const { } adhoc_storage::ctx::ctx(std::string controller_address, + std::string data_stager_address, adhoc_storage::execution_mode exec_mode, adhoc_storage::access_type access_type, std::uint32_t walltime, bool should_flush) : m_controller_address(std::move(controller_address)), + m_data_stager_address(std::move(data_stager_address)), m_exec_mode(exec_mode), m_access_type(access_type), m_walltime(walltime), m_should_flush(should_flush) {} adhoc_storage::ctx::ctx(ADM_adhoc_context_t ctx) - : adhoc_storage::ctx(ctx->c_ctl_address, + : adhoc_storage::ctx(ctx->c_ctl_address, ctx->c_stager_address, static_cast(ctx->c_mode), static_cast(ctx->c_access), ctx->c_walltime, ctx->c_should_bg_flush) {} adhoc_storage::ctx::operator ADM_adhoc_context_t() const { return ADM_adhoc_context_create( - m_controller_address.c_str(), + m_controller_address.c_str(), m_data_stager_address.c_str(), static_cast(m_exec_mode), static_cast(m_access_type), m_walltime, m_should_flush); @@ -588,6 +715,11 @@ 
adhoc_storage::ctx::controller_address() const { return m_controller_address; } +std::string const& +adhoc_storage::ctx::data_stager_address() const { + return m_data_stager_address; +} + adhoc_storage::execution_mode adhoc_storage::ctx::exec_mode() const { return m_exec_mode; diff --git a/src/lib/types_private.h b/src/lib/types_private.h index 716766e35950eaf83adec2378bda0daa91f8bb65..3e0ba099bf1e3ec0c3448d9e71bb20793e4a6983 100644 --- a/src/lib/types_private.h +++ b/src/lib/types_private.h @@ -45,6 +45,11 @@ struct adm_dataset { const char* d_id; }; +struct adm_dataset_route { + ADM_dataset_t d_src; + ADM_dataset_t d_dst; +}; + struct adm_job { uint64_t j_id; uint64_t j_slurm_id; @@ -85,6 +90,8 @@ struct adm_dataset_info { struct adm_adhoc_context { /** The address to the node responsible for this adhoc storage system */ const char* c_ctl_address; + /** The address to the data stager for this adhoc storage system */ + const char* c_stager_address; /** The adhoc storage system execution mode */ ADM_adhoc_mode_t c_mode; /** The adhoc storage system access type */ @@ -127,9 +134,11 @@ struct adm_data_operation { struct adm_job_requirements { /** An array of input datasets */ - ADM_dataset_list_t r_inputs; + ADM_dataset_route_list_t r_inputs; /** An array of output datasets */ - ADM_dataset_list_t r_outputs; + ADM_dataset_route_list_t r_outputs; + /** An array of expected output datasets */ + ADM_dataset_route_list_t r_expected_outputs; /** An optional definition for a specific storage instance */ ADM_adhoc_storage_t r_adhoc_storage; }; @@ -147,6 +156,13 @@ struct adm_dataset_list { size_t l_length; }; +struct adm_dataset_route_list { + /** An array of dataset routes */ + struct adm_dataset_route* l_routes; + /** The length of the array */ + size_t l_length; +}; + struct adm_qos_limit_list { /** An array of QoS limits */ struct adm_qos_limit* l_limits; diff --git a/src/lib/utils.cpp b/src/lib/utils.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..846fc0eb9b230b15672dba96e87d7dd4beb1ebcc --- /dev/null +++ b/src/lib/utils.cpp @@ -0,0 +1,99 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of the scord API. + * + * The scord API is free software: you can redistribute it and/or modify + * it under the terms of the Lesser GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The scord API is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the Lesser GNU General Public License + * along with the scord API. If not, see . 
+ * + * SPDX-License-Identifier: LGPL-3.0-or-later + *****************************************************************************/ + +#include +#include +#include + + +namespace { + +std::vector +split(const std::string& text, char sep) { + std::vector tokens; + std::size_t start = 0, end; + + while((end = text.find(sep, start)) != std::string::npos) { + tokens.push_back(text.substr(start, end - start)); + start = end + 1; + } + + tokens.push_back(text.substr(start)); + return tokens; +} + +} // namespace + +extern "C" ADM_return_t +scord_utils_parse_dataset_routes(const char* routes, + ADM_dataset_route_t** parsed_routes, + size_t* parsed_routes_count) { + + std::vector tmp; + + if(routes == nullptr || parsed_routes == nullptr || + parsed_routes_count == nullptr) { + return ADM_EBADARGS; + } + + const std::string route_str(routes); + + if(route_str.empty()) { + return ADM_EBADARGS; + } + + for(auto&& rs : split(route_str, ';')) { + + const auto parts = split(rs, '='); + + if(parts.size() != 2) { + return ADM_EBADARGS; + } + + ADM_dataset_route_t dr = + ADM_dataset_route_create(ADM_dataset_create(parts[0].c_str()), + ADM_dataset_create(parts[1].c_str())); + + if(dr == nullptr) { + return ADM_ENOMEM; + } + + tmp.push_back(dr); + } + + *parsed_routes = static_cast( + malloc(tmp.size() * sizeof(ADM_dataset_route_t))); + + if(*parsed_routes == nullptr) { + return ADM_ENOMEM; + } + + *parsed_routes_count = tmp.size(); + + for(std::size_t i = 0; i < tmp.size(); i++) { + (*parsed_routes)[i] = tmp[i]; + } + + return ADM_SUCCESS; +} diff --git a/src/scord-ctl/command.cpp b/src/scord-ctl/command.cpp index 65f282fba521cb1bb82c03db78744383795d67e8..c45b5b5cd52193584c3d32f9495dfb536f35804a 100644 --- a/src/scord-ctl/command.cpp +++ b/src/scord-ctl/command.cpp @@ -135,6 +135,52 @@ command::eval(const std::string& adhoc_id, return command{result, m_env}; } +command +command::eval(const std::string& adhoc_id, + const std::vector& adhoc_nodes) const { + + // generate a regex from 
a map of key/value pairs + constexpr auto regex_from_map = + [](const std::map& m) -> std::regex { + std::string result; + for(const auto& [key, value] : m) { + const auto escaped_key = + std::regex_replace(key, std::regex{R"([{}])"}, R"(\$&)"); + result += fmt::format("{}|", escaped_key); + } + result.pop_back(); + return std::regex{result}; + }; + + const std::map replacements{ + {std::string{keywords_malleability.at(0)}, adhoc_id}, + {std::string{keywords_malleability.at(1)}, + fmt::format("\"{}\"", fmt::join(adhoc_nodes, ","))}}; + + // make sure that we fail if we ever add a new keyword and forget to add + // a replacement for it + assert(replacements.size() == keywords_malleability.size()); + + std::string result; + + const auto re = regex_from_map(replacements); + auto it = std::sregex_iterator(m_cmdline.begin(), m_cmdline.end(), re); + auto end = std::sregex_iterator{}; + + std::string::size_type last_pos = 0; + + for(; it != end; ++it) { + const auto& match = *it; + result += m_cmdline.substr(last_pos, match.position() - last_pos); + result += replacements.at(match.str()); + last_pos = match.position() + match.length(); + } + + result += m_cmdline.substr(last_pos, m_cmdline.length() - last_pos); + + return command{result, m_env}; +} + std::vector command::as_vector() const { std::vector tmp; diff --git a/src/scord-ctl/command.hpp b/src/scord-ctl/command.hpp index 73ada0e8e0ce9a8b2f87850af2ac0ee7799d88eb..a3551ed7f9bbdd9297f1e89f41643d2ef719b91f 100644 --- a/src/scord-ctl/command.hpp +++ b/src/scord-ctl/command.hpp @@ -109,6 +109,8 @@ public: static constexpr std::array keywords = { "{ADHOC_ID}", "{ADHOC_DIRECTORY}", "{ADHOC_NODES}"}; + static constexpr std::array keywords_malleability = { + "{ADHOC_ID}", "{ADHOC_NODES}"}; /** * @brief Construct a command. 
* @@ -152,6 +154,20 @@ public: const std::filesystem::path& adhoc_directory, const std::vector& adhoc_nodes) const; + + /** + * @brief Return a copy of the current `command` where all the keywords in + * its command line template have been replaced with string + * representations of the arguments provided. + * + * @param adhoc_id The ID of the adhoc storage system. + * @param adhoc_nodes The nodes where the adhoc storage will run. + * @return The evaluated command. + */ + command + eval(const std::string& adhoc_id, + const std::vector& adhoc_nodes) const; + /** * @brief Get the command line to be executed as a vector of strings. The * command line is split on spaces with each string in the resulting diff --git a/src/scord-ctl/config_file.cpp b/src/scord-ctl/config_file.cpp index c8855ea50afffe9c3995680d9dcad7496e9df877..bb53f65352112a1d0dd72f42ad988e98b7ed0ddf 100644 --- a/src/scord-ctl/config_file.cpp +++ b/src/scord-ctl/config_file.cpp @@ -195,6 +195,9 @@ parse_adhoc_config_node(const ryml::ConstNodeRef& node) { std::filesystem::path working_directory; std::optional startup_command; std::optional shutdown_command; + std::optional expand_command; + std::optional shrink_command; + for(const auto& child : node) { @@ -212,6 +215,10 @@ parse_adhoc_config_node(const ryml::ConstNodeRef& node) { startup_command = ::parse_command_node(child); } else if(child.key() == "shutdown") { shutdown_command = ::parse_command_node(child); + } else if(child.key() == "expand") { + expand_command = ::parse_command_node(child); + } else if(child.key() == "shrink") { + shrink_command = ::parse_command_node(child); } else { fmt::print(stderr, "WARNING: Unknown key: '{}'. 
Ignored.\n", child.key()); @@ -222,7 +229,8 @@ parse_adhoc_config_node(const ryml::ConstNodeRef& node) { throw std::runtime_error{"missing required `working_directory` key"}; } - return {working_directory, *startup_command, *shutdown_command}; + return {working_directory, *startup_command, *shutdown_command, + *expand_command, *shrink_command}; } /** @@ -302,10 +310,13 @@ namespace scord_ctl::config { adhoc_storage_config::adhoc_storage_config( std::filesystem::path working_directory, command startup_command, - command shutdown_command) + command shutdown_command, command expand_command, + command shrink_command) : m_working_directory(std::move(working_directory)), m_startup_command(std::move(startup_command)), - m_shutdown_command(std::move(shutdown_command)) {} + m_shutdown_command(std::move(shutdown_command)), + m_expand_command(std::move(expand_command)), + m_shrink_command(std::move(shrink_command)) {} const std::filesystem::path& adhoc_storage_config::working_directory() const { @@ -322,6 +333,16 @@ adhoc_storage_config::shutdown_command() const { return m_shutdown_command; } +const command& +adhoc_storage_config::expand_command() const { + return m_expand_command; +} + +const command& +adhoc_storage_config::shrink_command() const { + return m_shrink_command; +} + config_file::config_file(const std::filesystem::path& path) { std::ifstream input{path}; diff --git a/src/scord-ctl/config_file.hpp b/src/scord-ctl/config_file.hpp index 119c6433e3cf2de67061c1d2b9fc2cc45134f327..ef93bc0c76ceb62da25bf1dcace162cca56df2e5 100644 --- a/src/scord-ctl/config_file.hpp +++ b/src/scord-ctl/config_file.hpp @@ -46,9 +46,14 @@ public: * storage. * @param shutdown_command The command to be executed to stop the adhoc * storage. + * @param expand_command The command to be executed to expand the adhoc + * storage. + * @param shrink_command The command to be executed to shrink the adhoc + * storage. 
*/ adhoc_storage_config(std::filesystem::path working_directory, - command startup_command, command shutdown_command); + command startup_command, command shutdown_command, + command expand_command, command shrink_command); /** * @brief Get the directory where the adhoc storage will run. @@ -74,10 +79,30 @@ public: const command& shutdown_command() const; + + /** + * @brief Get the command to be executed to expand the adhoc storage. + * + * @return The command to be executed to expand the adhoc storage. + */ + const command& + expand_command() const; + + + /** + * @brief Get the command to be executed to shrink the adhoc storage. + * + * @return The command to be executed to shrink the adhoc storage. + */ + const command& + shrink_command() const; + private: std::filesystem::path m_working_directory; command m_startup_command; command m_shutdown_command; + command m_expand_command; + command m_shrink_command; }; #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11 diff --git a/src/scord-ctl/rpc_server.cpp b/src/scord-ctl/rpc_server.cpp index 177104f53326d39fe4487face2e5fb58e04be0e8..edff7198471bb9dbf5f3b786983527d604813221 100644 --- a/src/scord-ctl/rpc_server.cpp +++ b/src/scord-ctl/rpc_server.cpp @@ -44,6 +44,8 @@ rpc_server::rpc_server(std::string name, std::string address, bool daemonize, provider::define(EXPAND(ping)); provider::define(EXPAND(deploy_adhoc_storage)); + provider::define(EXPAND(expand_adhoc_storage)); + provider::define(EXPAND(shrink_adhoc_storage)); provider::define(EXPAND(terminate_adhoc_storage)); #undef EXPAND @@ -196,6 +198,128 @@ respond: req.respond(resp); } +void +rpc_server::expand_adhoc_storage( + const network::request& req, const std::string& adhoc_uuid, + enum scord::adhoc_storage::type adhoc_type, + const scord::adhoc_storage::resources& adhoc_resources) { + + using network::generic_response; + using network::get_address; + using network::rpc_info; + + const auto rpc = rpc_info::create(RPC_NAME(), get_address(req)); + 
std::optional adhoc_dir;
+
+    LOGGER_INFO("rpc {:>} body: {{uuid: {}, type: {}, resources: {}}}", rpc,
+                std::quoted(adhoc_uuid), adhoc_type, adhoc_resources);
+
+    auto ec = scord::error_code::success;
+
+    if(!m_config.has_value() || m_config->adhoc_storage_configs().empty()) {
+        LOGGER_WARN("No adhoc storage configurations available");
+        ec = scord::error_code::snafu;
+        goto respond;
+    }
+
+    if(const auto it = m_config->adhoc_storage_configs().find(adhoc_type);
+       it != m_config->adhoc_storage_configs().end()) {
+        const auto& adhoc_cfg = it->second;
+
+        LOGGER_DEBUG("deploy \"{:e}\" (ID: {})", adhoc_type, adhoc_uuid);
+
+        // 1. Construct the expand command for the adhoc storage instance
+        std::vector<std::string> hostnames;
+        std::ranges::transform(
+                adhoc_resources.nodes(), std::back_inserter(hostnames),
+                [](const auto& node) { return node.hostname(); });
+
+        const auto cmd = adhoc_cfg.expand_command().eval(adhoc_uuid, hostnames);
+
+        // 2. Execute the expand command
+        try {
+            LOGGER_DEBUG("[{}] exec: {}", adhoc_uuid, cmd);
+            cmd.exec();
+        } catch(const std::exception& ex) {
+            LOGGER_ERROR("[{}] Failed to execute expand command: {}",
+                         adhoc_uuid, ex.what());
+            ec = scord::error_code::subprocess_error;
+        }
+    } else {
+        LOGGER_WARN(
+                "Failed to find adhoc storage configuration for type '{:e}'",
+                adhoc_type);
+        ec = scord::error_code::adhoc_type_unsupported;
+    }
+
+respond:
+    const generic_response resp{rpc.id(), ec};
+    LOGGER_INFO("rpc {:<} body: {{retval: {}}}", rpc, resp.error_code());
+    req.respond(resp);
+}
+
+
+void
+rpc_server::shrink_adhoc_storage(
+        const network::request& req, const std::string& adhoc_uuid,
+        enum scord::adhoc_storage::type adhoc_type,
+        const scord::adhoc_storage::resources& adhoc_resources) {
+
+    using network::generic_response;
+    using network::get_address;
+    using network::rpc_info;
+
+    const auto rpc = rpc_info::create(RPC_NAME(), get_address(req));
+    std::optional adhoc_dir;
+
+    LOGGER_INFO("rpc {:>} body: {{uuid: {}, type: {}, resources: 
{}}}", rpc,
+                std::quoted(adhoc_uuid), adhoc_type, adhoc_resources);
+
+    auto ec = scord::error_code::success;
+
+    if(!m_config.has_value() || m_config->adhoc_storage_configs().empty()) {
+        LOGGER_WARN("No adhoc storage configurations available");
+        ec = scord::error_code::snafu;
+        goto respond;
+    }
+
+    if(const auto it = m_config->adhoc_storage_configs().find(adhoc_type);
+       it != m_config->adhoc_storage_configs().end()) {
+        const auto& adhoc_cfg = it->second;
+
+        LOGGER_DEBUG("deploy \"{:e}\" (ID: {})", adhoc_type, adhoc_uuid);
+
+        // 1. Construct the shrink command for the adhoc storage instance
+        std::vector<std::string> hostnames;
+        std::ranges::transform(
+                adhoc_resources.nodes(), std::back_inserter(hostnames),
+                [](const auto& node) { return node.hostname(); });
+
+        const auto cmd = adhoc_cfg.shrink_command().eval(adhoc_uuid, hostnames);
+
+        // 2. Execute the shrink command
+        try {
+            LOGGER_DEBUG("[{}] exec: {}", adhoc_uuid, cmd);
+            cmd.exec();
+        } catch(const std::exception& ex) {
+            LOGGER_ERROR("[{}] Failed to execute shrink command: {}",
+                         adhoc_uuid, ex.what());
+            ec = scord::error_code::subprocess_error;
+        }
+    } else {
+        LOGGER_WARN(
+                "Failed to find adhoc storage configuration for type '{:e}'",
+                adhoc_type);
+        ec = scord::error_code::adhoc_type_unsupported;
+    }
+
+respond:
+    const generic_response resp{rpc.id(), ec};
+    LOGGER_INFO("rpc {:<} body: {{retval: {}}}", rpc, resp.error_code());
+    req.respond(resp);
+}
+
+
 void
 rpc_server::terminate_adhoc_storage(
         const network::request& req, const std::string& adhoc_uuid,
diff --git a/src/scord-ctl/rpc_server.hpp b/src/scord-ctl/rpc_server.hpp
index fa0518b4b03ceca7cd79dc63594803d5550b75d1..f60eed37cb0a1e4cf60a4db78af673551309ed07 100644
--- a/src/scord-ctl/rpc_server.hpp
+++ b/src/scord-ctl/rpc_server.hpp
@@ -56,6 +56,18 @@ private:
                          enum scord::adhoc_storage::type adhoc_type,
                          const scord::adhoc_storage::resources& adhoc_resources);
 
+    void
+    expand_adhoc_storage(
+            const network::request& req, const std::string& 
adhoc_uuid, + enum scord::adhoc_storage::type adhoc_type, + const scord::adhoc_storage::resources& adhoc_resources); + + void + shrink_adhoc_storage( + const network::request& req, const std::string& adhoc_uuid, + enum scord::adhoc_storage::type adhoc_type, + const scord::adhoc_storage::resources& adhoc_resources); + void terminate_adhoc_storage(const network::request& req, const std::string& adhoc_uuid, diff --git a/src/scord/CMakeLists.txt b/src/scord/CMakeLists.txt index 5ccfb19ebfa0bd0d6e35fd7835bb90639ace92c2..839e283a2fd7dc781ef4b7c9b55132c2d30f423f 100644 --- a/src/scord/CMakeLists.txt +++ b/src/scord/CMakeLists.txt @@ -26,7 +26,7 @@ add_executable(scord) target_sources(scord PRIVATE scord.cpp - job_manager.hpp adhoc_storage_manager.hpp + job_manager.hpp adhoc_storage_manager.hpp transfer_manager.hpp pfs_storage_manager.hpp ${CMAKE_CURRENT_BINARY_DIR}/defaults.hpp internal_types.hpp internal_types.cpp rpc_server.hpp rpc_server.cpp) @@ -51,6 +51,7 @@ target_link_libraries( CLI11::CLI11 RedisPlusPlus::RedisPlusPlus ryml::ryml + cargo::cargo ) install(TARGETS scord DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/scord/internal_types.cpp b/src/scord/internal_types.cpp index 993cefb2a8a84df963f01bfd705c5b505c314d8d..2f723ef6119c7ee03863259e98fe6af8a0bb1e4f 100644 --- a/src/scord/internal_types.cpp +++ b/src/scord/internal_types.cpp @@ -79,6 +79,11 @@ adhoc_storage_metadata::controller_address() const { return m_adhoc_storage.context().controller_address(); } +std::string const& +adhoc_storage_metadata::data_stager_address() const { + return m_adhoc_storage.context().data_stager_address(); +} + void adhoc_storage_metadata::update(scord::adhoc_storage::resources new_resources) { m_adhoc_storage.update(std::move(new_resources)); diff --git a/src/scord/internal_types.hpp b/src/scord/internal_types.hpp index e22f862143587161b3c51aeae78264508a689d78..92c7ee8b3f6d7ed369add8f18313a2706d9deff4 100644 --- a/src/scord/internal_types.hpp +++ 
b/src/scord/internal_types.hpp @@ -81,6 +81,9 @@ struct adhoc_storage_metadata { std::string const& controller_address() const; + std::string const& + data_stager_address() const; + void update(scord::adhoc_storage::resources new_resources); @@ -114,6 +117,44 @@ struct pfs_storage_metadata { std::shared_ptr m_client_info; }; +template +struct transfer_metadata { + transfer_metadata(transfer_id id, TransferHandle&& handle, + std::vector qos) + : m_id(id), m_handle(handle), m_qos(std::move(qos)) {} + + transfer_id + id() const { + return m_id; + } + + TransferHandle + transfer() const { + return m_handle; + } + + std::vector const& + qos() const { + return m_qos; + } + + float + measured_bandwidth() const { + return m_measured_bandwidth; + } + + void + update(float bandwidth) { + m_measured_bandwidth = bandwidth; + } + + transfer_id m_id; + TransferHandle m_handle; + std::vector m_qos; + float m_measured_bandwidth = -1.0; +}; + + } // namespace scord::internal #endif // SCORD_INTERNAL_TYPES_HPP diff --git a/src/scord/rpc_server.cpp b/src/scord/rpc_server.cpp index 737b93197f24763dcb0c1f765d114830a49e6049..1448ff1fc2ee7f43565dc28a05540d296d53f10e 100644 --- a/src/scord/rpc_server.cpp +++ b/src/scord/rpc_server.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include "rpc_server.hpp" template @@ -275,7 +276,7 @@ rpc_server::register_adhoc_storage( LOGGER_INFO("rpc {:>} body: {{name: {}, type: {}, adhoc_ctx: {}, " "adhoc_resources: {}}}", - rpc, name, type, ctx, resources); + rpc, std::quoted(name), type, ctx, resources); scord::error_code ec; std::optional adhoc_id; @@ -314,6 +315,22 @@ rpc_server::update_adhoc_storage( LOGGER_INFO("rpc {:>} body: {{adhoc_id: {}, new_resources: {}}}", rpc, adhoc_id, new_resources); + + const auto pre_ec = m_adhoc_manager.find(adhoc_id); + + if(!pre_ec) { + LOGGER_ERROR( + "rpc id: {} error_msg: \"Error updating adhoc_storage: {}\"", + rpc.id(), scord::error_code::no_such_entity); + } + + const auto old_resources_size = 
pre_ec.value() + .get() + ->adhoc_storage() + .get_resources() + .nodes() + .size(); + const auto ec = m_adhoc_manager.update(adhoc_id, new_resources); if(!ec) { @@ -322,9 +339,69 @@ rpc_server::update_adhoc_storage( rpc.id(), ec); } - const auto resp = generic_response{rpc.id(), ec}; + bool expand = new_resources.nodes().size() > old_resources_size; - LOGGER_INFO("rpc {:<} body: {{retval: {}}}", rpc, ec); + /** + * @brief Helper lambda to contact the adhoc controller and prompt it to + * update an adhoc storage instance + * @param adhoc_storage The relevant `adhoc_storage` object with + * information about the instance to deploy. + * @return + */ + const auto update_helper = [&](const auto& adhoc_metadata_ptr) + -> tl::expected { + assert(adhoc_metadata_ptr); + const auto adhoc_storage = adhoc_metadata_ptr->adhoc_storage(); + const auto endp = lookup(adhoc_storage.context().controller_address()); + + if(!endp) { + LOGGER_ERROR("endpoint lookup failed"); + return tl::make_unexpected(scord::error_code::snafu); + } + + // const auto child_rpc = + // rpc.add_child(adhoc_storage.context().controller_address()); + + auto name = "ADM_expand_adhoc_storage"; + if(!expand) { + name = "ADM_shrink_adhoc_storage"; + } + + const auto child_rpc = rpc_info::create( + name, adhoc_storage.context().controller_address()); + + LOGGER_INFO("rpc {:<} body: {{uuid: {}, type: {}, resources: {}}}", + child_rpc, std::quoted(adhoc_metadata_ptr->uuid()), + adhoc_storage.type(), adhoc_storage.get_resources()); + + if(const auto call_rv = endp->call( + child_rpc.name(), adhoc_metadata_ptr->uuid(), + adhoc_storage.type(), adhoc_storage.get_resources()); + call_rv.has_value()) { + + const network::generic_response resp{call_rv.value()}; + + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc {:>} body: {{retval: {}}} [op_id: {}]", child_rpc, + resp.error_code(), resp.op_id()); + + return resp.error_code(); + } + + LOGGER_ERROR("rpc call failed"); + return tl::make_unexpected(error_code::snafu); + 
}; + + const auto rv = + m_adhoc_manager.find(adhoc_id) + .or_else([](auto&&) { + LOGGER_ERROR("adhoc storage instance not found"); + }) + .and_then(update_helper); + + const auto resp = generic_response(rpc.id(), rv.value()); + + LOGGER_INFO("rpc {:<} body: {{retval: {}}}", rpc, rv.value()); req.respond(resp); } @@ -517,8 +594,8 @@ rpc_server::register_pfs_storage(const network::request& req, const auto rpc = rpc_info::create(RPC_NAME(), get_address(req)); - LOGGER_INFO("rpc {:>} body: {{name: {}, type: {}, pfs_ctx: {}}}", rpc, name, - type, ctx); + LOGGER_INFO("rpc {:>} body: {{name: {}, type: {}, pfs_ctx: {}}}", rpc, + std::quoted(name), type, ctx); scord::error_code ec; std::optional pfs_id = 0; @@ -611,17 +688,72 @@ rpc_server::transfer_datasets(const network::request& req, scord::job_id job_id, "limits: {}, mapping: {}}}", rpc, job_id, sources, targets, limits, mapping); - scord::error_code ec; + const auto jm_result = m_job_manager.find(job_id); - std::optional tx_id; + if(!jm_result) { + LOGGER_ERROR("rpc id: {} error_msg: \"Error finding job: {}\"", + rpc.id(), job_id); + const auto resp = response_with_id{rpc.id(), jm_result.error()}; + LOGGER_ERROR("rpc {:<} body: {{retval: {}}}", rpc, resp.error_code()); + req.respond(resp); + return; + } + + const auto& job_metadata_ptr = jm_result.value(); + + if(!job_metadata_ptr->adhoc_storage_metadata()) { + LOGGER_ERROR("rpc id: {} error_msg: \"Job has no adhoc storage\"", + rpc.id(), job_id); + const auto resp = response_with_id{rpc.id(), error_code::no_resources}; + LOGGER_ERROR("rpc {:<} body: {{retval: {}}}", rpc, resp.error_code()); + req.respond(resp); + return; + } + + const auto data_stager_address = + job_metadata_ptr->adhoc_storage_metadata()->data_stager_address(); + + // Transform the `scord::dataset`s into `cargo::dataset`s and contact the + // Cargo service associated with the job's adhoc storage instance to + // execute the transfers. 
+    cargo::server srv{data_stager_address};
+
+    std::vector<cargo::dataset> inputs;
+    std::vector<cargo::dataset> outputs;
 
-    // TODO: generate a global ID for the transfer and contact Cargo to
-    // actually request it
-    tx_id = 42;
+    // TODO: check type of storage tier to enable parallel transfers
+    std::transform(sources.cbegin(), sources.cend(), std::back_inserter(inputs),
+                   [](const auto& src) { return cargo::dataset{src.id()}; });
 
-    const auto resp = response_with_id{rpc.id(), ec, tx_id};
+    std::transform(targets.cbegin(), targets.cend(),
+                   std::back_inserter(outputs),
+                   [](const auto& tgt) { return cargo::dataset{tgt.id()}; });
 
-    LOGGER_INFO("rpc {:<} body: {{retval: {}, tx_id: {}}}", rpc, ec, tx_id);
+    const auto cargo_tx = cargo::transfer_datasets(srv, inputs, outputs);
+
+    // Register the transfer into the `transfer_manager`.
+    // We embed the generated `cargo::transfer` object into
+    // scord's `transfer_metadata` so that we can later query the Cargo
+    // service for the transfer's status.
+    const auto rv =
+            m_transfer_manager.create(cargo_tx, limits)
+                    .or_else([&](auto&& ec) {
+                        LOGGER_ERROR("rpc id: {} error_msg: \"Error creating "
+                                     "transfer: {}\"",
+                                     rpc.id(), ec);
+                    })
+                    .and_then([&](auto&& transfer_metadata_ptr)
+                                      -> tl::expected<transfer_id, error_code> {
+                        return transfer_metadata_ptr->id();
+                    });
+
+    const auto resp =
+            rv ? 
response_with_id{rpc.id(), error_code::success, rv.value()} + : response_with_id{rpc.id(), rv.error()}; + + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc {:<} body: {{retval: {}, tx_id: {}}}", rpc, + resp.error_code(), resp.value_or_none()); req.respond(resp); } diff --git a/src/scord/rpc_server.hpp b/src/scord/rpc_server.hpp index 494c85fe164f77b40042639662fd2fda67065630..f86a60c8aa52ccacfddc44c9f416673e22aab093 100644 --- a/src/scord/rpc_server.hpp +++ b/src/scord/rpc_server.hpp @@ -31,6 +31,11 @@ #include "job_manager.hpp" #include "adhoc_storage_manager.hpp" #include "pfs_storage_manager.hpp" +#include "transfer_manager.hpp" + +namespace cargo { +class transfer; +} namespace scord { @@ -103,6 +108,7 @@ private: job_manager m_job_manager; adhoc_storage_manager m_adhoc_manager; pfs_storage_manager m_pfs_manager; + transfer_manager m_transfer_manager; }; } // namespace scord diff --git a/src/scord/transfer_manager.hpp b/src/scord/transfer_manager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6abb74f2deb8e87d6bde6343dfcd45beef8800b4 --- /dev/null +++ b/src/scord/transfer_manager.hpp @@ -0,0 +1,129 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_TRANSFER_MANAGER_HPP +#define SCORD_TRANSFER_MANAGER_HPP + +#include +#include +#include +#include +#include +#include +#include +#include "internal_types.hpp" + +namespace scord { + +template +struct transfer_manager { + + tl::expected< + std::shared_ptr>, + scord::error_code> + create(TransferHandle tx, std::vector limits) { + + static std::atomic_uint64_t current_id; + scord::transfer_id id = current_id++; + + abt::unique_lock lock(m_transfer_mutex); + + if(const auto it = m_transfer.find(id); it == m_transfer.end()) { + const auto& [it_transfer, inserted] = m_transfer.emplace( + id, std::make_shared< + internal::transfer_metadata>( + id, std::move(tx), std::move(limits))); + + if(!inserted) { + LOGGER_ERROR("{}: Emplace failed", __FUNCTION__); + return tl::make_unexpected(scord::error_code::snafu); + } + + return it_transfer->second; + } + + LOGGER_ERROR("{}: Transfer '{}' already exists", __FUNCTION__, id); + return tl::make_unexpected(scord::error_code::entity_exists); + } + + scord::error_code + update(scord::transfer_id id, float obtained_bw) { + + abt::unique_lock lock(m_transfer_mutex); + + if(const auto it = m_transfer.find(id); it != m_transfer.end()) { + const auto& current_transfer_info = it->second; + current_transfer_info->update(obtained_bw); + return scord::error_code::success; + } + + LOGGER_ERROR("{}: Transfer '{}' does not exist", __FUNCTION__, id); + return scord::error_code::no_such_entity; + } + + tl::expected< + std::shared_ptr>, + scord::error_code> + find(scord::transfer_id id) { + + abt::shared_lock lock(m_transfer_mutex); + + if(auto it = m_transfer.find(id); it != m_transfer.end()) { + return it->second; + } + + LOGGER_ERROR("Transfer '{}' was not registered or was already deleted", 
+ id); + return tl::make_unexpected(scord::error_code::no_such_entity); + } + + tl::expected< + std::shared_ptr>, + scord::error_code> + remove(scord::transfer_id id) { + + abt::unique_lock lock(m_transfer_mutex); + + if(const auto it = m_transfer.find(id); it != m_transfer.end()) { + auto nh = m_transfer.extract(it); + return nh.mapped(); + } + + LOGGER_ERROR("Transfer '{}' was not registered or was already deleted", + id); + + return tl::make_unexpected(scord::error_code::no_such_entity); + } + +private: + mutable abt::shared_mutex m_transfer_mutex; + std::unordered_map< + scord::transfer_id, + std::shared_ptr>> + m_transfer; +}; + +} // namespace scord + +#endif // SCORD_TRANSFER_MANAGER_HPP diff --git a/tests/test.cpp b/tests/test.cpp index 31cdd319e82d4b8c9d082a665b97f3879598f5c6..ff2fc2de679b944e80987eb93716875cbab89825 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -46,6 +46,10 @@ SCENARIO("Error messages can be printed", "[lib][ADM_strerror]") { "Cannot allocate memory"); } + WHEN("The error number is ADM_ETIMEOUT") { + REQUIRE(std::string{ADM_strerror(ADM_ETIMEOUT)} == + "Timeout"); + } WHEN("The error number is ADM_EOTHER") { REQUIRE(std::string{ADM_strerror(ADM_EOTHER)} == "Undetermined error");