Commit 0ccff750 authored by Ramon Nou
Browse files

Merge branch 'amiranda/85-adhoc_storage-instances-should-keep-track-of-jobs-using-them' into 'main'

Resolve "adhoc_storage instances should keep track of jobs using them"

This MR associates registered adhoc storage instances with registered jobs.

To do so, it modifies both the `job_manager` and the `adhoc_storage_manager`
classes to store `shared_ptr`s to `admire::internal::job_info` and 
`admire::internal::adhoc_storage_info` records. This allows returning the 
pointers when checking for information as well as modifying the records in
a synchronized manner.

To keep track of adhoc_storage usage, the new internal `adhoc_storage_info` 
type includes a `std::shared_ptr<job_info> m_client` field that models the 
association. As of today, an adhoc storage instance can only have a single
job client associated with it. Trying to add more will result in an error.

Closes #85 #86 #88

See merge request !58
parents 1d610a0b c5de2913
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -200,6 +200,7 @@ FetchContent_Declare(
)

FetchContent_MakeAvailable(fmt)
set_target_properties(fmt PROPERTIES POSITION_INDEPENDENT_CODE ON)

### spdlog: required for logging
message(STATUS "[${PROJECT_NAME}] Downloading and building spdlog")
+19 −9
Original line number Diff line number Diff line
@@ -45,7 +45,7 @@ main(int argc, char* argv[]) {
    int exit_status = EXIT_SUCCESS;
    ADM_server_t server = ADM_server_create("tcp", argv[1]);

    ADM_job_t job;
    ADM_job_t job = NULL;
    ADM_node_t* job_nodes = prepare_nodes(NJOB_NODES);
    assert(job_nodes);
    ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES);
@@ -75,8 +75,10 @@ main(int argc, char* argv[]) {
            server, name, ADM_STORAGE_GEKKOFS, ctx, &adhoc_storage);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_adhoc_storage() remote procedure not "
                        "completed successfully\n");
        fprintf(stderr,
                "ADM_register_adhoc_storage() remote procedure not "
                "completed successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -89,9 +91,12 @@ main(int argc, char* argv[]) {
    ret = ADM_register_job(server, job_resources, reqs, slurm_job_id, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
                        "successfully\n");
        fprintf(stderr,
                "ADM_register_job() remote procedure not completed "
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }

    ADM_dataset_t* sources = NULL;
@@ -107,8 +112,10 @@ main(int argc, char* argv[]) {
                                targets_len, limits, limits_len, mapping, &tx);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_transfer_datasets() remote procedure not "
                        "completed successfully\n");
        fprintf(stderr,
                "ADM_transfer_datasets() remote procedure not "
                "completed successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -116,8 +123,10 @@ main(int argc, char* argv[]) {
    ret = ADM_cancel_transfer(server, job, tx);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_cancel_transfer() remote procedure not completed "
                        "successfully\n");
        fprintf(stderr,
                "ADM_cancel_transfer() remote procedure not completed "
                "successfully\n, %s",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -126,6 +135,7 @@ main(int argc, char* argv[]) {
                    "successfully\n");

cleanup:
    ADM_remove_job(server, job);
    ADM_server_destroy(server);
    destroy_datasets(inputs, NINPUTS);
    destroy_datasets(outputs, NOUTPUTS);
+12 −6
Original line number Diff line number Diff line
@@ -71,9 +71,10 @@ main(int argc, char* argv[]) {
            server, name, ADM_STORAGE_GEKKOFS, ctx, &adhoc_storage);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout,
        fprintf(stderr,
                "ADM_register_adhoc_storage() remote procedure not completed "
                "successfully\n");
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -90,9 +91,12 @@ main(int argc, char* argv[]) {
    ret = ADM_register_job(server, job_resources, reqs, slurm_job_id, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
                        "successfully\n");
        fprintf(stderr,
                "ADM_register_job() remote procedure not completed "
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }

    exit_status = EXIT_SUCCESS;
@@ -103,9 +107,10 @@ main(int argc, char* argv[]) {


    if(ret != ADM_SUCCESS) {
        fprintf(stdout,
        fprintf(stderr,
                "ADM_connect_data_operation() remote procedure not completed "
                "successfully\n");
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -122,6 +127,7 @@ cleanup:
        ADM_dataset_destroy(outputs[i]);
    }

    ADM_remove_job(server, job);
    ADM_server_destroy(server);
    exit(exit_status);
}
+13 −8
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ main(int argc, char* argv[]) {

    ADM_server_t server = ADM_server_create("tcp", argv[1]);

    ADM_job_t job;
    ADM_job_t job = NULL;
    ADM_node_t* job_nodes = prepare_nodes(NJOB_NODES);
    assert(job_nodes);
    ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES);
@@ -76,9 +76,10 @@ main(int argc, char* argv[]) {
            server, name, ADM_STORAGE_GEKKOFS, ctx, &adhoc_storage);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout,
        fprintf(stderr,
                "ADM_register_adhoc_storage() remote procedure not completed "
                "successfully\n");
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -91,9 +92,12 @@ main(int argc, char* argv[]) {
    ret = ADM_register_job(server, job_resources, reqs, slurm_job_id, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
                        "successfully\n");
        fprintf(stderr,
                "ADM_register_job() remote procedure not completed "
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }

    ADM_data_operation_t op;
@@ -103,9 +107,10 @@ main(int argc, char* argv[]) {
    ret = ADM_define_data_operation(server, job, path, &op);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout,
        fprintf(stderr,
                "ADM_define_data_operation() remote procedure not completed "
                "successfully\n");
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -114,7 +119,7 @@ main(int argc, char* argv[]) {
                    "successfully\n");

cleanup:

    ADM_remove_job(server, job);
    ADM_server_destroy(server);
    exit(exit_status);
}
+6 −5
Original line number Diff line number Diff line
@@ -67,9 +67,10 @@ main(int argc, char* argv[]) {
            server, name, ADM_STORAGE_GEKKOFS, ctx, &adhoc_storage);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout,
        fprintf(stderr,
                "ADM_register_adhoc_storage() remote procedure not completed "
                "successfully\n");
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -80,9 +81,10 @@ main(int argc, char* argv[]) {
    ret = ADM_deploy_adhoc_storage(server, adhoc_storage);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout,
        fprintf(stderr,
                "ADM_deploy_adhoc_storage() remote procedure not completed "
                "successfully\n");
                "successfully: %s\n",
                ADM_strerror(ret));
        exit_status = EXIT_FAILURE;
        goto cleanup;
    }
@@ -91,7 +93,6 @@ main(int argc, char* argv[]) {
                    "successfully\n");

cleanup:

    ADM_server_destroy(server);
    exit(exit_status);
}
Loading