Commit 1651bbea authored by Alberto Miranda's avatar Alberto Miranda ♨️
Browse files

Merge branch 'amiranda/59-add-adm_job_t-to-adm_adhoc_contex_t' into 'main'

Resolve "API does not provide facilities to communicate job and adhoc hostnames"

This MR extends the current API by introducing two new structures
`ADM_job_resources_t` and `ADM_adhoc_resources_t`.

- [x] `ADM_adhoc_resources_t` is renamed from `ADM_storage_resources_t` 
and now contains a list of nodes assigned for the adhoc storage instance 
(the PFS is more of a static thing, so it doesn't make sense to have a 
generic interface for this). An `ADM_adhoc_resources_t` argument is added 
to `ADM_adhoc_context_create()` and is propagated through RPCs. This 
allows resource managers (i.e. Slurm or the IC) to inform us about the 
nodes assigned for an `adhoc_storage`.
- [x] A new structure `ADM_job_resources_t` is added to the API, which for 
now contains a list of nodes assigned to a job. An `ADM_job_resources_t` 
argumet is added to `ADM_register_job()` and related functions and is 
propagated to `scord` via RPCs. This allows resource managers to inform 
`scord` about the nodes assigned for a job.

Closes #59

See merge request !43
parents a36ed9cb 481cbbcf
Loading
Loading
Loading
Loading
Loading
+19 −4
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
#include <assert.h>
#include "common.h"

#define NJOB_NODES   50
#define NADHOC_NODES 25
#define NINPUTS      10
#define NOUTPUTS     5

@@ -44,13 +46,26 @@ main(int argc, char* argv[]) {
    ADM_server_t server = ADM_server_create("tcp", argv[1]);

    ADM_job_t job;
    ADM_node_t* job_nodes = prepare_nodes(NJOB_NODES);
    assert(job_nodes);
    ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES);
    assert(adhoc_nodes);
    ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS);
    assert(inputs);
    ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS);
    assert(outputs);

    ADM_job_resources_t job_resources =
            ADM_job_resources_create(job_nodes, NJOB_NODES);
    assert(job_resources);

    ADM_adhoc_resources_t adhoc_resources =
            ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES);
    assert(adhoc_resources);

    ADM_adhoc_context_t ctx = ADM_adhoc_context_create(
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 42, 100, false);
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, adhoc_resources,
            100, false);
    assert(ctx);

    ADM_storage_t st = ADM_storage_create("foobar", ADM_STORAGE_GEKKOFS, ctx);
@@ -60,7 +75,7 @@ main(int argc, char* argv[]) {
            ADM_job_requirements_create(inputs, NINPUTS, outputs, NOUTPUTS, st);
    assert(reqs);

    ADM_return_t ret = ADM_register_job(server, reqs, &job);
    ADM_return_t ret = ADM_register_job(server, job_resources, reqs, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
+19 −4
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
#include <assert.h>
#include "common.h"

#define NJOB_NODES   50
#define NADHOC_NODES 25
#define NINPUTS      10
#define NOUTPUTS     5

@@ -44,23 +46,36 @@ main(int argc, char* argv[]) {
    ADM_server_t server = ADM_server_create("tcp", argv[1]);

    ADM_job_t job;
    ADM_node_t* job_nodes = prepare_nodes(NJOB_NODES);
    assert(job_nodes);
    ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES);
    assert(adhoc_nodes);
    ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS);
    assert(inputs);
    ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS);
    assert(outputs);

    ADM_adhoc_resources_t adhoc_resources =
            ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES);
    assert(adhoc_resources);

    ADM_adhoc_context_t ctx = ADM_adhoc_context_create(
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 42, 100, false);
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, adhoc_resources,
            100, false);
    assert(ctx);

    ADM_storage_t st = ADM_storage_create("foobar", ADM_STORAGE_GEKKOFS, ctx);
    assert(st);

    ADM_job_resources_t job_resources =
            ADM_job_resources_create(job_nodes, NJOB_NODES);
    assert(job_resources);

    ADM_job_requirements_t reqs =
            ADM_job_requirements_create(inputs, NINPUTS, outputs, NOUTPUTS, st);
    assert(reqs);

    ADM_return_t ret = ADM_register_job(server, reqs, &job);
    ADM_return_t ret = ADM_register_job(server, job_resources, reqs, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
+19 −4
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
#include <assert.h>
#include "common.h"

#define NJOB_NODES   50
#define NADHOC_NODES 25
#define NINPUTS      10
#define NOUTPUTS     5

@@ -45,13 +47,26 @@ main(int argc, char* argv[]) {
    ADM_server_t server = ADM_server_create("tcp", argv[1]);

    ADM_job_t job;
    ADM_node_t* job_nodes = prepare_nodes(NJOB_NODES);
    assert(job_nodes);
    ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES);
    assert(adhoc_nodes);
    ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS);
    assert(inputs);
    ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS);
    assert(outputs);

    ADM_job_resources_t job_resources =
            ADM_job_resources_create(job_nodes, NJOB_NODES);
    assert(job_resources);

    ADM_adhoc_resources_t adhoc_resources =
            ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES);
    assert(adhoc_resources);

    ADM_adhoc_context_t ctx = ADM_adhoc_context_create(
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 42, 100, false);
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, adhoc_resources,
            100, false);
    assert(ctx);

    ADM_storage_t st = ADM_storage_create("foobar", ADM_STORAGE_GEKKOFS, ctx);
@@ -61,7 +76,7 @@ main(int argc, char* argv[]) {
            ADM_job_requirements_create(inputs, NINPUTS, outputs, NOUTPUTS, st);
    assert(reqs);

    ADM_return_t ret = ADM_register_job(server, reqs, &job);
    ADM_return_t ret = ADM_register_job(server, job_resources, reqs, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
+19 −4
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
#include <assert.h>
#include "common.h"

#define NJOB_NODES   50
#define NADHOC_NODES 25
#define NINPUTS      10
#define NOUTPUTS     5

@@ -44,13 +46,26 @@ main(int argc, char* argv[]) {
    ADM_server_t server = ADM_server_create("tcp", argv[1]);

    ADM_job_t job;
    ADM_node_t* job_nodes = prepare_nodes(NJOB_NODES);
    assert(job_nodes);
    ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES);
    assert(adhoc_nodes);
    ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS);
    assert(inputs);
    ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS);
    assert(outputs);

    ADM_job_resources_t job_resources =
            ADM_job_resources_create(job_nodes, NJOB_NODES);
    assert(job_resources);

    ADM_adhoc_resources_t adhoc_resources =
            ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES);
    assert(adhoc_resources);

    ADM_adhoc_context_t ctx = ADM_adhoc_context_create(
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 42, 100, false);
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, adhoc_resources,
            100, false);
    assert(ctx);

    ADM_storage_t st = ADM_storage_create("foobar", ADM_STORAGE_GEKKOFS, ctx);
@@ -60,7 +75,7 @@ main(int argc, char* argv[]) {
            ADM_job_requirements_create(inputs, NINPUTS, outputs, NOUTPUTS, st);
    assert(reqs);

    ADM_return_t ret = ADM_register_job(server, reqs, &job);
    ADM_return_t ret = ADM_register_job(server, job_resources, reqs, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
+19 −4
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
#include <assert.h>
#include "common.h"

#define NJOB_NODES   50
#define NADHOC_NODES 25
#define NINPUTS      10
#define NOUTPUTS     5

@@ -45,23 +47,36 @@ main(int argc, char* argv[]) {
    ADM_server_t server = ADM_server_create("tcp", argv[1]);

    ADM_job_t job;
    ADM_node_t* job_nodes = prepare_nodes(NJOB_NODES);
    assert(job_nodes);
    ADM_node_t* adhoc_nodes = prepare_nodes(NADHOC_NODES);
    assert(adhoc_nodes);
    ADM_dataset_t* inputs = prepare_datasets("input-dataset-%d", NINPUTS);
    assert(inputs);
    ADM_dataset_t* outputs = prepare_datasets("output-dataset-%d", NOUTPUTS);
    assert(outputs);

    ADM_adhoc_resources_t adhoc_resources =
            ADM_adhoc_resources_create(adhoc_nodes, NADHOC_NODES);
    assert(adhoc_resources);

    ADM_adhoc_context_t ctx = ADM_adhoc_context_create(
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, 42, 100, false);
            ADM_ADHOC_MODE_SEPARATE_NEW, ADM_ADHOC_ACCESS_RDWR, adhoc_resources,
            100, false);
    assert(ctx);

    ADM_storage_t st = ADM_storage_create("foobar", ADM_STORAGE_GEKKOFS, ctx);
    assert(st);

    ADM_job_resources_t job_resources =
            ADM_job_resources_create(job_nodes, NJOB_NODES);
    assert(job_resources);

    ADM_job_requirements_t reqs =
            ADM_job_requirements_create(inputs, NINPUTS, outputs, NOUTPUTS, st);
    assert(reqs);

    ADM_return_t ret = ADM_register_job(server, reqs, &job);
    ADM_return_t ret = ADM_register_job(server, job_resources, reqs, &job);

    if(ret != ADM_SUCCESS) {
        fprintf(stdout, "ADM_register_job() remote procedure not completed "
Loading