/* admire.h */
/******************************************************************************
 * Copyright 2021-2022, Barcelona Supercomputing Center (BSC), Spain
 *
 * This software was partially supported by the EuroHPC-funded project ADMIRE
 *   (Project ID: 956748, https://www.admire-eurohpc.eu).
 *
 * This file is part of scord.
 *
 * scord is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * scord is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with scord.  If not, see <https://www.gnu.org/licenses/>.
 *
 * SPDX-License-Identifier: GPL-3.0-or-later
 *****************************************************************************/

#ifndef SCORD_ADMIRE_H
#define SCORD_ADMIRE_H

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/* API version: the individual components, followed by the dotted string. */
#define ADM_IOSCHED_API_VERSION_MAJOR 0
#define ADM_IOSCHED_API_VERSION_MINOR 1
#define ADM_IOSCHED_API_VERSION_PATCH 0
#define ADM_IOSCHED_API_VERSION       "0.1.0"


/******************************************************************************/
/* Public type definitions and type-related functions                         */
/******************************************************************************/

/**
 * Status codes returned by the API.
 *
 * ADM_SUCCESS is zero; the error codes are numbered consecutively after it,
 * and ADM_ERR_MAX is pinned to 512.
 */
typedef enum {
    ADM_SUCCESS  = 0,
    ADM_ESNAFU   = 1,
    ADM_EBADARGS = 2,
    ADM_ENOMEM   = 3,
    ADM_EOTHER   = 4,
    ADM_ERR_MAX  = 512
} ADM_return_t;

/** Opaque handle to a server (struct adm_server is not defined in this header) */
typedef struct adm_server* ADM_server_t;

/**
 * Initialize a server from a user-provided protocol and address.
 *
 * @remark Servers need to be freed by calling ADM_server_destroy().
 *
 * @param[in] protocol The protocol that will be used to access the server.
 * @param[in] address The address of the server.
 * @return A valid ADM_server_t if successful or NULL in case of failure.
 */
ADM_server_t
ADM_server_create(const char* protocol, const char* address);
/**
 * Destroy a server created by ADM_server_create().
 *
 * @param[in] server The ADM_server_t to destroy (passed by value, not by
 * pointer).
 * @return ADM_SUCCESS or corresponding ADM error code
 */
ADM_return_t
ADM_server_destroy(ADM_server_t server);

/** Opaque handle to a node (struct adm_node is not defined in this header) */
typedef struct adm_node* ADM_node_t;

/**
 * Initialize a node from a user-provided hostname.
 *
 * @remark Nodes need to be freed by calling ADM_node_destroy().
 *
 * @param[in] hostname The hostname of the node.
 * @return A valid ADM_node_t if successful or NULL in case of failure.
 */
ADM_node_t
ADM_node_create(const char* hostname);

/**
 * Destroy a node created by ADM_node_create().
 *
 * @param[in] node The ADM_node_t to destroy.
 * @return ADM_SUCCESS or corresponding ADM error code
 */
ADM_return_t
ADM_node_destroy(ADM_node_t node);


/** Opaque handle to a dataset (struct adm_dataset is not defined in this
 * header) */
typedef struct adm_dataset* ADM_dataset_handle_t;

/**
 * Create a dataset from a user-provided id (e.g. a path for POSIX-like file
 * systems or a key for key-value stores).
 *
 * @remark Datasets need to be freed by calling ADM_dataset_destroy().
 *
 * @param[in] id The id for the dataset.
 * @return A valid ADM_dataset_handle_t if successful or NULL in case of
 * failure.
 */
ADM_dataset_handle_t
ADM_dataset_create(const char* id);

/**
 * Destroy a dataset created by ADM_dataset_create().
 *
 * @param[in] dataset The ADM_dataset_handle_t to destroy.
 * @return An ADM_return_t status code (presumably ADM_SUCCESS on success, like
 * the other destroy functions -- TODO confirm against the implementation).
 */
ADM_return_t
ADM_dataset_destroy(ADM_dataset_handle_t dataset);

/** Opaque handle to a job, as produced by ADM_register_job() (struct adm_job is
 * not defined in this header) */
typedef struct adm_job* ADM_job_t;

/** The kind of object a QoS limit is scoped to: a dataset, a node, or a job */
typedef enum {
    ADM_QOS_SCOPE_DATASET = 0,
    ADM_QOS_SCOPE_NODE    = 1,
    ADM_QOS_SCOPE_JOB     = 2
} ADM_qos_scope_t;

/** The class of a QoS limit: bandwidth or IOPS */
typedef enum {
    ADM_QOS_CLASS_BANDWIDTH = 0,
    ADM_QOS_CLASS_IOPS      = 1
} ADM_qos_class_t;

/** Opaque handle to an ADMIRE entity upon which QoS can be defined (struct
 * adm_qos_entity is not defined in this header) */
typedef struct adm_qos_entity* ADM_qos_entity_t;

/**
 * Create a QoS entity given a scope and the matching dataset, node, or job.
 *
 * @remark QoS entities need to be freed by calling ADM_qos_entity_destroy().
 *
 * @param[in] scope The scope of the entity, i.e. ADM_QOS_SCOPE_DATASET,
 * ADM_QOS_SCOPE_NODE, or ADM_QOS_SCOPE_JOB.
 * @param[in] ... A single argument with data from either an
 * ADM_dataset_handle_t, ADM_node_t, or ADM_job_t variable. The argument must
 * correspond properly to the scope provided.
 * @return A valid ADM_qos_entity_t if successful or NULL in case of failure.
 */
ADM_qos_entity_t
ADM_qos_entity_create(ADM_qos_scope_t scope, ...);

/**
 * Destroy a QoS entity created by ADM_qos_entity_create().
 *
 * @param[in] entity The ADM_qos_entity_t to destroy.
 * @return ADM_SUCCESS or corresponding ADM error code
 */
ADM_return_t
ADM_qos_entity_destroy(ADM_qos_entity_t entity);


/**
 * A QoS limit.
 *
 * NOTE(review): there is no field carrying the value of the limit itself --
 * confirm whether one is still to be added.
 */
typedef struct {
    /** The scope the limit applies to (dataset, node, or job) */
    ADM_qos_scope_t l_scope;
    /** The class of the limit (bandwidth or IOPS) */
    ADM_qos_class_t l_class;
    /** The entity the limit applies to */
    ADM_qos_entity_t l_element;
} ADM_limit_t;

/** How the sources of a transfer are mapped onto its targets */
typedef enum {
    ADM_MAPPING_ONE_TO_ONE = 0,
    ADM_MAPPING_ONE_TO_N   = 1,
    ADM_MAPPING_N_TO_N     = 2
} ADM_tx_mapping_t;

/**
 * A handle to a created transfer.
 *
 * TODO: no real fields yet. The placeholder member is required because a
 * struct without members is undefined in ISO C and, where accepted as an
 * extension, has size 0 in C but size 1 in C++; this type is passed by value
 * across the extern "C" boundary, so both languages must agree on its layout.
 */
typedef struct {
    /** Placeholder only; carries no information */
    char reserved;
} ADM_transfer_handle_t;

/**
 * Information about a dataset.
 *
 * TODO: no real fields yet. The placeholder member is required because a
 * struct without members is undefined in ISO C and, where accepted as an
 * extension, has size 0 in C but size 1 in C++; this type is passed by value
 * across the extern "C" boundary, so both languages must agree on its layout.
 */
typedef struct {
    /** Placeholder only; carries no information */
    char reserved;
} ADM_dataset_info_t;

/**
 * A storage tier handle.
 *
 * TODO: no real fields yet. The placeholder member is required because a
 * struct without members is undefined in ISO C and, where accepted as an
 * extension, has size 0 in C but size 1 in C++; this type is passed by value
 * across the extern "C" boundary, so both languages must agree on its layout.
 */
typedef struct {
    /** Placeholder only; carries no information */
    char reserved;
} ADM_storage_handle_t;

/**
 * Information about resources assigned to a storage tier.
 *
 * TODO: no real fields yet. The placeholder member is required because a
 * struct without members is undefined in ISO C and, where accepted as an
 * extension, has size 0 in C but size 1 in C++; this type is passed by value
 * across the extern "C" boundary, so both languages must agree on its layout.
 */
typedef struct {
    /** Placeholder only; carries no information */
    char reserved;
} ADM_storage_resources_t;

/** The priority of a transfer, as reported by ADM_get_transfer_priority() */
typedef int ADM_transfer_priority_t;

/**
 * A handle to a data operation, as produced by ADM_define_data_operation().
 *
 * TODO: no real fields yet. The placeholder member is required because a
 * struct without members is undefined in ISO C and, where accepted as an
 * extension, has size 0 in C but size 1 in C++; this type is passed by value
 * across the extern "C" boundary, so both languages must agree on its layout.
 */
typedef struct {
    /** Placeholder only; carries no information */
    char reserved;
} ADM_data_operation_handle_t;

/**
 * The status of a data operation, as filled in by
 * ADM_finalize_data_operation().
 *
 * TODO: no real fields yet. The placeholder member is required because a
 * struct without members is undefined in ISO C and, where accepted as an
 * extension, has size 0 in C but size 1 in C++, so C and C++ translation units
 * sharing this extern "C" header would disagree on its layout.
 */
typedef struct {
    /** Placeholder only; carries no information */
    char reserved;
} ADM_data_operation_status_t;

/**
 * The I/O statistics of a job, as returned by ADM_get_statistics().
 *
 * TODO: no real fields yet. The placeholder member is required because a
 * struct without members is undefined in ISO C and, where accepted as an
 * extension, has size 0 in C but size 1 in C++, so C and C++ translation units
 * sharing this extern "C" header would disagree on its layout.
 */
typedef struct {
    /** Placeholder only; carries no information */
    char reserved;
} ADM_job_stats_t;

/** The execution modes available to an adhoc storage system */
typedef enum {
    ADM_ADHOC_MODE_IN_JOB_SHARED     = 0,
    ADM_ADHOC_MODE_IN_JOB_DEDICATED  = 1,
    ADM_ADHOC_MODE_SEPARATE_NEW      = 2,
    ADM_ADHOC_MODE_SEPARATE_EXISTING = 3
} ADM_adhoc_mode_t;

/** The access modes available to an adhoc storage system */
typedef enum {
    ADM_ADHOC_ACCESS_RDONLY = 0,
    ADM_ADHOC_ACCESS_WRONLY = 1,
    ADM_ADHOC_ACCESS_RDWR   = 2
} ADM_adhoc_access_t;

/** Opaque type representing a data distribution for adhoc storage systems
 * (struct adm_adhoc_data_distribution is not defined in this header) */
typedef struct adm_adhoc_data_distribution* ADM_adhoc_data_distribution_t;

/** The context for an adhoc storage instance */
typedef struct {
    /** The adhoc storage system execution mode */
    ADM_adhoc_mode_t c_mode;
    /** The adhoc storage system access type */
    ADM_adhoc_access_t c_access;
    /** The number of nodes for the adhoc storage system */
    uint32_t c_nodes;
    /** The adhoc storage system walltime.
     * NOTE(review): the unit is not stated in this header -- confirm. */
    uint32_t c_walltime;
    /** Whether the adhoc storage system should flush data in the background */
    bool c_should_bg_flush;
} ADM_adhoc_context_t;

/** A handle to an adhoc storage instance; currently a plain pointer to an
 * ADM_adhoc_context_t */
typedef ADM_adhoc_context_t* ADM_adhoc_storage_handle_t;

/** Opaque handle to the I/O requirements for a job (struct
 * adm_job_requirements is not defined in this header) */
typedef struct adm_job_requirements* ADM_job_requirements_t;

/**
 * Create an ADM_job_requirements_t from user-provided information.
 *
 * @remark Requirements created by this function need to be freed by calling
 * ADM_job_requirements_destroy().
 *
 * @param[in] inputs An array of ADM_dataset_handle_t describing the input
 * information required by the job.
 * @param[in] inputs_len The number of dataset handles stored in inputs.
 * @param[in] outputs An array of ADM_dataset_handle_t describing the output
 * information generated by the job.
 * @param[in] outputs_len The number of dataset handles stored in outputs.
 * @param[in] adhoc_storage An optional ADM_adhoc_storage_handle_t describing
 * the adhoc storage system required by the job (can be set to NULL if no adhoc
 * storage system is required).
 * @return A valid ADM_job_requirements_t if successful or NULL in case of
 * failure.
 */
ADM_job_requirements_t
ADM_job_requirements_create(ADM_dataset_handle_t inputs[], size_t inputs_len,
                            ADM_dataset_handle_t outputs[], size_t outputs_len,
                            ADM_adhoc_storage_handle_t adhoc_storage);

/**
 * Destroy an ADM_job_requirements_t created by ADM_job_requirements_create().
 *
 * @param[in] reqs The ADM_job_requirements_t to destroy.
 * @return ADM_SUCCESS or corresponding error code.
 */
ADM_return_t
ADM_job_requirements_destroy(ADM_job_requirements_t reqs);


/******************************************************************************/
/* Public prototypes                                                          */
/******************************************************************************/

/**
 * Send an RPC to a server to check whether it is online.
 *
 * @param[in] server The server to which the request is directed
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_ping(ADM_server_t server);

/**
 * Register a job and its requirements.
 *
 * @remark The returned ADM_job_t will be freed when passed to
 * ADM_remove_job().
 *
 * @param[in] server The server to which the request is directed
 * @param[in] reqs The requirements for the job.
 * @param[out] job An ADM_job_t referring to the newly-registered job.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_register_job(ADM_server_t server, ADM_job_requirements_t reqs,
                 ADM_job_t* job);

/**
 * Update a job.
 *
 * NOTE(review): this function was undocumented; presumably it replaces the
 * requirements of an already-registered job with `reqs` -- confirm against
 * the implementation.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the job.
 * @param[in] reqs The requirements for the job.
 * @return An ADM_return_t status code.
 */
ADM_return_t
ADM_update_job(ADM_server_t server, ADM_job_t job, ADM_job_requirements_t reqs);

/**
 * Remove a job.
 *
 * @remark An ADM_job_t obtained from ADM_register_job() is freed when passed
 * to this function.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the job.
 * @return An ADM_return_t status code.
 */
ADM_return_t
ADM_remove_job(ADM_server_t server, ADM_job_t job);

/**
 * Register an adhoc storage system.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] ctx The ADM_adhoc_context_t for the adhoc storage system.
 * @param[out] adhoc_handle An ADM_adhoc_storage_handle_t referring to the
 * newly-created adhoc storage instance.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_register_adhoc_storage(ADM_server_t server, ADM_job_t job,
                           ADM_adhoc_context_t ctx,
                           ADM_adhoc_storage_handle_t* adhoc_handle);

/**
 * Update an already-registered adhoc storage system.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] ctx The updated ADM_adhoc_context_t for the adhoc storage system.
 * @param[in] adhoc_handle An ADM_adhoc_storage_handle_t referring to the adhoc
 * storage instance of interest.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_update_adhoc_storage(ADM_server_t server, ADM_job_t job,
                         ADM_adhoc_context_t ctx,
                         ADM_adhoc_storage_handle_t adhoc_handle);
/**
 * Remove an already-registered adhoc storage system.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] adhoc_handle An ADM_adhoc_storage_handle_t referring to the adhoc
 * storage instance of interest.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_remove_adhoc_storage(ADM_server_t server, ADM_job_t job,
                         ADM_adhoc_storage_handle_t adhoc_handle);

/**
 * Initiate the deployment of an adhoc storage system instance.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] adhoc_handle An ADM_adhoc_storage_handle_t referring to the adhoc
 * storage instance of interest.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_deploy_adhoc_storage(ADM_server_t server, ADM_job_t job,
                         ADM_adhoc_storage_handle_t adhoc_handle);

/**
 * Transfers the datasets identified by sources to the locations defined by
 * targets, applying the provided constraints during the transfer. A handle
 * that can be used to track the operation (i.e., get statistics, or status)
 * is returned through tx_handle.
 *
 * NOTE(review): sources, targets and limits carry no length argument; how
 * the lists are sized or terminated is not stated in this header -- confirm.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] sources A list of datasets identifying the source dataset/s
 * to be transferred.
 * @param[in] targets A list of datasets identifying the destination
 * dataset/s and its/their desired locations in a storage tier.
 * @param[in] limits A list of QoS limits that must be applied to
 * the transfer. These may not exceed the global ones set at node, application,
 * or resource level.
 * @param[in] mapping A distribution strategy for the transfers, i.e.
 * ADM_MAPPING_ONE_TO_ONE, ADM_MAPPING_ONE_TO_N, or ADM_MAPPING_N_TO_N.
 * @param[out] tx_handle An ADM_transfer_handle_t allowing clients to interact
 * with the transfer (e.g. wait for its completion, query its status, cancel it,
 * etc.).
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_transfer_dataset(ADM_server_t server, ADM_job_t job,
                     ADM_dataset_handle_t** sources,
                     ADM_dataset_handle_t** targets, ADM_limit_t** limits,
                     ADM_tx_mapping_t mapping,
                     ADM_transfer_handle_t* tx_handle);


/**
 * Sets information for the dataset identified by target.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] target An ADM_dataset_handle_t referring to the dataset of
 * interest.
 * @param[in] info An ADM_dataset_info_t with information about the
 * dataset (e.g. its lifespan, access methods, intended usage, etc.).
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_set_dataset_information(ADM_server_t server, ADM_job_t job,
                            ADM_dataset_handle_t target,
                            ADM_dataset_info_t info);

/**
 * Changes the I/O resources used by a storage tier, typically an adhoc storage
 * system.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] tier An ADM_storage_handle_t referring to the target storage
 * tier.
 * @param[in] resources An ADM_storage_resources_t containing information
 * about the I/O resources to set (e.g. number of I/O nodes).
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_set_io_resources(ADM_server_t server, ADM_job_t job,
                     ADM_storage_handle_t tier,
                     ADM_storage_resources_t resources);


/**
 * Returns the priority of the pending transfer identified by tx_handle.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] tx_handle An ADM_transfer_handle_t referring to a pending
 * transfer
 * @param[out] priority The priority of the pending transfer or an error code if
 * it didn't exist or is no longer pending.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_get_transfer_priority(ADM_server_t server, ADM_job_t job,
                          ADM_transfer_handle_t tx_handle,
                          ADM_transfer_priority_t* priority);


/**
 * Moves the transfer identified by tx_handle up or down by incr positions in
 * its scheduling queue.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] tx_handle An ADM_transfer_handle_t referring to a pending
 * transfer
 * @param[in] incr A positive or negative number for the number of
 * positions the transfer should go up or down in its scheduling queue.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_set_transfer_priority(ADM_server_t server, ADM_job_t job,
                          ADM_transfer_handle_t tx_handle, int incr);


/**
 * Cancels the pending transfer identified by tx_handle.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] tx_handle An ADM_transfer_handle_t referring to a pending
 * transfer.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_cancel_transfer(ADM_server_t server, ADM_job_t job,
                    ADM_transfer_handle_t tx_handle);


/**
 * Returns a list of pending transfers.
 *
 * NOTE(review): the length of the returned list and who owns/frees it are not
 * stated in this header -- confirm.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[out] pending_transfers A list of pending transfers.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_get_pending_transfers(ADM_server_t server, ADM_job_t job,
                          ADM_transfer_handle_t** pending_transfers);


/**
 * Registers the QoS constraint described by limit.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] limit An ADM_limit_t specifying at least:
 *                  - The ADM_qos_scope_t the limit should be applied to: e.g.
 *                    dataset, node, or job.
 *                  - The ADM_qos_class_t of the limit (e.g. bandwidth, IOPS,
 *                    etc.).
 *                  - The ADM_qos_entity_t it should be applied to (e.g. job,
 *                    node, dataset, etc.)
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_set_qos_constraints(ADM_server_t server, ADM_job_t job, ADM_limit_t limit);


/**
 * Returns a list of the QoS constraints defined for the given entity.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] scope The scope being queried: dataset, node, or job.
 * @param[in] entity An ADM_qos_entity_t referring to the target of the query,
 * i.e. a dataset, a node, or a job.
 * @param[out] limits A list of ADM_limit_t that includes all the classes
 * currently defined for the entity as well as the values set for them.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_get_qos_constraints(ADM_server_t server, ADM_job_t job,
                        ADM_qos_scope_t scope, ADM_qos_entity_t entity,
                        ADM_limit_t** limits);


/**
 * Defines a new operation, with the code found in path. The operation will
 * accept the arguments defined, using the following format
 * "arg0, arg1, arg2, ... ", and is returned to the caller through op.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] path A valid path for the operation executable.
 * @param[out] op An ADM_data_operation_handle_t for the newly-defined
 * operation.
 * @param[in] ... A list of arguments for the operation.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_define_data_operation(ADM_server_t server, ADM_job_t job, const char* path,
                          ADM_data_operation_handle_t* op, ...);


/**
 * Connects and starts a data operation with the given arguments, using the
 * input and output data storage (i.e., files). If the operation can be
 * executed in a streaming fashion (i.e., it can start even if the input data
 * is not entirely available), should_stream must be set to true.
 *
 * NOTE(review): the original description refers to an operation handle
 * ("op") identifying the operation to be connected, but the prototype has no
 * such parameter -- confirm whether it is missing or expected to be passed
 * through the variadic arguments.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] input An input ADM_dataset_handle_t for the operation.
 * @param[in] output An output ADM_dataset_handle_t where the result of
 * the operation should be stored.
 * @param[in] should_stream A boolean indicating if the operation
 * should be executed in a streaming fashion.
 * @param[in] ... The values for the arguments required by the operation.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_connect_data_operation(ADM_server_t server, ADM_job_t job,
                           ADM_dataset_handle_t input,
                           ADM_dataset_handle_t output, bool should_stream,
                           ...);


/**
 * Finalises the operation referred to by op.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] op The ADM_data_operation_handle_t of the operation to be
 * finalised.
 * @param[out] status An ADM_data_operation_status_t indicating whether the
 * operation was successful.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_finalize_data_operation(ADM_server_t server, ADM_job_t job,
                            ADM_data_operation_handle_t op,
                            ADM_data_operation_status_t* status);


/**
 * Links the data operation referred to by op with a pending transfer, using
 * the values provided as arguments. If the operation can be executed in a
 * streaming fashion (i.e., it can start even if the input data is not
 * entirely available), should_stream must be set to true.
 *
 * NOTE(review): the original description documents a "tx_handle" parameter
 * (the transfer handle the operation should be linked to), but the prototype
 * has no such parameter -- confirm whether it is missing or expected to be
 * passed through the variadic arguments.
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job.
 * @param[in] op The ADM_data_operation_handle_t of the operation to be
 * linked.
 * @param[in] should_stream A boolean indicating whether the operation
 * should be executed in a streaming fashion.
 * @param[in] ... The values for the arguments required by the operation.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_link_transfer_to_data_operation(ADM_server_t server, ADM_job_t job,
                                    ADM_data_operation_handle_t op,
                                    bool should_stream, ...);


/**
 * Returns the current I/O statistics for a specified job and an optional
 * corresponding job step. The information will be returned in an
 * easy-to-process format, e.g., JSON (see Listing 3.1).
 *
 * @param[in] server The server to which the request is directed
 * @param[in] job An ADM_job_t identifying the originating job and,
 * optionally, its job step.
 * @param[out] stats A list of ADM_job_stats_t.
 * @return Returns ADM_SUCCESS if the remote procedure has completed
 * successfully.
 */
ADM_return_t
ADM_get_statistics(ADM_server_t server, ADM_job_t job, ADM_job_stats_t** stats);

/**
 * Return a string describing an error code.
 *
 * @param[in] errnum The ADM_return_t value for which a description should be
 * returned.
 * @return A pointer to a string describing `errnum`.
 */
const char*
ADM_strerror(ADM_return_t errnum);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // SCORD_ADMIRE_H