Commit c1d1cbf2 authored by Ramon Nou's avatar Ramon Nou
Browse files

Rnou/49 refactor some functions

More performant, bug fixing and dry-run --progress options
parent 57ce411b
Loading
Loading
Loading
Loading
+13 −106
Original line number Diff line number Diff line
@@ -36,106 +36,12 @@ project(
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

#[=======================================================================[.rst:

  include_from_source(contentName <options>...)

The ``include_from_source()`` function ensures that ``contentName`` is
populated and potentially added to the build by the time it returns.

**Options:**

  ``SOURCE_DIR <dir>``: Source directory in which the contents reside.
    This should point to an existing directory where the external project has
    already been unpacked or cloned/checked out. If ``<dir>`` doesn't exist,
    the source code will be retrieved from ``GIT_REPOSITORY`` instead.

  ``GIT_REPOSITORY <url>``
    URL of the git repository. Any URL understood by the ``git`` command
    may be used.

  ``GIT_TAG <tag>``
    Git branch name, tag or commit hash. Note that branch names and tags should
    generally be specified as remote names (i.e. origin/myBranch rather than
    simply myBranch). This ensures that if the remote end has its tag moved or
    branch rebased or history rewritten, the local clone will still be updated
    correctly. In general, however, specifying a commit hash should be
    preferred for a number of reasons:

    If the local clone already has the commit corresponding to the hash, no git
    fetch needs to be performed to check for changes each time CMake is re-run.
    This can result in a significant speed up if many external projects are
    being used.

    Using a specific git hash ensures that the main project's own history is
    fully traceable to a specific point in the external project's evolution.
    If a branch or tag name is used instead, then checking out a specific
    commit of the main project doesn't necessarily pin the whole build to a
    specific point in the life of the external project. The lack of such
    deterministic behavior makes the main project lose traceability and
    repeatability.

  NOTE: If both ``SOURCE_DIR`` and ``GIT_REPOSITORY`` are specified,
  ``SOURCE_DIR`` will be the preferred location to populate ``contentName``
  from. If ``SOURCE_DIR`` doesn't exist, the function will fall back to the
  location defined by ``GIT_REPOSITORY``.

#]=======================================================================]
function(include_from_source contentName)
    # Declares `contentName` with FetchContent, either from a local
    # SOURCE_DIR or from a Git repository, and then makes it available.

    # Keyword arguments: no boolean flags, four single-value keywords and no
    # multi-value keywords. Parsed values are stored as ARGS_<KEYWORD>.
    set(OPTIONS)
    set(SINGLE_VALUE MESSAGE SOURCE_DIR GIT_REPOSITORY GIT_TAG)
    set(MULTI_VALUE)

    cmake_parse_arguments(ARGS "${OPTIONS}" "${SINGLE_VALUE}" "${MULTI_VALUE}" ${ARGN})

    # Optional caller-supplied status line.
    # NOTE(review): ${ARGS_MESSAGE} is unquoted, so a message containing ';'
    # is passed to message() as several arguments — consider quoting it.
    if (ARGS_MESSAGE)
        message(STATUS ${ARGS_MESSAGE})
    endif ()

    include(FetchContent)

    # A SOURCE_DIR that exists on disk is used in preference to the Git
    # repository.
    # NOTE(review): ${ARGS_SOURCE_DIR} is unquoted; when SOURCE_DIR is omitted
    # the condition has no path operand — consider quoting it.
    if (EXISTS ${ARGS_SOURCE_DIR})
        # The recursive glob is used only to detect a directory that exists
        # but holds no files (e.g. an uninitialized git submodule).
        file(GLOB_RECURSE SOURCE_FILES "${ARGS_SOURCE_DIR}/*")
        if (SOURCE_FILES STREQUAL "")
            message(FATAL_ERROR
                "The '${ARGS_SOURCE_DIR}' source directory appears "
                "to be empty. If it corresponds to a git submodule it may not have "
                "been properly initialized. Running:\n"
                "  'git submodule update --init --recursive'\n"
                "may fix the issue. If the directory corresponds to a manually "
                "downloaded dependency, please download it again.")
        endif ()

        message(STATUS "Found source directory for '${contentName}'. Building.")
        # Declare the content as coming from the already-present sources.
        FetchContent_Declare(
            ${contentName}
            SOURCE_DIR ${ARGS_SOURCE_DIR}
        )
    else ()
        message(STATUS
            "Source directory for '${contentName}' not found.\n"
            "Downloading and building from remote Git repository.")

        # Both the repository URL and the tag are mandatory for the remote
        # fallback; abort the configure step if either is missing.
        if (NOT ARGS_GIT_REPOSITORY)
            message(FATAL_ERROR "GIT_REPOSITORY for \"${contentName}\" not defined")
        endif ()

        if (NOT ARGS_GIT_TAG)
            message(FATAL_ERROR "GIT_TAG for \"${contentName}\" not defined")
        endif ()

        # Declare the content as a shallow Git clone with progress reporting
        # enabled.
        FetchContent_Declare(
            ${contentName}
            GIT_REPOSITORY ${ARGS_GIT_REPOSITORY}
            GIT_TAG ${ARGS_GIT_TAG}
            GIT_SHALLOW ON
            GIT_PROGRESS ON
        )
    endif ()
# Make sure that CMake can find our internal modules
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

    # Populate the declared content (and add it to the build when applicable).
    FetchContent_MakeAvailable(${contentName})
endfunction()
# Import some convenience functions
include(FetchContentWrapper)
include(cargo-utils)


# Set default build type and also populate a list of available options
@@ -202,12 +108,6 @@ include(GNUInstallDirs)
# define options that depend on other options
include(CMakeDependentOption)

# Make sure that CMake can find our internal modules
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# Import some convenience functions
include(cargo-utils)

# ##############################################################################
# Project configuration options
# ##############################################################################
@@ -509,3 +409,10 @@ install(FILES
  DESTINATION
  ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION}
  )


# Install public headers
install(
    DIRECTORY include/
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
 No newline at end of file
+41 −22
Original line number Diff line number Diff line
@@ -171,41 +171,60 @@ cd build
RUNNER_SKIP_START=1 ctest -VV --output-on-failure --stop-on-failure -j 8
```

## Command-Line Utilities

## Options
Cargo supports the following option:
```
b --blocksize (default is 512). Transfers will use this blocksize in kbytes. 
```
Cargo provides several command-line tools to interact with the server.

## Utilities
There are a few utility command line programs that can be used to interact with Cargo.
### `cargoctl` - Server Management

The `cargoctl` script is the primary tool for managing the Cargo server lifecycle.

**Start a server:**
```shell
cli/ccp --server ofi+tcp://127.0.0.1:62000 --input /directory/subdir --output /directorydst/subdirdst --if <method> --of <method> 
cargoctl start -s ofi+tcp://127.0.0.1:62000 -H localhost -n 4
```
`--input` and `--output` are required arguments, and can be a directory or a file path.
`--if` and `--of` select the specific transfer method; in V0.4.0 there are many combinations:

`--if or --of` can be: posix, gekkofs, hercules, dataclay, expand and parallel (for MPIIO requests, but only one side is allowed).
**Stop a server:**
```shell
cargoctl stop -s ofi+tcp://127.0.0.1:62000
```

Typically you should use posix or parallel and then one specialized adhocfs. Posix is also able to be used with LD_PRELOAD, however
higher performance and flexibility can be obtained using the specific configuration. Some backends are only available with directory support for stage-in. 
### `ccp` - Parallel Copy

On the other hand, MPIIO (parallel) normally uses file locking, so there is a performance impact, and posix is faster (we suppose no external modifications are done).
The `ccp` tool initiates a data transfer request.

Other commands are `ping`, `shutdown`, `shaping` (for bw control) and `cargo_ftio` to interact with ftio (stage-out and gekkofs).
**Basic Usage:**
```shell
ccp --server <address> --input /path/to/source --output /path/to/dest --if <type> --of <type>
```

`cargo_ftio` provides `--resume`, `--pause` and `--run` options to pause and resume the ftio-related transfers. The ftio transfers are those transfers that have gekkofs as `--of` and that were set up after an ftio command.
**New Options:**
*   `--progress`, `-p`: Show a live, interactive progress bar for the transfer.
*   `--dry-run`: Plan the transfer and report the number of files and total data size without actually moving any data.

**Example with Progress Bar:**
```shell
#SETUP FTIO, this enables stage-out to be delayed (10000 seconds)
cargo_ftio --server tcp://127.0.0.1:62000 -c -1 -p -1 -t 10000
#SETUP Stage-out (monitors data directory and subdirs for new file)
ccp --server tcp://127.0.0.1:62000 --input /data --output ~/stage-out --if gekkofs --of parallel
#UPDATE FTIO (as needed, each 25 seconds will do the transfer order)
cargo_ftio --server tcp://127.0.0.1:62000 -c -1 -p -1 -t 25
ccp --server ofi+tcp://127.0.0.1:62000 --input /large_dir --output /mnt/ssd/large_dir_copy -p
```
Output:
```
Started transfer with ID: 1
[================>             ]  35% (1.45 GB/s)
```

**Example with Dry Run:**
```shell
ccp --server ofi+tcp://127.0.0.1:62000 --input /large_dir --output /mnt/ssd/large_dir_copy --dry-run
```
Output:
```
Dry Run Plan:
  - Files to transfer: 10523
  - Total data size: 8.73 GB
```

### Other Utilities
Other tools include `cargo_ping`, `cargo_shutdown`, `shaping` (for bandwidth control), and `cargo_ftio` for interacting with the FTIO staging feature.

## User libraries for adhocfs
If Cargo finds the adhoc fs libraries (we support GekkoFS and dataclay, in this release), it will automatically use them.
+27 −82
Original line number Diff line number Diff line
@@ -34,95 +34,40 @@ configure_file(cargoctl.in cargoctl @ONLY)


################################################################################
## cargo_ping: A CLI tool to check if a Cargo server is running
add_executable(cargo_ping)

target_sources(cargo_ping
  PRIVATE
    ping.cpp
)

target_link_libraries(cargo_ping
  PUBLIC
## Object library holding the helper code shared by every CLI tool.
## Its PUBLIC dependencies are the libraries the helpers are built against.
add_library(cli_common OBJECT)
target_sources(cli_common PRIVATE common.cpp)
target_link_libraries(
    cli_common
    PUBLIC fmt::fmt CLI11::CLI11 net::rpc_client cargo
)

################################################################################
## cargo_shutdown: A CLI tool to shutdown a Cargo server
add_executable(cargo_shutdown)

target_sources(cargo_shutdown
  PRIVATE
    shutdown.cpp
# add_cargo_cli_tool(<name> <source>)
#
# Defines the CLI executable <name>, built from <source> together with the
# shared `cli_common` helpers, and installs it into ${CMAKE_INSTALL_BINDIR}.
#
# Arguments:
#   name   - name of the executable target to create
#   source - source file containing the tool's entry point
#
# Example:
#   add_cargo_cli_tool(cargo_ping ping.cpp)
function(add_cargo_cli_tool name source)
    add_executable(${name})
    target_sources(${name} PRIVATE ${source})

    # Linking the `cli_common` object library (supported since CMake 3.12)
    # adds its object files to the executable and propagates its PUBLIC
    # dependencies (fmt, CLI11, net::rpc_client, cargo), so the list does not
    # have to be repeated here. PRIVATE is used because nothing links against
    # an executable.
    target_link_libraries(${name} PRIVATE cli_common)

    install(TARGETS ${name} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
endfunction()

################################################################################
## ccp: A CLI tool to request a Cargo server to copy files between storage tiers
add_executable(ccp)

target_sources(ccp
  PRIVATE
    copy.cpp
)

target_link_libraries(ccp
  PUBLIC
    fmt::fmt
    CLI11::CLI11
    net::rpc_client
    cargo
)
## CLI tool definitions
add_cargo_cli_tool(cargo_ping ping.cpp)
add_cargo_cli_tool(cargo_shutdown shutdown.cpp)
add_cargo_cli_tool(ccp copy.cpp)
add_cargo_cli_tool(shaping shaping.cpp)
add_cargo_cli_tool(cargo_ftio ftio.cpp)

################################################################################
## shaping: A CLI tool to request a Cargo server to slowdown transfers 
add_executable(shaping)

target_sources(shaping
  PRIVATE
    shaping.cpp
)

target_link_libraries(shaping
  PUBLIC
    fmt::fmt
    CLI11::CLI11
    net::rpc_client
    cargo
)


################################################################################
## ftio: A CLI tool to send the ftio info to a Cargo server 
add_executable(cargo_ftio)

target_sources(cargo_ftio
  PRIVATE
    ftio.cpp
)

target_link_libraries(cargo_ftio
  PUBLIC
    fmt::fmt
    CLI11::CLI11
    net::rpc_client
    cargo
)


install(TARGETS cargo_ping cargo_shutdown ccp shaping cargo_ftio
        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)

# Installation
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/cargoctl
        DESTINATION ${CMAKE_INSTALL_BINDIR})
 No newline at end of file

cli/common.cpp

0 → 100644
+49 −0
Original line number Diff line number Diff line
/******************************************************************************
 * Copyright 2022-2023, Barcelona Supercomputing Center (BSC), Spain
 *
 * This software was partially supported by the EuroHPC-funded project ADMIRE
 *   (Project ID: 956748, https://www.admire-eurohpc.eu).
 *
 * This file is part of Cargo.
 *
 * Cargo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cargo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cargo.  If not, see <https://www.gnu.org/licenses/>.
 *
 * SPDX-License-Identifier: GPL-3.0-or-later
 *****************************************************************************/
#include "common.hpp"
#include <fmt/format.h>
#include <stdexcept>
#include <CLI/CLI.hpp>

// Splits `address` at the first "://" and returns {protocol, address}.
//
// The first element is the text preceding the separator. The second element
// is the complete, unmodified input string (not just the part following the
// separator).
//
// Throws std::runtime_error when the separator is missing or when nothing
// precedes it (empty protocol, e.g. "://host:port").
std::pair<std::string, std::string>
parse_address(const std::string& address) {
    const auto pos = address.find("://");

    // npos: no separator at all; 0: separator present but protocol is empty.
    if(pos == std::string::npos || pos == 0) {
        throw std::runtime_error("Invalid address: " + address);
    }

    return std::make_pair(address.substr(0, pos), address);
}

void parse_rpc_command_line(int argc, char* argv[], CLI::App& app, std::string& server_address) {
    app.add_option("-s,--server", server_address, "Server address")
            ->option_text("ADDRESS")
            ->required();
    try {
        app.parse(argc, argv);
    } catch(const CLI::ParseError& ex) {
        std::exit(app.exit(ex));
    }
}
 No newline at end of file

cli/common.hpp

0 → 100644
+44 −0
Original line number Diff line number Diff line
/******************************************************************************
 * Copyright 2022-2023, Barcelona Supercomputing Center (BSC), Spain
 *
 * This software was partially supported by the EuroHPC-funded project ADMIRE
 *   (Project ID: 956748, https://www.admire-eurohpc.eu).
 *
 * This file is part of Cargo.
 *
 * Cargo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cargo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cargo.  If not, see <https://www.gnu.org/licenses/>.
 *
 * SPDX-License-Identifier: GPL-3.0-or-later
 *****************************************************************************/

#ifndef CARGO_CLI_COMMON_HPP
#define CARGO_CLI_COMMON_HPP

#include <string>
#include <utility>

namespace CLI {
    class App;
}

// Parses a server address of the form "<protocol>://<rest>".
// Returns a pair whose first element is the text before the first "://" and
// whose second element is the complete, unmodified input address.
// Throws std::runtime_error if the address does not contain "://".
std::pair<std::string, std::string>
parse_address(const std::string& address);


// Registers a required "-s,--server" option on `app`, bound to
// `server_address`, and then parses the command line given by argc/argv.
// If parsing raises CLI::ParseError, the process is terminated via
// std::exit() with the code returned by app.exit(); the function does not
// return to the caller in that case.
void parse_rpc_command_line(int argc, char* argv[], CLI::App& app, std::string& server_address);


#endif // CARGO_CLI_COMMON_HPP
 No newline at end of file
Loading