Commit 9a551f9b authored by Ramon Nou's avatar Ramon Nou
Browse files

added module scripts

parent aab61fe7
Loading
Loading
Loading
Loading
Loading

scripts/run/gkfs_run

0 → 100755
+101 −0
Original line number Diff line number Diff line
#!/bin/bash

# Fallback settings. Each variable keeps the caller's value when it is set and
# non-empty; otherwise it takes the default given here. All three are exported
# so that child processes inherit them.
: "${GKFS_MNT:=/dev/shm/gkfs_mnt}"
: "${GKFS_ROOT:=/dev/shm/gkfs_root}"
: "${GKFS_DAEMON_LOG_LEVEL:=0}"
export GKFS_MNT GKFS_ROOT GKFS_DAEMON_LOG_LEVEL

# Print the command synopsis and the list of sub-commands to stdout.
usage() {
    printf '%s\n' \
        "Usage: $0 {start|stop|help}" \
        "" \
        "Commands:" \
        "  start   Start GekkoFS daemons on allocated nodes" \
        "  stop    Stop GekkoFS daemons and clean up" \
        "  help    Show this help message"
}

# Function to start daemons
#
# Launches one daemon task per allocated node through srun and then blocks
# until the hosts file holds at least one line per node (presumably one line
# is written by each daemon once it is up).
#
# Environment read:
#   SLURM_JOB_NUM_NODES  number of nodes in the allocation (required)
#   GKFS_DAEMON          daemon command to run (required)
#   GKFS_MNT, GKFS_ROOT  mount and root directories; wiped and recreated
#   GKFS_HOSTS_FILE      hosts file; defaults to ${HOME}/gkfs_hosts.txt
# Environment written (exported):
#   GKFS_HOSTS_FILE, LIBGKFS_HOSTS_FILE, I_MPI_JOB_RESPECT_PROCESS_PLACEMENT
# Exit status:
#   Exits the script with status 1 when a prerequisite is missing, the hosts
#   file cannot be written, or the launcher terminates before every server
#   has shown up in the hosts file.
start() {
    if [ -z "$SLURM_JOB_NUM_NODES" ]; then
        echo "Error: SLURM_JOB_NUM_NODES not set. Are you in a Slurm allocation?" >&2
        exit 1
    fi
    if [ -z "$GKFS_DAEMON" ]; then
        # Without this check the launch command would begin with the first
        # option and every task would fail with a confusing shell error.
        echo "Error: GKFS_DAEMON not set. Is the GekkoFS module loaded?" >&2
        exit 1
    fi

    local nodes=$SLURM_JOB_NUM_NODES

    echo "Cleaning up previous run..."
    # The paths travel as positional parameters instead of being pasted into
    # the command string, so whitespace or glob characters in them cannot
    # change what rm -rf removes. ":?" aborts on an empty path.
    srun -n "$nodes" -N "$nodes" --oversubscribe \
         bash -c 'rm -rf -- "$1" "$2" ; mkdir -p -- "$1" "$2"' gkfs_cleanup \
         "${GKFS_MNT:?GKFS_MNT must be set}" "${GKFS_ROOT:?GKFS_ROOT must be set}"

    # Use GKFS_HOSTS_FILE from environment or default to HOME
    if [ -z "$GKFS_HOSTS_FILE" ]; then
        export GKFS_HOSTS_FILE=${HOME}/gkfs_hosts.txt
    fi
    export LIBGKFS_HOSTS_FILE=${GKFS_HOSTS_FILE}

    # Start from an EMPTY hosts file. Truncating (instead of touch) matters:
    # lines left over from an earlier run would otherwise satisfy the
    # readiness check below before any new daemon has started.
    if ! : > "$GKFS_HOSTS_FILE"; then
        echo "Error: cannot write hosts file ${GKFS_HOSTS_FILE}" >&2
        exit 1
    fi

    local comm="-P ofi+verbs"
    local cmd
    # %q shell-quotes the two directories because the string is re-parsed by
    # "bash -c" on the compute nodes; ordinary paths come out unchanged.
    printf -v cmd '%s --mountdir=%q --rootdir=%q %s -l ib0 -c' \
        "$GKFS_DAEMON" "$GKFS_MNT" "$GKFS_ROOT" "$comm"

    echo "Starting GekkoFS Daemons on ${nodes} nodes..."
    echo "Command: ${cmd}"

    # Unset conflicting env vars
    unset I_MPI_PMI_LIBRARY
    export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0

    # Launch daemons in the background, one task per node.
    # NOTE(review): "-c 112" is hard-coded; presumably it matches the core
    # count of the target nodes - confirm before using on another system.
    srun -N "$nodes" -n "$nodes" \
         -c 112 --overlap --overcommit --mem=0 --oversubscribe --export="ALL" \
         /bin/bash -c "$cmd" &
    local launcher_pid=$!

    # Wait for servers to be ready
    echo "Waiting for servers to start..."
    local line_count
    while true; do
        if [ -f "$GKFS_HOSTS_FILE" ]; then
            line_count=$(wc -l < "$GKFS_HOSTS_FILE")
            if [ "$line_count" -ge "$nodes" ]; then
                echo "All ${nodes} servers started."
                break
            fi
        fi
        # If the launcher is gone, no further server can appear; fail
        # instead of polling forever.
        if ! kill -0 "$launcher_pid" 2>/dev/null; then
            echo "Error: srun exited before all ${nodes} servers were listed in ${GKFS_HOSTS_FILE}" >&2
            exit 1
        fi
        sleep 1
    done

    echo "GekkoFS is ready."
    echo "Mount point: ${GKFS_MNT}"
    echo "Hosts file: ${GKFS_HOSTS_FILE}"
}

# Function to stop daemons
#
# Runs one task per allocated node that sends SIGINT to every process named
# gkfs_daemon and then removes the mount and root directories.
#
# Environment read:
#   SLURM_JOB_NUM_NODES  number of nodes in the allocation (required)
#   GKFS_MNT, GKFS_ROOT  directories removed on every node
# Exit status:
#   Exits the script with status 1 when SLURM_JOB_NUM_NODES is not set.
stop() {
    if [ -z "$SLURM_JOB_NUM_NODES" ]; then
        echo "Error: SLURM_JOB_NUM_NODES not set. Are you in a Slurm allocation?" >&2
        exit 1
    fi

    echo "Stopping GekkoFS daemons..."
    # The two directories are passed as positional parameters rather than
    # pasted into the command string: a path containing whitespace or glob
    # characters would otherwise make rm -rf delete unrelated paths.
    # ":?" aborts if a path is empty.
    # NOTE(review): rm runs right after the signal is sent, not after the
    # daemon has actually exited - confirm that is acceptable.
    srun -n "$SLURM_JOB_NUM_NODES" -N "$SLURM_JOB_NUM_NODES" \
         -c 1 --mem=0 --oversubscribe --export="ALL" \
         /bin/bash -c 'pkill --signal SIGINT gkfs_daemon ; rm -rf -- "$1" "$2"' gkfs_stop \
         "${GKFS_MNT:?GKFS_MNT must be set}" "${GKFS_ROOT:?GKFS_ROOT must be set}"

    echo "Cleanup complete."
}

# Main logic
# Dispatch on the first command-line argument. Anything other than the three
# known sub-commands prints the usage text and exits with status 1.
if [[ "$1" == "start" ]]; then
    start
elif [[ "$1" == "stop" ]]; then
    stop
elif [[ "$1" == "help" ]]; then
    usage
else
    usage
    exit 1
fi
+72 −0
Original line number Diff line number Diff line
#%Module1.0
########################################################
#
# Author: Ramon Nou
#
# Names and installation paths used by the rest of this
# modulefile. Every path is derived from BASE.
#
########################################################

set PROG_NAME           "GekkoFS"
set PROG_VERSION        "master-0.9.6"
set BASE                "/apps/GPP/GEKKOFS"

# Executables
set PROG_HOME           [file join $BASE gkfs-master bin gkfs_daemon]
set PROG_PROXY          [file join $BASE gkfs-master bin gkfs_proxy]

# Interception libraries
set PRLD_HOME           [file join $BASE gkfs-master lib64 libgkfs_intercept.so]
set LIBC_HOME           [file join $BASE gkfs-master lib64 libgkfs_libc_intercept.so]

# Colon-separated library search directories
set LIBS                [join [list \
                            [file join $BASE deps-master lib64] \
                            [file join $BASE deps-master lib] \
                            [file join $BASE gkfs-master lib64]] ":"]

# Help text for this module: writes a description of the module, the
# environment variables it defines, the helper script commands and a usage
# example to stderr. Takes no arguments.
proc ModulesHelp { } {
    # A proc body has its own variable scope; without this declaration the
    # reference to PROG_VERSION below fails with "no such variable".
    global PROG_VERSION

    puts stderr "----------------------------------------------------------------"
    puts stderr "\tGekkoFS Module $PROG_VERSION"
    puts stderr "----------------------------------------------------------------"
    puts stderr "\nDescription:"
    puts stderr "\tThis module loads the environment for GekkoFS."
    puts stderr "\tIt sets up environment variables and providing helper scripts."
    puts stderr "\nDefined Environment Variables:"
    puts stderr "\tGKFS_DAEMON      : Path to the GekkoFS daemon executable"
    puts stderr "\tGKFS_PROXY       : Path to the GekkoFS proxy executable"
    puts stderr "\tGKFS_INTERCEPT   : Path to the interception library (preferred)"
    puts stderr "\tGKFS_LIBC        : Path to the libc interception library"
    puts stderr "\tLD_LIBRARY_PATH  : Includes GekkoFS and ADMIRE dependencies"
    puts stderr "\tGKFS_MNT         : Default mount point for GekkoFS (can be overridden)"
    puts stderr "\tGKFS_ROOT        : Default root directory for GekkoFS (can be overridden)"
    puts stderr "\nHelper Scripts:"
    puts stderr "\tgkfs_run start   : Launch GekkoFS servers (requires Slurm allocation)"
    puts stderr "\tgkfs_run stop    : Stop GekkoFS servers and cleanup"
    puts stderr "----------------------------------------------------------------"
    puts stderr "\nExample:"
    puts stderr "\tLD_PRELOAD=\$GKFS_INTERCEPT ./your_application"
    puts stderr "----------------------------------------------------------------"
}

# One-line summary for this module, built from PROG_NAME and PROG_VERSION.
module-whatis   "Loads the $PROG_NAME $PROG_VERSION Environment"

# Consistency check
# Declares a conflict with the module name stored in PROG_NAME.
conflict ${PROG_NAME}

# Load required modules
# Runs only when the modulefile is evaluated in load mode: impi and oneapi are
# unloaded first, then the UCX, libfabric and GCC modules listed below are
# loaded.
# NOTE(review): because of the load-mode guard, unloading this module
# presumably leaves those three modules loaded - confirm that is intended.
if { [ module-info mode load ] } {
    module unload impi oneapi
    module load ucx/1.16.0-gcc libfabric/1.21.0-gcc gcc/14.1.0_binutils241
}

# Set Environment Variables
# Paths to the executables and interception libraries, plus log levels.
setenv          GKFS_DAEMON         $PROG_HOME
setenv          GKFS_PROXY          $PROG_PROXY
setenv          GKFS_INTERCEPT      $PRLD_HOME
setenv          GKFS_LIBC           $LIBC_HOME
setenv          GKFS_LOG_LEVEL      1
setenv          GKFS_DAEMON_LOG_LEVEL 0

# Useful defaults (can be overridden by user)
setenv        GKFS_MNT            /dev/shm/gkfs_mnt
# Reading env(TMPDIR) directly aborts the module load when TMPDIR is unset,
# and an empty TMPDIR would give "/gkfs_root". In both cases fall back to
# /dev/shm/gkfs_root, the same default the gkfs_run script uses.
if { [info exists env(TMPDIR)] && $env(TMPDIR) ne "" } {
    setenv    GKFS_ROOT           $env(TMPDIR)/gkfs_root
} else {
    setenv    GKFS_ROOT           /dev/shm/gkfs_root
}
setenv        GKFS_HOSTS_FILE     $env(HOME)/gkfs_hosts.txt
setenv        LIBGKFS_HOSTS_FILE  $env(HOME)/gkfs_hosts.txt

# Library search path, and PATH extended with the directory that holds this
# modulefile.
prepend-path    LD_LIBRARY_PATH     $LIBS
prepend-path    PATH                [file dirname [info script]]

# Confirmation message, printed only while the module is being loaded. The
# previous test (mode != "whatis") also printed "environment loaded" during
# unload, display and help evaluations.
if { [module-info mode load] } {
    puts stderr "GekkoFS environment loaded."
    puts stderr "Run 'module help [module-info name]' for more information."
}