Commit 2ac8b6a2 authored by Marc Vef

Merge branch 'slurm_scripts'

parents b8da9f98 7ded21f5
+145 −0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import os

import numpy as np

__author__ = "Marc-Andre Vef"
__email__ = "vef@uni-mainz.de"

node_n = list()
results_n = list()


def parse_file(filepath):
    """Parse a single IOR job log and append its per-transfer-size statistics to the global result lists."""
    write_tmp = []
    read_tmp = []
    write_avg = []
    write_std = []
    read_avg = []
    read_std = []
    transfersizes = []
    n = 0
    curr_transfer = ''
    with open(filepath, 'r') as rf:
        for line in rf.readlines():
            if 'Startup successful. Daemon is ready.' in line:
                n += 1
            if '<new_transfer_size>' in line:
                curr_transfer = line.strip().split(';')[1]
                write_tmp = []
                read_tmp = []
            if '<finish_transfer_size>' in line:
                transfersizes.append(curr_transfer)
                write_avg.append(np.mean(write_tmp))
                write_std.append(np.std(write_tmp))
                read_avg.append(np.mean(read_tmp))
                read_std.append(np.std(read_tmp))
                curr_transfer = ''
            if 'Max Write' in line:
                write_tmp.append(float(line.split(' ')[2]))
            if 'Max Read' in line:
                read_tmp.append(float(line.split(' ')[3]))
    if len(write_avg) == 0 or len(read_avg) == 0:
        # something is wrong. discard this file
        print 'File %s does not contain results' % filepath
        return
    # collect this file's per-transfer-size averages and standard deviations in a dict
    node_n.append(n)
    tmp_d = dict()
    tmp_d['transfersizes'] = transfersizes
    tmp_d['write_avg'] = write_avg
    tmp_d['read_avg'] = read_avg
    tmp_d['write_std'] = write_std
    tmp_d['read_std'] = read_std
    tmp_d['node_n'] = n
    results_n.append(tmp_d)


def parse_ior_out(inpath, outpath='', printshell=False, printonly=True):
    """Walk the top level of inpath, parse each IOR log file found, and emit the aggregated CSV tables."""
    if not os.path.exists(inpath) or not os.path.isdir(inpath):
        print "Input path does not exist or is not a directory. Exiting."
        exit(1)
    # parse input
    in_depth = inpath.count(os.path.sep)
    for root, dirs, files in os.walk(inpath):
        curr_depth = root.count(os.path.sep)
        if curr_depth > in_depth:
            break
        for file in files:
            filepath = '%s/%s' % (root, file)
            parse_file(filepath)

    # create csv output
    csv_write_avg_l = list()
    csv_read_avg_l = list()
    csv_write_std_l = list()
    csv_read_std_l = list()
    header_string = '# nodes,%s' % ','.join([x for x in results_n[0]['transfersizes']])
    for i in range(len(node_n)):
        csv_write_avg = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['write_avg']]))
        csv_write_std = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['write_std']]))
        csv_read_avg = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['read_avg']]))
        csv_read_std = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['read_std']]))
        csv_write_avg_l.append([node_n[i], csv_write_avg])
        csv_read_avg_l.append([node_n[i], csv_read_avg])
        csv_write_std_l.append([node_n[i], csv_write_std])
        csv_read_std_l.append([node_n[i], csv_read_std])
    # sort by number of nodes
    csv_write_avg_l.sort(key=lambda x: x[0])
    csv_read_avg_l.sort(key=lambda x: x[0])
    csv_write_std_l.sort(key=lambda x: x[0])
    csv_read_std_l.sort(key=lambda x: x[0])
    # create csv strings
    csv_write_avg = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_write_avg_l]))
    csv_write_std = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_write_std_l]))
    csv_read_avg = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_read_avg_l]))
    csv_read_std = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_read_std_l]))
    # print output
    if printshell:
        print 'Write_avg:'
        print csv_write_avg
        print '\nRead_avg:'
        print csv_read_avg
        print '\nWrite_std:'
        print csv_write_std
        print '\nRead_std:'
        print csv_read_std
    if not printonly and outpath != '':
        # write output
        with open(outpath, 'w') as wf:
            wf.write('Write_avg:\n')
            wf.write(csv_write_avg)
            wf.write('\n\nRead_avg:\n')
            wf.write(csv_read_avg)
            wf.write('\n\nWrite_std:\n')
            wf.write(csv_write_std)
            wf.write('\n\nRead_std:\n')
            wf.write(csv_read_std)
            wf.write('\n')


if __name__ == "__main__":
    # Init parser
    parser = argparse.ArgumentParser(description='This script converts ior output files into csv tables. '
                                                 'If only the input path is given, the csv is printed to the shell',
                                     formatter_class=argparse.RawTextHelpFormatter)
    # positional arguments
    parser.add_argument('ior_in_path', type=str,
                        help='path to the directory containing the ior output files. All files in it are processed.')
    parser.add_argument('-o', '--output', metavar='<outpath>', type=str, default='',
                        help='path to the csv output file location')
    parser.add_argument('-p', '--printshell', action='store_true',
                        help='Output csv on shell')
    parser.add_argument('--printonly', action='store_true',
                        help='Only output csv on shell')
    args = parser.parse_args()
    if args.printshell and args.output != '' and not args.printonly:
        parse_ior_out(args.ior_in_path, args.output, True, False)
    elif args.output != '' and not args.printonly:
        parse_ior_out(args.ior_in_path, args.output, False, False)
    else:
        parse_ior_out(args.ior_in_path, '', True, True)

    print '\nNothing left to do; exiting. :)'
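A minimal usage sketch for the parser above, assuming it is saved as parse_ior_output.py (the filename is illustrative; the flags are the ones defined in the argparse block): point it at a directory of IOR job logs and either print the CSV tables to the shell or write them to a file.

# hypothetical invocations; parse_ior_output.py stands in for the new script above
python2 parse_ior_output.py /path/to/ior_logs                    # print the CSV tables to the shell
python2 parse_ior_output.py /path/to/ior_logs -o results.csv -p  # write results.csv and also print

Each table shares one header of transfer sizes (for example '# nodes,64m,32m,...') and has one row per parsed log, keyed by the number of 'Startup successful. Daemon is ready.' lines found in that log, i.e. the node count.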
+92 −44
@@ -5,11 +5,12 @@
#SBATCH -p nodeshort
#SBATCH -t 300
#SBATCH -A zdvresearch
#SBATCH --gres=ramdisk:20G
#SBATCH --gres=ramdisk:16G

usage_short() {
        echo "
usage: mogon1_ior_ramdisk.sh [-h] [-n <PROC_PER_NODE>] [-b <BLOCKSIZE>] [-i <ITER>] [-Y] [-p]
                             [-t <TRANSFERSIZES>] [-s] [-r] [-v]
                             benchmark_dir+file_prefix
        "
}
@@ -28,29 +29,45 @@ optional arguments:
        -h, --help
                                shows this help message and exits

        -n <PROC_PER_NODE>
        -n <PROC_PER_NODE>, --nodes <PROC_PER_NODE>
                                number of processes per node
                                defaults to '16'
        -b <BLOCKSIZE>
                                total number of data written and read (use 1k, 1m, 1g, etc...)
                                defaults to '1m'
        -i <ITER>
        -i <ITER>, --iterations <ITER>
                                number of iterations done around IOR
                                defaults to '1'
        -b <BLOCKSIZE>, --blocksize <BLOCKSIZE>
                                total amount of data written and read (use 1k, 1m, 1g, etc.)
                                defaults to '64m'
        -t <TRANSFERSIZES>, --transfersizes <TRANSFERSIZES>
                                Sets the transfer sizes used for the benchmark as a space-separated list.
                                The block size must be a multiple of each transfer size
                                Example: \"64m 32m 16m 8m 4m 2m 1m 512k 256k 128k 4k 1k\"
                                defaults to the example list above
        -s, --striping
                                Enable random striping for readback. A random seed of 42 is used.
        -r, --random
                                Enable random offsets for I/O
        -Y, --fsync
                                use fsync after writes
                                enable fsync after writes
                                defaults to 'false'
        -v, --verbose
                                enable ior verbosity
        -p, --pretend
                                Pretend operation. Does not execute the benchmark commands
                                but still starts and stops the adafs daemon
        "
}

# Set default values
PROC_PER_NODE=16
ITER=1
BLOCKSIZE="1m"
BLOCKSIZE="64m"
FSYNC=false
PRETEND=false
STRIPING=false
RANDOM_IO=false   # RANDOM itself is bash's built-in random-number variable and cannot hold a flag
VERBOSE=""
TRANSFERSIZES="64m 32m 16m 8m 4m 2m 1m 512k 256k 128k 4k 1k"
START_TIME="$(date -u +%s)"

POSITIONAL=()
while [[ $# -gt 0 ]]
@@ -58,29 +75,46 @@ do
key="$1"

case ${key} in
    -n)
    -n|--nodes)
    PROC_PER_NODE="$2"
    shift # past argument
    shift # past value
    ;;
    -b)
    -b|--blocksize)
    BLOCKSIZE="$2"
    shift # past argument
    shift # past value
    ;;
    -i)
    -i|--iterations)
    ITER="$2"
    shift # past argument
    shift # past value
    ;;
    -t|--transfersizes)
    TRANSFERSIZES="$2"
    shift # past argument
    shift # past value
    ;;
    -Y|--fsync)
    FSYNC=true
    shift # past argument
    ;;
    -r|--random)
    RANDOM_IO=true
    shift # past argument
    ;;
    -s|--striping)
    STRIPING=true
    shift # past argument
    ;;
    -p|--pretend)
    PRETEND=true
    shift # past argument
    ;;
    -v|--verbose)
    VERBOSE="-vv"
    shift # past argument
    ;;
    -h|--help)
    help_msg
    exit
@@ -134,7 +168,7 @@ echo "Generated hostfile no of nodes:"
cat ${HOSTFILE} | wc -l

NONODES=$(cat ${HOSTFILE} | wc -l)
let MD_PROC_N=${NONODES}*16
let IOR_PROC_N=${NONODES}*${PROC_PER_NODE}

echo "
############################################################################
@@ -153,56 +187,70 @@ echo "
"
# Run benchmark

BENCH_TMPL="mpiexec -np ${PROC_PER_NODE} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so ior -a POSIX -i 1 -o ${WORKDIR} -b ${BLOCKSIZE} -F -w -r -W"
BENCH_TMPL="mpiexec -np ${IOR_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so numactl --cpunodebind=2,3,4,5,6,7 --membind=2,3,4,5,6,7 /gpfs/fs1/home/vef/benchmarks/mogon1/ior/build/src/ior -a POSIX -i 1 -o ${WORKDIR} -b ${BLOCKSIZE} ${VERBOSE} -x -F -w -r -W"

echo "#############"
echo "# 1. SEQUEL #"
echo "#############"
for TRANSFER in 4k 256k 512k 1m 2m 4m 8m 16m
echo "##########################"
echo "< 1. WARMUP              >"
echo "##########################"
for ((i=1;i<=3;i+=1))
do
    for i in {1..${ITER}}
    do
        CMD="${BENCH_TMPL} -t ${TRANSFER}"
        echo "## iteration $i"
        echo "## transfer size ${TRANSFER}"
        if [ "${FSYNC}" = true ] ; then
            CMD="${CMD} -Y"
            echo "## FSYNC on"
        fi
    CMD="${BENCH_TMPL} -t 16m"
    echo "## Command ${CMD}"
        if [ "${PRETEND}" = true ] ; then
    if [ "${PRETEND}" = false ] ; then
        eval ${CMD}
    fi
done
done

echo "#############"
echo "# 2. RANDOM #"
echo "#############"
for TRANSFER in 4k 256k 512k 1m 2m 4m 8m 16m
# Run experiments
echo "##########################"
echo "< 2. RUNNING EXPERIMENTS >"
echo "##########################"
# Some info output
if [ "${RANDOM}" = true ] ; then
    echo "## RANDOM I/O on"
fi
if [ "${STRIPING}" = true ] ; then
    echo "## STRIPING on"
fi
if [ "${FSYNC}" = true ] ; then
    echo "## FSYNC on"
fi
for TRANSFER in ${TRANSFERSIZES}
do
    for i in {1..${ITER}}
    echo "<new_transfer_size>;${TRANSFER}"
    for ((i=1;i<=${ITER};i+=1))
    do
        CMD="${BENCH_TMPL} -t ${TRANSFER} -z"
        echo "## iteration $i"
        echo "## transfer size ${TRANSFER}"
        echo "<new_iteration>;$i"
        # build command from template and then execute it
        CMD="${BENCH_TMPL} -t ${TRANSFER}"
        echo "## iteration $i/${ITER} transfer size ${TRANSFER}"
        if [ "${RANDOM}" = true ] ; then
            CMD="${CMD} -z"
        fi
        if [ "${STRIPING}" = true ] ; then
            CMD="${CMD} -Z -X 42"
        fi
        if [ "${FSYNC}" = true ] ; then
            CMD="${CMD} -Y"
            echo "## FSYNC on"
        fi
        echo "## Command ${CMD}"
        if [ "${PRETEND}" = true ] ; then
        if [ "${PRETEND}" = false ] ; then
            eval ${CMD}
        fi
        echo "<finish_iteration>;$i"
        echo "### iteration $i/${ITER} done"
    done
    echo "<finish_transfer_size>;${TRANSFER}"
    echo "## new transfer size #################################"
done

# TODO 3. Striped later

echo "
############################################################################
############################### DAEMON STOP ###############################
############################################################################
"
END_TIME="$(date -u +%s)"
ELAPSED="$((${END_TIME}-${START_TIME}))"
MINUTES=$((${ELAPSED} / 60))
echo "##Elapsed time: ${MINUTES} minutes or ${ELAPSED} seconds elapsed for test set."
# shut down adafs daemon on the nodes
python2 ${VEF_HOME}/ifs/scripts/shutdown_adafs.py -J ${SLURM_JOB_ID} ${VEF_HOME}/ifs/build/bin/adafs_daemon ${HOSTFILE}
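A side note on the loop change visible in the hunks above: the iteration loops move from 'for i in {1..${ITER}}' to the arithmetic form 'for ((i=1;i<=${ITER};i+=1))'. Bash performs brace expansion before parameter expansion, so the old form never expands numerically. A standalone sketch of the difference, for illustration only:

#!/bin/bash
ITER=3
# Broken: brace expansion runs before ${ITER} is substituted, so the body
# executes exactly once with the literal word "{1..3}".
for i in {1..${ITER}}; do echo "brace form: $i"; done
# Works: the arithmetic loop evaluates ${ITER} at run time and counts 1, 2, 3.
for ((i=1; i<=${ITER}; i+=1)); do echo "arithmetic form: $i"; done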

+9 −4
@@ -5,7 +5,7 @@
#SBATCH -p nodeshort
#SBATCH -t 300
#SBATCH -A zdvresearch
#SBATCH --gres=ramdisk:20G
#SBATCH --gres=ramdisk:16G

usage_short() {
        echo "
@@ -45,6 +45,7 @@ MD_PROC_N=16
MD_ITER=1
MD_ITEMS="500000"
MD_UNIQUE=""
START_TIME="$(date -u +%s)"

POSITIONAL=()
while [[ $# -gt 0 ]]
@@ -121,7 +122,7 @@ export CXX=$(which g++)
echo "files per process: ${MD_ITEMS}"

# create a proper hostfile to run
srun -n ${SLURM_NNODES} hostname -s | sort -u > ${HOSTFILE} && sed -e 's/$/ max_slots=32/' -i ${HOSTFILE}
srun -n ${SLURM_NNODES} hostname -s | sort -u > ${HOSTFILE} && sed -e 's/$/ max_slots=64/' -i ${HOSTFILE}

echo "Generated hostfile no of nodes:"
cat ${HOSTFILE} | wc -l
@@ -147,14 +148,18 @@ echo "
############################################################################
"
# Run benchmark
echo "Executing: mpiexec -np ${MD_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so ${VEF_HOME}/benchmarks/mogon1/mdtest-1.9.3-modified/mdtest -z 0 -b 1 -i ${MD_ITER} -d ${MD_DIR} -F -I ${MD_ITEMS} -C -r -T -v 1 ${MD_UNIQUE}"
BENCHCMD="mpiexec -np ${MD_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so --cpunodebind=2,3,4,5,6,7 --membind=2,3,4,5,6,7 /gpfs/fs1/home/vef/benchmarks/mogon1/ior/build/src/mdtest -z 0 -b 1 -i ${MD_ITER} -d ${MD_DIR} -F -I ${MD_ITEMS} -C -r -T -v 1 ${MD_UNIQUE}"

mpiexec -np ${MD_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so ${VEF_HOME}/benchmarks/mogon1/mdtest-1.9.3-modified/mdtest -z 0 -b 1 -i ${MD_ITER} -d ${MD_DIR} -F -I ${MD_ITEMS} -C -r -T -v 1 ${MD_UNIQUE}
eval ${BENCHCMD}

echo "
############################################################################
############################### DAEMON STOP ###############################
############################################################################
"
END_TIME="$(date -u +%s)"
ELAPSED="$((${END_TIME}-${START_TIME}))"
MINUTES=$((${ELAPSED} / 60))
echo "##Elapsed time: ${MINUTES} minutes or ${ELAPSED} seconds elapsed for test set."
# shut down adafs daemon on the nodes
python2 ${VEF_HOME}/ifs/scripts/shutdown_adafs.py -J ${SLURM_JOB_ID} ${VEF_HOME}/ifs/build/bin/adafs_daemon ${HOSTFILE}
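Taken together, a plausible end-to-end flow for this commit (node count, paths, and the parser filename are placeholders): submit the IOR ramdisk job with the flags documented in its help text, collect the resulting job logs into one directory, and aggregate them with the new parser.

# hypothetical workflow sketch
sbatch -N 4 mogon1_ior_ramdisk.sh -n 16 -i 3 -Y /path/to/benchmark_dir/ior_run
mkdir -p ior_logs && cp slurm-*.out ior_logs/        # default SLURM output files
python2 parse_ior_output.py ior_logs -o ior_results.csv -p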