Commit 2ac8b6a2 authored by Marc Vef

Merge branch 'slurm_scripts'

parents b8da9f98 7ded21f5
+145 −0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import os

import numpy as np

__author__ = "Marc-Andre Vef"
__email__ = "vef@uni-mainz.de"

node_n = list()
results_n = list()


def parse_file(filepath):
    """Parse a single IOR job log and append its per-transfer-size statistics to the global result lists."""
    write_tmp = []
    read_tmp = []
    write_avg = []
    write_std = []
    read_avg = []
    read_std = []
    transfersizes = []
    n = 0
    curr_transfer = ''
    with open(filepath, 'r') as rf:
        for line in rf.readlines():
            if 'Startup successful. Daemon is ready.' in line:
                n += 1
            if '<new_transfer_size>' in line:
                curr_transfer = line.strip().split(';')[1]
                write_tmp = []
                read_tmp = []
            if '<finish_transfer_size>' in line:
                transfersizes.append(curr_transfer)
                write_avg.append(np.mean(write_tmp))
                write_std.append(np.std(write_tmp))
                read_avg.append(np.mean(read_tmp))
                read_std.append(np.std(read_tmp))
                curr_transfer = ''
            if 'Max Write' in line:
                write_tmp.append(float(line.split(' ')[2]))
            if 'Max Read' in line:
                read_tmp.append(float(line.split(' ')[3]))
    if len(write_avg) == 0 or len(read_avg) == 0:
        # something is wrong. discard this file
        print 'File %s does not contain results' % filepath
        return
    # collect this file's per-transfer-size averages and standard deviations in a dict
    node_n.append(n)
    tmp_d = dict()
    tmp_d['transfersizes'] = transfersizes
    tmp_d['write_avg'] = write_avg
    tmp_d['read_avg'] = read_avg
    tmp_d['write_std'] = write_std
    tmp_d['read_std'] = read_std
    tmp_d['node_n'] = n
    results_n.append(tmp_d)


def parse_ior_out(inpath, outpath='', printshell=False, printonly=True):
    """Walk the top level of inpath, parse each IOR log file found, and emit the aggregated CSV tables."""
    if not os.path.exists(inpath) or not os.path.isdir(inpath):
        print "Input path does not exist or is not a directory. Exiting."
        exit(1)
    # parse input
    in_depth = inpath.count(os.path.sep)
    for root, dirs, files in os.walk(inpath):
        curr_depth = root.count(os.path.sep)
        if curr_depth > in_depth:
            break
        for file in files:
            filepath = '%s/%s' % (root, file)
            parse_file(filepath)

    # create csv output
    csv_write_avg_l = list()
    csv_read_avg_l = list()
    csv_write_std_l = list()
    csv_read_std_l = list()
    header_string = '# nodes,%s' % ','.join([x for x in results_n[0]['transfersizes']])
    for i in range(len(node_n)):
        csv_write_avg = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['write_avg']]))
        csv_write_std = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['write_std']]))
        csv_read_avg = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['read_avg']]))
        csv_read_std = '%s,%s' % (node_n[i], ','.join(["{:.2f}".format(x) for x in results_n[i]['read_std']]))
        csv_write_avg_l.append([node_n[i], csv_write_avg])
        csv_read_avg_l.append([node_n[i], csv_read_avg])
        csv_write_std_l.append([node_n[i], csv_write_std])
        csv_read_std_l.append([node_n[i], csv_read_std])
    # sort by number of nodes
    csv_write_avg_l.sort(key=lambda x: x[0])
    csv_read_avg_l.sort(key=lambda x: x[0])
    csv_write_std_l.sort(key=lambda x: x[0])
    csv_read_std_l.sort(key=lambda x: x[0])
    # create csv strings
    csv_write_avg = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_write_avg_l]))
    csv_write_std = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_write_std_l]))
    csv_read_avg = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_read_avg_l]))
    csv_read_std = '%s\n%s' % (header_string, '\n'.join([x[1] for x in csv_read_std_l]))
    # print output
    if printshell:
        print 'Write_avg:'
        print csv_write_avg
        print '\nRead_avg:'
        print csv_read_avg
        print '\nWrite_std:'
        print csv_write_std
        print '\nRead_std:'
        print csv_read_std
    if not printonly and outpath != '':
        # write output
        with open(outpath, 'w') as wf:
            wf.write('Write_avg:\n')
            wf.write(csv_write_avg)
            wf.write('\n\nRead_avg:\n')
            wf.write(csv_read_avg)
            wf.write('\n\nWrite_std:\n')
            wf.write(csv_write_std)
            wf.write('\n\nRead_std:\n')
            wf.write(csv_read_std)
            wf.write('\n')


if __name__ == "__main__":
    # Init parser
    parser = argparse.ArgumentParser(description='This script converts ior output files into csv tables. '
                                                 'If only the input path is given, the csv is printed to the shell',
                                     formatter_class=argparse.RawTextHelpFormatter)
    # positional arguments
    parser.add_argument('ior_in_path', type=str,
                        help='path to the directory containing the ior output files. All files in it are processed.')
    parser.add_argument('-o', '--output', metavar='<outpath>', type=str, default='',
                        help='path to the csv output file location')
    parser.add_argument('-p', '--printshell', action='store_true',
                        help='Output csv on shell')
    parser.add_argument('--printonly', action='store_true',
                        help='Only output csv on shell')
    args = parser.parse_args()
    if args.printshell and args.output != '' and not args.printonly:
        parse_ior_out(args.ior_in_path, args.output, True, False)
    elif args.output != '' and not args.printonly:
        parse_ior_out(args.ior_in_path, args.output, False, False)
    else:
        parse_ior_out(args.ior_in_path, '', True, True)

    print '\nNothing left to do; exiting. :)'
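A minimal usage sketch for the parser above, assuming it is saved as parse_ior_output.py (the filename is illustrative; the flags are the ones defined in the argparse block): point it at a directory of IOR job logs and either print the CSV tables to the shell or write them to a file.

# hypothetical invocations; parse_ior_output.py stands in for the new script above
python2 parse_ior_output.py /path/to/ior_logs                    # print the CSV tables to the shell
python2 parse_ior_output.py /path/to/ior_logs -o results.csv -p  # write results.csv and also print

Each table shares one header of transfer sizes (for example '# nodes,64m,32m,...') and has one row per parsed log, keyed by the number of 'Startup successful. Daemon is ready.' lines found in that log, i.e. the node count.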
+92 −44
@@ -5,11 +5,12 @@
#SBATCH -p nodeshort
#SBATCH -t 300
#SBATCH -A zdvresearch
#SBATCH --gres=ramdisk:20G
#SBATCH --gres=ramdisk:16G

usage_short() {
        echo "
usage: mogon1_ior_ramdisk.sh [-h] [-n <PROC_PER_NODE>] [-b <BLOCKSIZE>] [-i <ITER>] [-Y] [-p]
                             [-t <TRANSFERSIZES>] [-s] [-r] [-v]
                             benchmark_dir+file_prefix
        "
}
@@ -28,29 +29,45 @@ optional arguments:
        -h, --help
                                shows this help message and exits

        -n <PROC_PER_NODE>
        -n <PROC_PER_NODE>, --nodes <PROC_PER_NODE>
                                number of processes per node
                                defaults to '16'
        -b <BLOCKSIZE>
                                total number of data written and read (use 1k, 1m, 1g, etc...)
                                defaults to '1m'
        -i <ITER>
        -i <ITER>, --iterations <ITER>
                                number of iterations done around IOR
                                defaults to '1'
        -b <BLOCKSIZE>, --blocksize <BLOCKSIZE>
                                total amount of data written and read (use 1k, 1m, 1g, etc.)
                                defaults to '64m'
        -t <TRANSFERSIZES>, --transfersizes <TRANSFERSIZES>
                                Sets the transfer sizes used for the benchmark as a space-separated list.
                                The block size must be a multiple of each transfer size
                                Example: \"64m 32m 16m 8m 4m 2m 1m 512k 256k 128k 4k 1k\"
                                defaults to the example list above
        -s, --striping
                                Enable random striping for readback. A random seed of 42 is used.
        -r, --random
                                Enable random offsets for I/O
        -Y, --fsync
                                use fsync after writes
                                enable fsync after writes
                                defaults to 'false'
        -v, --verbose
                                enable ior verbosity
        -p, --pretend
                                Pretend operation. Does not execute the benchmark commands
                                but still starts and stops the adafs daemon
        "
}

# Set default values
PROC_PER_NODE=16
ITER=1
BLOCKSIZE="1m"
BLOCKSIZE="64m"
FSYNC=false
PRETEND=false
STRIPING=false
RANDOM_IO=false   # RANDOM itself is bash's built-in random-number variable and cannot hold a flag
VERBOSE=""
TRANSFERSIZES="64m 32m 16m 8m 4m 2m 1m 512k 256k 128k 4k 1k"
START_TIME="$(date -u +%s)"

POSITIONAL=()
while [[ $# -gt 0 ]]
@@ -58,29 +75,46 @@ do
key="$1"

case ${key} in
    -n)
    -n|--nodes)
    PROC_PER_NODE="$2"
    shift # past argument
    shift # past value
    ;;
    -b)
    -b|--blocksize)
    BLOCKSIZE="$2"
    shift # past argument
    shift # past value
    ;;
    -i)
    -i|--iterations)
    ITER="$2"
    shift # past argument
    shift # past value
    ;;
    -t|--transfersizes)
    TRANSFERSIZES="$2"
    shift # past argument
    shift # past value
    ;;
    -Y|--fsync)
    FSYNC=true
    shift # past argument
    ;;
    -r|--random)
    RANDOM_IO=true
    shift # past argument
    ;;
    -s|--striping)
    STRIPING=true
    shift # past argument
    ;;
    -p|--pretend)
    PRETEND=true
    shift # past argument
    ;;
    -v|--verbose)
    VERBOSE="-vv"
    shift # past argument
    ;;
    -h|--help)
    help_msg
    exit
@@ -134,7 +168,7 @@ echo "Generated hostfile no of nodes:"
cat ${HOSTFILE} | wc -l

NONODES=$(cat ${HOSTFILE} | wc -l)
let MD_PROC_N=${NONODES}*16
let IOR_PROC_N=${NONODES}*${PROC_PER_NODE}

echo "
############################################################################
@@ -153,56 +187,70 @@ echo "
"
# Run benchmark

BENCH_TMPL="mpiexec -np ${PROC_PER_NODE} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so ior -a POSIX -i 1 -o ${WORKDIR} -b ${BLOCKSIZE} -F -w -r -W"
BENCH_TMPL="mpiexec -np ${IOR_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so numactl --cpunodebind=2,3,4,5,6,7 --membind=2,3,4,5,6,7 /gpfs/fs1/home/vef/benchmarks/mogon1/ior/build/src/ior -a POSIX -i 1 -o ${WORKDIR} -b ${BLOCKSIZE} ${VERBOSE} -x -F -w -r -W"

echo "#############"
echo "# 1. SEQUEL #"
echo "#############"
for TRANSFER in 4k 256k 512k 1m 2m 4m 8m 16m
echo "##########################"
echo "< 1. WARMUP              >"
echo "##########################"
for ((i=1;i<=3;i+=1))
do
    for i in {1..${ITER}}
    do
        CMD="${BENCH_TMPL} -t ${TRANSFER}"
        echo "## iteration $i"
        echo "## transfer size ${TRANSFER}"
        if [ "${FSYNC}" = true ] ; then
            CMD="${CMD} -Y"
            echo "## FSYNC on"
        fi
    CMD="${BENCH_TMPL} -t 16m"
    echo "## Command ${CMD}"
        if [ "${PRETEND}" = true ] ; then
    if [ "${PRETEND}" = false ] ; then
        eval ${CMD}
    fi
done
done

echo "#############"
echo "# 2. RANDOM #"
echo "#############"
for TRANSFER in 4k 256k 512k 1m 2m 4m 8m 16m
# Run experiments
echo "##########################"
echo "< 2. RUNNING EXPERIMENTS >"
echo "##########################"
# Some info output
if [ "${RANDOM}" = true ] ; then
    echo "## RANDOM I/O on"
fi
if [ "${STRIPING}" = true ] ; then
    echo "## STRIPING on"
fi
if [ "${FSYNC}" = true ] ; then
    echo "## FSYNC on"
fi
for TRANSFER in ${TRANSFERSIZES}
do
    for i in {1..${ITER}}
    echo "<new_transfer_size>;${TRANSFER}"
    for ((i=1;i<=${ITER};i+=1))
    do
        CMD="${BENCH_TMPL} -t ${TRANSFER} -z"
        echo "## iteration $i"
        echo "## transfer size ${TRANSFER}"
        echo "<new_iteration>;$i"
        # build command from template and then execute it
        CMD="${BENCH_TMPL} -t ${TRANSFER}"
        echo "## iteration $i/${ITER} transfer size ${TRANSFER}"
        if [ "${RANDOM}" = true ] ; then
            CMD="${CMD} -z"
        fi
        if [ "${STRIPING}" = true ] ; then
            CMD="${CMD} -Z -X 42"
        fi
        if [ "${FSYNC}" = true ] ; then
            CMD="${CMD} -Y"
            echo "## FSYNC on"
        fi
        echo "## Command ${CMD}"
        if [ "${PRETEND}" = true ] ; then
        if [ "${PRETEND}" = false ] ; then
            eval ${CMD}
        fi
        echo "<finish_iteration>;$i"
        echo "### iteration $i/${ITER} done"
    done
    echo "<finish_transfer_size>;${TRANSFER}"
    echo "## new transfer size #################################"
done

# TODO 3. Striped later

echo "
############################################################################
############################### DAEMON STOP ###############################
############################################################################
"
END_TIME="$(date -u +%s)"
ELAPSED="$((${END_TIME}-${START_TIME}))"
MINUTES=$((${ELAPSED} / 60))
echo "##Elapsed time: ${MINUTES} minutes or ${ELAPSED} seconds elapsed for test set."
# shut down adafs daemon on the nodes
python2 ${VEF_HOME}/ifs/scripts/shutdown_adafs.py -J ${SLURM_JOB_ID} ${VEF_HOME}/ifs/build/bin/adafs_daemon ${HOSTFILE}
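A side note on the loop change visible in the hunks above: the iteration loops move from 'for i in {1..${ITER}}' to the arithmetic form 'for ((i=1;i<=${ITER};i+=1))'. Bash performs brace expansion before parameter expansion, so the old form never expands numerically. A standalone sketch of the difference, for illustration only:

#!/bin/bash
ITER=3
# Broken: brace expansion runs before ${ITER} is substituted, so the body
# executes exactly once with the literal word "{1..3}".
for i in {1..${ITER}}; do echo "brace form: $i"; done
# Works: the arithmetic loop evaluates ${ITER} at run time and counts 1, 2, 3.
for ((i=1; i<=${ITER}; i+=1)); do echo "arithmetic form: $i"; done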

+9 −4
@@ -5,7 +5,7 @@
#SBATCH -p nodeshort
#SBATCH -t 300
#SBATCH -A zdvresearch
#SBATCH --gres=ramdisk:20G
#SBATCH --gres=ramdisk:16G

usage_short() {
        echo "
@@ -45,6 +45,7 @@ MD_PROC_N=16
MD_ITER=1
MD_ITEMS="500000"
MD_UNIQUE=""
START_TIME="$(date -u +%s)"

POSITIONAL=()
while [[ $# -gt 0 ]]
@@ -121,7 +122,7 @@ export CXX=$(which g++)
echo "files per process: ${MD_ITEMS}"

# create a proper hostfile to run
srun -n ${SLURM_NNODES} hostname -s | sort -u > ${HOSTFILE} && sed -e 's/$/ max_slots=32/' -i ${HOSTFILE}
srun -n ${SLURM_NNODES} hostname -s | sort -u > ${HOSTFILE} && sed -e 's/$/ max_slots=64/' -i ${HOSTFILE}

echo "Generated hostfile no of nodes:"
cat ${HOSTFILE} | wc -l
@@ -147,14 +148,18 @@ echo "
############################################################################
"
# Run benchmark
echo "Executing: mpiexec -np ${MD_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so ${VEF_HOME}/benchmarks/mogon1/mdtest-1.9.3-modified/mdtest -z 0 -b 1 -i ${MD_ITER} -d ${MD_DIR} -F -I ${MD_ITEMS} -C -r -T -v 1 ${MD_UNIQUE}"
BENCHCMD="mpiexec -np ${MD_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so --cpunodebind=2,3,4,5,6,7 --membind=2,3,4,5,6,7 /gpfs/fs1/home/vef/benchmarks/mogon1/ior/build/src/mdtest -z 0 -b 1 -i ${MD_ITER} -d ${MD_DIR} -F -I ${MD_ITEMS} -C -r -T -v 1 ${MD_UNIQUE}"

mpiexec -np ${MD_PROC_N} --map-by node --hostfile ${HOSTFILE} -x LD_PRELOAD=/gpfs/fs2/project/zdvresearch/vef/fs/ifs/build/lib/libadafs_preload_client.so ${VEF_HOME}/benchmarks/mogon1/mdtest-1.9.3-modified/mdtest -z 0 -b 1 -i ${MD_ITER} -d ${MD_DIR} -F -I ${MD_ITEMS} -C -r -T -v 1 ${MD_UNIQUE}
eval ${BENCHCMD}

echo "
############################################################################
############################### DAEMON STOP ###############################
############################################################################
"
END_TIME="$(date -u +%s)"
ELAPSED="$((${END_TIME}-${START_TIME}))"
MINUTES=$((${ELAPSED} / 60))
echo "##Elapsed time: ${MINUTES} minutes or ${ELAPSED} seconds elapsed for test set."
# shut down adafs daemon on the nodes
python2 ${VEF_HOME}/ifs/scripts/shutdown_adafs.py -J ${SLURM_JOB_ID} ${VEF_HOME}/ifs/build/bin/adafs_daemon ${HOSTFILE}
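Taken together, a plausible end-to-end flow for this commit (node count, paths, and the parser filename are placeholders): submit the IOR ramdisk job with the flags documented in its help text, collect the resulting job logs into one directory, and aggregate them with the new parser.

# hypothetical workflow sketch
sbatch -N 4 mogon1_ior_ramdisk.sh -n 16 -i 3 -Y /path/to/benchmark_dir/ior_run
mkdir -p ior_logs && cp slurm-*.out ior_logs/        # default SLURM output files
python2 parse_ior_output.py ior_logs -o ior_results.csv -p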