# agios.conf - AGIOS configuration file
library_options:
{  #ATTENTION: since the most recent modification on statistics, the prediction module is NOT WORKING. DO NOT USE IT.
	# Should trace files be generated during execution?
	trace = false ;

	# Should predicted requests be traced? (For debugging only; this trace is not useful for future executions.)
	trace_predict = false ;

	# Should a complete trace be made, including all scheduler operations such as waiting times? (For debugging only.)
	trace_full = false ;

	# Should the prediction module read trace files? (If false, the prediction module is useless.)
	predict_read_traces = false ;

	# Should the prediction module try to predict aggregations? (Requires predict_read_traces = true.)
	predict_request_aggregation = false ;

	# Should the prediction module write simplified traces containing the information (the metrics) it obtained from the real traces?
	predict_write_simplified_traces = false;

	# Tolerance for the difference in arrival times when checking whether two predicted requests are the same (in %).
	prediction_time_error = 10

	# How often the prediction module redoes its predicted aggregations, expressed as the number of requests that must be
	# processed between refreshes. This is necessary because these predictions use a factor that represents the ability to
	# overlap waiting times with the processing of other requests. At initialization, this factor is calculated from the
	# provided trace files, but during execution it can be recalculated using measurements of this ability taken during the
	# actual scheduling. If set to -1, aggregations are not recalculated during execution.
	prediction_recalculate_alpha_period = -1

	# Prefix and suffix for trace files (with path). Their names must be trace_file_prefix+"."+number+"."+trace_file_sufix,
	# with consecutive numbers (no holes).
	trace_file_prefix = "/tmp/agios_tracefile"
	trace_file_sufix = "out"
	# Prefix for simple trace files (with path). Their names will be simple_trace_prefix+"."+number+"."+trace_file_sufix.
	simple_trace_prefix = "/tmp/agios_simpletracefile"

	# Parameters used by aIOLi and MLF.
	# NOTE(review): units are not stated here - confirm against the library documentation.
	waiting_time = 900000
	aioli_quantum = 65536
	mlf_quantum = 8192

	# Parameter used by TW (in ms).
	time_window_size = 1000 #the paper proposing TW recommends 1000 for HDD and 250 for SSD.

	# File (with path) containing the access time functions (generated by SeRRa - http://serratoool.bitbucket.org/).
	# Used by aIOLi for quantum assignment and by the mechanism that automatically selects the best scheduling algorithm.
	# If you are using a static algorithm other than aIOLi, its contents do not matter, but a file must be provided anyway;
	# in that case you can use the example file shipped with the library source code.
	access_times_func_file = "/tmp/access_times.func"

	# For how many scheduling algorithms the performance module keeps measurements. After changing scheduling algorithms,
	# new measurements for the previous algorithms may still arrive (through the agios_release_request function), so the
	# information kept for them can be updated. A large value makes no sense if algorithms are not changed often.
	performance_values = 5

	# How many scheduling algorithm changes are registered in the stats file.
	proc_algs = 1000

	# Default I/O scheduling algorithm to use.
	# NOTE(review): the original comment added "(the one to be used if the previous value was set to false)", but no such
	# boolean option precedes this setting in the file - presumably a leftover from a removed option; confirm.
	# Existing algorithms (case sensitive): "MLF", "aIOLi", "SJF", "TO", "TO-agg", "SRTF", "TW", "NOOP", "DYN_TREE", "ARMED_BANDIT"
	# SRTF is experimental and uses information from trace files (don't use it if these are not available).
	# NOOP is the NO-operation scheduling algorithm; you should not observe performance improvements by using it.
	# TW only makes sense if the user provides AGIOS with the correct application id for each request. Don't use it otherwise.
	# DYN_TREE is a dynamic scheduling algorithm which selects the best one (among MLF, aIOLi, SJF, TO, TO-agg, and NOOP)
	# according to a decision tree. Use it only when using AGIOS to schedule requests to parallel file system servers.
	# ARMED_BANDIT is another dynamic algorithm which periodically selects between MLF, aIOLi, SJF, TO, TO-agg, and NOOP.
	# It keeps performance measurements for these algorithms and gives higher probability to the better ones.
	default_algorithm = "TO";

	# TWINS window size, in us
	# NOTE(review): TWINS does not appear in the list of algorithms above - confirm how it is selected.
	twins_window_duration = 1000; 
	dynamic_twins = false;
	# Time between window size adaptations, in ms
	dynamic_twins_period = 1000;
	
	# NOTE(review): undocumented in the original; presumably the numbers of clients and processes assumed by the
	# dynamic TWINS adaptation - confirm.
	dynamic_twins_clients = 32;
	dynamic_twins_processes = 128;

	# Only relevant if default_algorithm is a dynamic one. Period (in msec) with which the automatic selection mechanism
	# recalculates the scheduling algorithm. The selection is done using the access pattern observed during this period.
	# If -1 is provided, the selection is done at the beginning of execution only (using information from traces for
	# DYN_TREE). Note that it makes no sense to use ARMED_BANDIT without setting both this period and
	# select_algorithm_min_reqnumber, as it needs to be iterative.
	select_algorithm_period = 1000

	# Minimum number of requests that need to happen within select_algorithm_period for the selection to be done
	# (otherwise we wait longer before recalculating).
	select_algorithm_min_reqnumber=1

	# Also for dynamic algorithms: whether TW should be one of the options considered by the dynamic scheduler.
	# TW requires identifying requests according to the applications they come from. If this is not possible, don't use TW!
	enable_TW = false ;

	# If default_algorithm is a dynamic algorithm, indicates which static algorithm to use at first (before the next one
	# is selected automatically). Only relevant if you are using ARMED_BANDIT.
	starting_algorithm = "SJF" ;

	# ARMED_BANDIT parameters - only relevant if default_algorithm is ARMED_BANDIT, but they need to be provided anyway.
	# min_ab_probability: minimum probability given to scheduling algorithms which performed poorly. The algorithm needs to
	#   maintain some probability for them in order to accommodate changes in the access pattern.
	# validity_window: period of time (in msec) during which performance measurements remain valid; after it they are
	#   discarded (so we adapt to new situations).
	# performance_window: how many performance measurements the ARMED_BANDIT algorithm keeps for each scheduling algorithm
	#   option (taking their average). Keeping a huge window takes more memory. Moreover, it is related to validity_window
	#   and select_algorithm_period: if measurements are discarded too often, the whole window will never be filled.
	min_ab_probability = 3 
	validity_window = 360000
	performance_window = 10
};
user_info:
{
	# Stripe size used by the library's users (in bytes). This is used for detecting the access pattern at a parallel
	# file system server; it is not useful for other purposes.
	stripe_size = 32768 ;

	# Maximum buffer size used for storing trace parts (in KB). Having a buffer avoids generating requests to the local
	# file system, which would interfere with performance. On the other hand, a large buffer can affect performance and
	# decrease the space available for the data buffer.
	max_trace_buffer_size = 32768 ;

};