perf csv (5e840d64) · Commits · hpc / gekkofs

perf_tests/CMakeLists.txt

+5 −2

Original line number	Diff line number	Diff line
		@@ -136,8 +136,11 @@ add_custom_target(run-all-perf-with-script
		# Install targets
		install(TARGETS perf_metadata perf_data perf_delete perf_find perf_directory
		RUNTIME DESTINATION bin)
		install(FILES run_benchmarks.sh
		install(FILES run_benchmarks.sh run_all_benchmarks.py
		DESTINATION bin)
		install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/
		DESTINATION share/gekkofs/perf_tests
		FILES_MATCHING PATTERN "*.sh")
		FILES_MATCHING PATTERN "*.sh"
		PATTERN "*.py"
		PATTERN "*.hpp"
		PATTERN "README.md")

perf_tests/README.md

+78 −8

Original line number	Diff line number	Diff line
		@@ -209,19 +209,89 @@ Metric: Value (unit)

		## Comparing Results Across Changes

		### Method 1: Manual Comparison
		### Method 1: Using the Python Runner (recommended for spreadsheet analysis)

		1. Run benchmarks before and after your changes
		2. Results are saved to `perf_tests/results/` with timestamps
		3. Compare the output files manually
		This is the easiest way to get CSV output that can be opened in Excel, Google Sheets, R, or Python/pandas.

		```bash
		# Compare two result files
		diff <(grep "Avg:" results/metadata_20260119_120000.txt) \
		<(grep "Avg:" results/metadata_20260119_130000.txt)
		# Run all benchmarks and produce CSV files
		./run_all_benchmarks.py --mountdir /mnt/gekkofs

		# Or with custom iterations
		./run_all_benchmarks.py --mountdir /mnt/gekkofs --iterations 100

		# Or with LD_PRELOAD
		LD_PRELOAD=/path/to/gkfs_syscall_intercept.so ./run_all_benchmarks.py --mountdir /mnt/gekkofs

		# Run only specific benchmarks
		./run_all_benchmarks.py --mountdir /mnt/gekkofs --benchmarks metadata data

		# Custom output directory
		./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./my_results
		```

		This produces the following files in `results/<timestamp>/`:

		| File | Description |
		|------|-------------|
		| `benchmark_results.csv` | All individual benchmark results (one row per operation) |
		| `summary.csv` | Aggregated summary with mean-of-means per operation |
		| `benchmark_type_means.csv` | One row per benchmark type with overall mean (best for quick comparison) |
		| `run_info.json` | Run metadata (timestamp, mountdir, iterations, hostname) |

		#### benchmark_type_means.csv (for quick comparison)

		```csv
		benchmark_type,mean_time_ms,stddev_ms,num_operations
		data,5.432,2.345,6
		delete,3.210,1.876,4
		directory,1.234,0.567,4
		metadata,0.234,0.156,6
		find,12.345,5.678,5
		```

		Import into:
		- Excel/Google Sheets: Data → Get Data → From Text/CSV
		- R: `results <- read.csv("benchmark_type_means.csv")`
		- Python/pandas: `results = pd.read_csv("benchmark_type_means.csv")`

		#### Comparing two runs side-by-side

		```bash
		# Run baseline
		./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./baseline

		# ... make your code changes ...

		# Run after changes
		./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./after

		# Compare benchmark_type_means.csv side-by-side
		python3 - <<'EOF'
		import csv
		baseline = {r['benchmark_type']: float(r['mean_time_ms']) for r in csv.DictReader(open('baseline/benchmark_type_means.csv'))}
		after = {r['benchmark_type']: float(r['mean_time_ms']) for r in csv.DictReader(open('after/benchmark_type_means.csv'))}
		print(f"{'Type':<15} {'Baseline':>10} {'After':>10} {'Change':>10}")
		for t in baseline:
		    old = baseline[t]
		    new = after.get(t, 0)
		    pct = ((new - old) / old) * 100 if old else 0
		    sign = "+" if pct > 0 else ""
		    print(f"{t:<15} {old:>10.4f} {new:>10.4f} {sign}{pct:>9.2f}%")
		EOF
		```

		Output:
		```
		Type Baseline After Change
		data 5.4320 4.8910 -9.96%
		delete 3.2100 3.0500 -4.98%
		directory 1.2340 1.1890 -3.65%
		metadata 0.2340 0.1980 -15.38%
		find 12.3450 11.2300 -9.03%
		```

		### Method 2: Automated Comparison Script
		### Method 2: Using the Shell Runner

		```bash
		# Run baseline

perf_tests/perf_common.hpp

+37 −0

Original line number	Diff line number	Diff line
		@@ -87,6 +87,43 @@ inline TimingResult finalize_timing(const std::vector<double>& intervals_ms) {
		return result;
		}

		// ============================================================================
		// CSV output helpers for spreadsheet/statistical tool integration
		// ============================================================================

		/// Returns the CSV column-header line, without a trailing newline.
		/// The column order matches the fields written by format_timing_csv().
		inline std::string format_timing_csv_header() {
		    // Grouped as: identification, basic stats, percentiles, totals.
		    static const char* const kCsvColumns =
		        "benchmark,operation,iterations,"
		        "min_ms,max_ms,mean_ms,median_ms,stddev_ms,"
		        "p50_ms,p90_ms,p95_ms,p99_ms,"
		        "total_ms,throughput_ops_sec";
		    return std::string(kCsvColumns);
		}

		/// Formats one benchmark result as a single CSV data row.
		///
		/// Fields are written in the column order of format_timing_csv_header().
		/// Timing values use fixed notation with 6 decimals; a computed
		/// throughput uses 2 decimals.
		///
		/// @param benchmark_name  Value of the "benchmark" column.
		/// @param op_name         Value of the "operation" column.
		/// @param result          Timing statistics to serialize.
		/// @param total_ops       Operation count used to derive throughput
		///                        (ops/sec). When it is 0, or when
		///                        result.total_ms is not positive, the
		///                        throughput column is written as "0".
		/// @return The CSV row, already terminated by '\n' (callers do not need
		///         to append another line break).
		inline std::string format_timing_csv(const std::string& benchmark_name,
		                                     const std::string& op_name,
		                                     const TimingResult& result,
		                                     size_t total_ops = 0) {
		    // Text fields are quoted (RFC 4180 style) only when they contain a
		    // separator, a quote, or a line break, so that such names cannot
		    // shift the columns. Plain names are emitted unchanged.
		    const auto csv_field = [](const std::string& text) -> std::string {
		        if (text.find_first_of(",\"\r\n") == std::string::npos) {
		            return text;
		        }
		        std::string quoted;
		        quoted.reserve(text.size() + 2);
		        quoted.push_back('"');
		        for (const char ch : text) {
		            if (ch == '"') {
		                quoted.push_back('"');  // embedded quotes are doubled
		            }
		            quoted.push_back(ch);
		        }
		        quoted.push_back('"');
		        return quoted;
		    };

		    std::ostringstream oss;
		    oss << csv_field(benchmark_name) << ","
		        << csv_field(op_name) << ","
		        << result.iterations << ","
		        << std::fixed << std::setprecision(6)
		        << result.min_ms << ","
		        << result.max_ms << ","
		        << result.mean_ms << ","
		        << result.median_ms << ","
		        << result.std_dev_ms << ","
		        << result.p50_ms << ","
		        << result.p90_ms << ","
		        << result.p95_ms << ","
		        << result.p99_ms << ","
		        << result.total_ms;
		    if (total_ops > 0 && result.total_ms > 0) {
		        // total_ms is in milliseconds; convert to seconds for ops/sec.
		        oss << "," << std::setprecision(2)
		            << (total_ops / (result.total_ms / 1000.0));
		    } else {
		        oss << ",0";
		    }
		    oss << "\n";
		    return oss.str();
		}

		// ============================================================================
		// Results formatting
		// ============================================================================

perf_tests/perf_data.cpp

+6 −1

Original line number	Diff line number	Diff line
		@@ -340,6 +340,7 @@ void print_usage(const char* prog) {
		<< " --small-files <N> Number of small files (default: 100)\n"
		<< " --warmup <N> Number of warmup iterations (default: 5)\n"
		<< " --no-cleanup Don't cleanup test files after benchmark\n"
		<< " --csv-file <path> Write CSV results to file (for aggregation)\n"
		<< " --help Show this help message\n";
		}

		@@ -355,6 +356,8 @@ int main(int argc, char* argv[]) {
		int warmup = 5;
		bool cleanup = true;

		std::string csv_file_path;

		static struct option long_options[] = {
		{"mountdir", required_argument, 0, 'm'},
		{"iterations", required_argument, 0, 'i'},
		@@ -362,12 +365,13 @@ int main(int argc, char* argv[]) {
		{"small-files", required_argument, 0, 's'},
		{"warmup", required_argument, 0, 'w'},
		{"no-cleanup", no_argument, 0, 'n'},
		{"csv-file", required_argument, 0, 'c'},
		{"help", no_argument, 0, 'h'},
		{0, 0, 0, 0}
		};

		int opt;
		while ((opt = getopt_long(argc, argv, "m:i:f:s:w:nh", long_options, nullptr)) != -1) {
		while ((opt = getopt_long(argc, argv, "m:i:f:s:w:nc:h", long_options, nullptr)) != -1) {
		switch (opt) {
		case 'm': mountdir = optarg; break;
		case 'i': iterations = std::atoi(optarg); break;
		@@ -375,6 +379,7 @@ int main(int argc, char* argv[]) {
		case 's': small_files = std::atoi(optarg); break;
		case 'w': warmup = std::atoi(optarg); break;
		case 'n': cleanup = false; break;
		case 'c': csv_file_path = optarg; break;
		case 'h': print_usage(argv[0]); return 0;
		default: print_usage(argv[0]); return 1;
		}

perf_tests/perf_metadata.cpp

+54 −18

Original line number	Diff line number	Diff line
		@@ -280,6 +280,7 @@ void print_usage(const char* prog) {
		<< " --warmup <N> Number of warmup iterations (default: 10)\n"
		<< " --batch Use batch mode for mkdir test\n"
		<< " --no-cleanup Don't cleanup test files after benchmark\n"
		<< " --csv Output results in CSV format (to stdout)\n"
		<< " --help Show this help message\n";
		}

		@@ -295,6 +296,8 @@ int main(int argc, char* argv[]) {
		bool batch_mode = false;
		bool cleanup = true;

		bool csv_output = false;

		static struct option long_options[] = {
		{"mountdir", required_argument, 0, 'm'},
		{"iterations", required_argument, 0, 'i'},
		@@ -302,12 +305,13 @@ int main(int argc, char* argv[]) {
		{"warmup", required_argument, 0, 'w'},
		{"batch", no_argument, 0, 'b'},
		{"no-cleanup", no_argument, 0, 'n'},
		{"csv", no_argument, 0, 'c'},
		{"help", no_argument, 0, 'h'},
		{0, 0, 0, 0}
		};

		int opt;
		while ((opt = getopt_long(argc, argv, "m:i:f:w:bnh", long_options, nullptr)) != -1) {
		while ((opt = getopt_long(argc, argv, "m:i:f:w:bnch", long_options, nullptr)) != -1) {
		switch (opt) {
		case 'm': mountdir = optarg; break;
		case 'i': iterations = std::atoi(optarg); break;
		@@ -315,6 +319,7 @@ int main(int argc, char* argv[]) {
		case 'w': warmup = std::atoi(optarg); break;
		case 'b': batch_mode = true; break;
		case 'n': cleanup = false; break;
		case 'c': csv_output = true; break;
		case 'h': print_usage(argv[0]); return 0;
		default: print_usage(argv[0]); return 1;
		}
		@@ -336,24 +341,37 @@ int main(int argc, char* argv[]) {

		std::cout << "Test directory: " << tmp << "\n\n";

		// Timing results stored for CSV output
		TimingResult mkdir_single_result;
		TimingResult mkdir_batch_result;
		TimingResult stat_result;
		TimingResult readdir_result;
		TimingResult symlink_create_result;
		TimingResult symlink_batch_result;
		TimingResult symlink_resolve_result;
		TimingResult rename_result;
		bool mkdir_batch_done = false;
		bool symlink_batch_done = false;

		// ---- 1. Mkdir benchmark ----
		{
		MkdirBenchmark mkdir_bench(tmp, num_files);
		mkdir_bench.setup();

		std::cout << "--- Mkdir Performance ---\n";
		TimingResult single = run_benchmark(
		mkdir_single_result = run_benchmark(
		[&]() {
		std::string d = std::string(tmp) + "/single_" + std::to_string(rand());
		mkdir(d.c_str(), 0755);
		},
		warmup, iterations
		);
		std::cout << format_timing(single, "Single mkdir (individual)", iterations) << "\n";
		std::cout << format_timing(mkdir_single_result, "Single mkdir (individual)", iterations) << "\n";

		if (batch_mode) {
		TimingResult batch = mkdir_bench.batch_run(iterations);
		std::cout << format_timing(batch, "Batch mkdir (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
		mkdir_batch_result = mkdir_bench.batch_run(iterations);
		std::cout << format_timing(mkdir_batch_result, "Batch mkdir (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
		mkdir_batch_done = true;
		}

		mkdir_bench.cleanup();
		@@ -365,7 +383,7 @@ int main(int argc, char* argv[]) {
		stat_bench.setup();

		std::cout << "--- Stat Performance ---\n";
		TimingResult result = run_benchmark(
		stat_result = run_benchmark(
		[&]() {
		for (int i = 0; i < num_files; ++i) {
		struct stat st;
		@@ -374,7 +392,7 @@ int main(int argc, char* argv[]) {
		},
		warmup, iterations
		);
		std::cout << format_timing(result, "Stat all files (" + std::to_string(num_files) + " files)",
		std::cout << format_timing(stat_result, "Stat all files (" + std::to_string(num_files) + " files)",
		iterations * num_files) << "\n";

		stat_bench.cleanup();
		@@ -386,7 +404,7 @@ int main(int argc, char* argv[]) {
		readdir_bench.setup();

		std::cout << "--- Readdir Performance ---\n";
		TimingResult result = run_benchmark(
		readdir_result = run_benchmark(
		[&]() {
		DIR* dir = opendir(tmp);
		if (dir) {
		@@ -397,7 +415,7 @@ int main(int argc, char* argv[]) {
		},
		warmup, iterations
		);
		std::cout << format_timing(result, "Readdir (" + std::to_string(num_files) + " entries)", iterations) << "\n";
		std::cout << format_timing(readdir_result, "Readdir (" + std::to_string(num_files) + " entries)", iterations) << "\n";

		readdir_bench.cleanup();
		}
		@@ -408,29 +426,30 @@ int main(int argc, char* argv[]) {
		symlink_bench.setup();

		std::cout << "--- Symlink Create Performance ---\n";
		TimingResult create_result = run_benchmark(
		symlink_create_result = run_benchmark(
		[&]() {
		std::string link = std::string(tmp) + "/link_" + std::to_string(rand());
		symlink((std::string(tmp) + "/target_0").c_str(), link.c_str());
		},
		warmup, iterations
		);
		std::cout << format_timing(create_result, "Symlink create (individual)", iterations) << "\n";
		std::cout << format_timing(symlink_create_result, "Symlink create (individual)", iterations) << "\n";

		if (batch_mode) {
		TimingResult batch = symlink_bench.batch_create(iterations);
		std::cout << format_timing(batch, "Symlink batch create (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
		symlink_batch_result = symlink_bench.batch_create(iterations);
		std::cout << format_timing(symlink_batch_result, "Symlink batch create (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
		symlink_batch_done = true;
		}

		std::cout << "\n--- Symlink Resolve Performance ---\n";
		TimingResult resolve_result = run_benchmark(
		symlink_resolve_result = run_benchmark(
		[&]() {
		char buf[4096];
		readlink((std::string(tmp) + "/link_0").c_str(), buf, sizeof(buf) - 1);
		},
		warmup, iterations
		);
		std::cout << format_timing(resolve_result, "Symlink resolve (readlink)", iterations) << "\n";
		std::cout << format_timing(symlink_resolve_result, "Symlink resolve (readlink)", iterations) << "\n";

		symlink_bench.cleanup();
		}
		@@ -441,7 +460,7 @@ int main(int argc, char* argv[]) {
		rename_bench.setup();

		std::cout << "--- Rename Performance ---\n";
		TimingResult result = run_benchmark(
		rename_result = run_benchmark(
		[&]() {
		std::string temp = std::string(tmp) + "/temp_" + std::to_string(rand());
		rename((std::string(tmp) + "/orig_0").c_str(), temp.c_str());
		@@ -449,7 +468,7 @@ int main(int argc, char* argv[]) {
		},
		warmup, iterations
		);
		std::cout << format_timing(result, "Rename (back and forth)", iterations) << "\n";
		std::cout << format_timing(rename_result, "Rename (back and forth)", iterations) << "\n";

		rename_bench.cleanup();
		}
		@@ -462,6 +481,23 @@ int main(int argc, char* argv[]) {
		std::cout << "\nTest files kept in: " << tmp << "\n";
		}

		// CSV output
		if (csv_output) {
		std::cout << format_timing_csv_header() << "\n";
		std::cout << format_timing_csv("metadata", "mkdir_single", mkdir_single_result, iterations) << "\n";
		if (mkdir_batch_done) {
		std::cout << format_timing_csv("metadata", "mkdir_batch", mkdir_batch_result, iterations) << "\n";
		}
		std::cout << format_timing_csv("metadata", "stat_all_files", stat_result, iterations * num_files) << "\n";
		std::cout << format_timing_csv("metadata", "readdir", readdir_result, iterations) << "\n";
		std::cout << format_timing_csv("metadata", "symlink_create", symlink_create_result, iterations) << "\n";
		if (symlink_batch_done) {
		std::cout << format_timing_csv("metadata", "symlink_batch_create", symlink_batch_result, iterations) << "\n";
		}
		std::cout << format_timing_csv("metadata", "symlink_resolve", symlink_resolve_result, iterations) << "\n";
		std::cout << format_timing_csv("metadata", "rename", rename_result, iterations) << "\n";
		}

		std::cout << "\n=== Metadata benchmark complete ===\n";
		return 0;
		}