diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 77e2b16108496aaf04ff8607a26840bb8b940e33..df7db96efc1db7cc00f580e0cd25b904d3a59b65 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -136,8 +136,11 @@ add_custom_target(run-all-perf-with-script # Install targets install(TARGETS perf_metadata perf_data perf_delete perf_find perf_directory RUNTIME DESTINATION bin) -install(FILES run_benchmarks.sh +install(FILES run_benchmarks.sh run_all_benchmarks.py DESTINATION bin) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ DESTINATION share/gekkofs/perf_tests - FILES_MATCHING PATTERN "*.sh") + FILES_MATCHING PATTERN "*.sh" + PATTERN "*.py" + PATTERN "*.hpp" + PATTERN "README.md") diff --git a/perf_tests/README.md b/perf_tests/README.md index d896dbe81604a092c02e54da3384b92c8d0b7cdb..6435830a7be0851d1d67ea2d5ec7e3cab0f3ed35 100644 --- a/perf_tests/README.md +++ b/perf_tests/README.md @@ -209,19 +209,89 @@ Metric: Value (unit) ## Comparing Results Across Changes -### Method 1: Manual Comparison +### Method 1: Using the Python Runner (recommended for spreadsheet analysis) -1. Run benchmarks before and after your changes -2. Results are saved to `perf_tests/results/` with timestamps -3. Compare the output files manually +This is the easiest way to get CSV output that can be opened in Excel, Google Sheets, R, or Python/pandas. 
```bash -# Compare two result files -diff <(grep "Avg:" results/metadata_20260119_120000.txt) \ - <(grep "Avg:" results/metadata_20260119_130000.txt) +# Run all benchmarks and produce CSV files +./run_all_benchmarks.py --mountdir /mnt/gekkofs + +# Or with custom iterations +./run_all_benchmarks.py --mountdir /mnt/gekkofs --iterations 100 + +# Or with LD_PRELOAD +LD_PRELOAD=/path/to/gkfs_syscall_intercept.so ./run_all_benchmarks.py --mountdir /mnt/gekkofs + +# Run only specific benchmarks +./run_all_benchmarks.py --mountdir /mnt/gekkofs --benchmarks metadata data + +# Custom output directory +./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./my_results +``` + +This produces the following files in `results/<timestamp>/` (or in the directory passed via `--output-dir`): + +| File | Description | +|------|-------------| +| `benchmark_results.csv` | All individual benchmark results (one row per operation) | +| `summary.csv` | Aggregated summary with mean-of-means per operation | +| `benchmark_type_means.csv` | One row per benchmark type with overall mean (best for quick comparison) | +| `run_info.json` | Run metadata (timestamp, mountdir, iterations, hostname) | + +#### **benchmark_type_means.csv** (for quick comparison) + +```csv +benchmark_type,mean_time_ms,stddev_ms,num_operations +data,5.432,2.345,6 +delete,3.210,1.876,4 +directory,1.234,0.567,4 +metadata,0.234,0.156,6 +find,12.345,5.678,5 +``` + +Import into: +- **Excel/Google Sheets**: Data → Get Data → From Text/CSV +- **R**: `results <- read.csv("benchmark_type_means.csv")` +- **Python/pandas**: `results = pd.read_csv("benchmark_type_means.csv")` + +#### **Comparing two runs side-by-side** + +```bash +# Run baseline +./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./baseline + +# ... make your code changes ... 
+ +# Run after changes +./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./after + +# Compare benchmark_type_means.csv side-by-side +python3 - <<'EOF' +import csv +baseline = {r['benchmark_type']: float(r['mean_time_ms']) for r in csv.DictReader(open('baseline/benchmark_type_means.csv'))} +after = {r['benchmark_type']: float(r['mean_time_ms']) for r in csv.DictReader(open('after/benchmark_type_means.csv'))} +print(f"{'Type':<15} {'Baseline':>10} {'After':>10} {'Change':>10}") +for t in baseline: + old = baseline[t] + new = after.get(t, 0) + pct = ((new - old) / old) * 100 if old else 0 + sign = "+" if pct > 0 else "" + print(f"{t:<15} {old:>10.4f} {new:>10.4f} {sign}{pct:>9.2f}%") +EOF +``` + +Output: +``` +Type Baseline After Change +data 5.4320 4.8910 -9.96% +delete 3.2100 3.0500 -4.98% +directory 1.2340 1.1890 -3.65% +metadata 0.2340 0.1980 -15.38% +find 12.3450 11.2300 -9.03% ``` -### Method 2: Automated Comparison Script +### Method 2: Using the Shell Runner ```bash # Run baseline diff --git a/perf_tests/perf_common.hpp b/perf_tests/perf_common.hpp index 36ed347c28602c8ed205e9a3e27f19f1d465993f..7ec5db216b160dc9b97ab3e3d65fd124c299986b 100644 --- a/perf_tests/perf_common.hpp +++ b/perf_tests/perf_common.hpp @@ -87,6 +87,46 @@ inline TimingResult finalize_timing(const std::vector& intervals_ms) { return result; } +// ============================================================================ +// CSV output helpers for spreadsheet/statistical tool integration +// ============================================================================ + +// Returns the CSV column header (no trailing newline); column order matches format_timing_csv(). +inline std::string format_timing_csv_header() { + return "benchmark,operation,iterations,min_ms,max_ms,mean_ms,median_ms,stddev_ms,p50_ms,p90_ms,p95_ms,p99_ms,total_ms,throughput_ops_sec"; +} + +// Formats one TimingResult as a CSV row in format_timing_csv_header() column order. +// throughput_ops_sec is total_ops per second of total_ms, or 0 unless both total_ops and total_ms are positive. +// The row has no trailing newline: callers terminate it themselves, exactly as they do for the header. +inline std::string format_timing_csv(const std::string& benchmark_name, + const std::string& op_name, + const TimingResult& result, + size_t total_ops = 0) { + std::ostringstream oss; + oss << benchmark_name << "," + << 
op_name << "," + << result.iterations << "," + << std::fixed << std::setprecision(6) + << result.min_ms << "," + << result.max_ms << "," + << result.mean_ms << "," + << result.median_ms << "," + << result.std_dev_ms << "," + << result.p50_ms << "," + << result.p90_ms << "," + << result.p95_ms << "," + << result.p99_ms << "," + << result.total_ms; + if (total_ops > 0 && result.total_ms > 0) { + oss << "," << std::setprecision(2) + << (total_ops / (result.total_ms / 1000.0)); + } else { + oss << ",0"; + } + return oss.str(); +} + // ============================================================================ // Results formatting // ============================================================================ diff --git a/perf_tests/perf_data.cpp b/perf_tests/perf_data.cpp index a1eb5a14a5a1a75ca7b1b61a75970fddf060da37..e791cd7383b215800907cd948e3ab51671b87ea3 100644 --- a/perf_tests/perf_data.cpp +++ b/perf_tests/perf_data.cpp @@ -340,6 +340,7 @@ void print_usage(const char* prog) { << " --small-files Number of small files (default: 100)\n" << " --warmup Number of warmup iterations (default: 5)\n" << " --no-cleanup Don't cleanup test files after benchmark\n" + << " --csv-file Write CSV results to file (for aggregation)\n" << " --help Show this help message\n"; } @@ -355,6 +356,8 @@ int main(int argc, char* argv[]) { int warmup = 5; bool cleanup = true; + std::string csv_file_path; + static struct option long_options[] = { {"mountdir", required_argument, 0, 'm'}, {"iterations", required_argument, 0, 'i'}, @@ -362,12 +365,13 @@ int main(int argc, char* argv[]) { {"small-files", required_argument, 0, 's'}, {"warmup", required_argument, 0, 'w'}, {"no-cleanup", no_argument, 0, 'n'}, + {"csv-file", required_argument, 0, 'c'}, {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; int opt; - while ((opt = getopt_long(argc, argv, "m:i:f:s:w:nh", long_options, nullptr)) != -1) { + while ((opt = getopt_long(argc, argv, "m:i:f:s:w:nc:h", long_options, nullptr)) != -1) { 
switch (opt) { case 'm': mountdir = optarg; break; case 'i': iterations = std::atoi(optarg); break; @@ -375,6 +379,7 @@ int main(int argc, char* argv[]) { case 's': small_files = std::atoi(optarg); break; case 'w': warmup = std::atoi(optarg); break; case 'n': cleanup = false; break; + case 'c': csv_file_path = optarg; break; case 'h': print_usage(argv[0]); return 0; default: print_usage(argv[0]); return 1; } diff --git a/perf_tests/perf_metadata.cpp b/perf_tests/perf_metadata.cpp index bfc1fc1a3eba47c0e4163ccf33613c487ceea510..f9426a61dc2aee6740a8652b44fac96809ae1267 100644 --- a/perf_tests/perf_metadata.cpp +++ b/perf_tests/perf_metadata.cpp @@ -280,6 +280,7 @@ void print_usage(const char* prog) { << " --warmup Number of warmup iterations (default: 10)\n" << " --batch Use batch mode for mkdir test\n" << " --no-cleanup Don't cleanup test files after benchmark\n" + << " --csv Output results in CSV format (to stdout)\n" << " --help Show this help message\n"; } @@ -295,6 +296,8 @@ int main(int argc, char* argv[]) { bool batch_mode = false; bool cleanup = true; + bool csv_output = false; + static struct option long_options[] = { {"mountdir", required_argument, 0, 'm'}, {"iterations", required_argument, 0, 'i'}, @@ -302,12 +305,13 @@ int main(int argc, char* argv[]) { {"warmup", required_argument, 0, 'w'}, {"batch", no_argument, 0, 'b'}, {"no-cleanup", no_argument, 0, 'n'}, + {"csv", no_argument, 0, 'c'}, {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; int opt; - while ((opt = getopt_long(argc, argv, "m:i:f:w:bnh", long_options, nullptr)) != -1) { + while ((opt = getopt_long(argc, argv, "m:i:f:w:bnch", long_options, nullptr)) != -1) { switch (opt) { case 'm': mountdir = optarg; break; case 'i': iterations = std::atoi(optarg); break; @@ -315,6 +319,7 @@ int main(int argc, char* argv[]) { case 'w': warmup = std::atoi(optarg); break; case 'b': batch_mode = true; break; case 'n': cleanup = false; break; + case 'c': csv_output = true; break; case 'h': print_usage(argv[0]); 
return 0; default: print_usage(argv[0]); return 1; } @@ -336,24 +341,37 @@ int main(int argc, char* argv[]) { std::cout << "Test directory: " << tmp << "\n\n"; + // Timing results stored for CSV output + TimingResult mkdir_single_result; + TimingResult mkdir_batch_result; + TimingResult stat_result; + TimingResult readdir_result; + TimingResult symlink_create_result; + TimingResult symlink_batch_result; + TimingResult symlink_resolve_result; + TimingResult rename_result; + bool mkdir_batch_done = false; + bool symlink_batch_done = false; + // ---- 1. Mkdir benchmark ---- { MkdirBenchmark mkdir_bench(tmp, num_files); mkdir_bench.setup(); std::cout << "--- Mkdir Performance ---\n"; - TimingResult single = run_benchmark( + mkdir_single_result = run_benchmark( [&]() { std::string d = std::string(tmp) + "/single_" + std::to_string(rand()); mkdir(d.c_str(), 0755); }, warmup, iterations ); - std::cout << format_timing(single, "Single mkdir (individual)", iterations) << "\n"; + std::cout << format_timing(mkdir_single_result, "Single mkdir (individual)", iterations) << "\n"; if (batch_mode) { - TimingResult batch = mkdir_bench.batch_run(iterations); - std::cout << format_timing(batch, "Batch mkdir (" + std::to_string(num_files) + " files/batch)", iterations) << "\n"; + mkdir_batch_result = mkdir_bench.batch_run(iterations); + std::cout << format_timing(mkdir_batch_result, "Batch mkdir (" + std::to_string(num_files) + " files/batch)", iterations) << "\n"; + mkdir_batch_done = true; } mkdir_bench.cleanup(); @@ -365,7 +383,7 @@ int main(int argc, char* argv[]) { stat_bench.setup(); std::cout << "--- Stat Performance ---\n"; - TimingResult result = run_benchmark( + stat_result = run_benchmark( [&]() { for (int i = 0; i < num_files; ++i) { struct stat st; @@ -374,7 +392,7 @@ int main(int argc, char* argv[]) { }, warmup, iterations ); - std::cout << format_timing(result, "Stat all files (" + std::to_string(num_files) + " files)", + std::cout << format_timing(stat_result, "Stat 
all files (" + std::to_string(num_files) + " files)", iterations * num_files) << "\n"; stat_bench.cleanup(); @@ -386,7 +404,7 @@ int main(int argc, char* argv[]) { readdir_bench.setup(); std::cout << "--- Readdir Performance ---\n"; - TimingResult result = run_benchmark( + readdir_result = run_benchmark( [&]() { DIR* dir = opendir(tmp); if (dir) { @@ -397,7 +415,7 @@ int main(int argc, char* argv[]) { }, warmup, iterations ); - std::cout << format_timing(result, "Readdir (" + std::to_string(num_files) + " entries)", iterations) << "\n"; + std::cout << format_timing(readdir_result, "Readdir (" + std::to_string(num_files) + " entries)", iterations) << "\n"; readdir_bench.cleanup(); } @@ -408,29 +426,30 @@ int main(int argc, char* argv[]) { symlink_bench.setup(); std::cout << "--- Symlink Create Performance ---\n"; - TimingResult create_result = run_benchmark( + symlink_create_result = run_benchmark( [&]() { std::string link = std::string(tmp) + "/link_" + std::to_string(rand()); symlink((std::string(tmp) + "/target_0").c_str(), link.c_str()); }, warmup, iterations ); - std::cout << format_timing(create_result, "Symlink create (individual)", iterations) << "\n"; + std::cout << format_timing(symlink_create_result, "Symlink create (individual)", iterations) << "\n"; if (batch_mode) { - TimingResult batch = symlink_bench.batch_create(iterations); - std::cout << format_timing(batch, "Symlink batch create (" + std::to_string(num_files) + " files/batch)", iterations) << "\n"; + symlink_batch_result = symlink_bench.batch_create(iterations); + std::cout << format_timing(symlink_batch_result, "Symlink batch create (" + std::to_string(num_files) + " files/batch)", iterations) << "\n"; + symlink_batch_done = true; } std::cout << "\n--- Symlink Resolve Performance ---\n"; - TimingResult resolve_result = run_benchmark( + symlink_resolve_result = run_benchmark( [&]() { char buf[4096]; readlink((std::string(tmp) + "/link_0").c_str(), buf, sizeof(buf) - 1); }, warmup, iterations ); - 
std::cout << format_timing(resolve_result, "Symlink resolve (readlink)", iterations) << "\n"; + std::cout << format_timing(symlink_resolve_result, "Symlink resolve (readlink)", iterations) << "\n"; symlink_bench.cleanup(); } @@ -441,7 +460,7 @@ int main(int argc, char* argv[]) { rename_bench.setup(); std::cout << "--- Rename Performance ---\n"; - TimingResult result = run_benchmark( + rename_result = run_benchmark( [&]() { std::string temp = std::string(tmp) + "/temp_" + std::to_string(rand()); rename((std::string(tmp) + "/orig_0").c_str(), temp.c_str()); @@ -449,7 +468,7 @@ int main(int argc, char* argv[]) { }, warmup, iterations ); - std::cout << format_timing(result, "Rename (back and forth)", iterations) << "\n"; + std::cout << format_timing(rename_result, "Rename (back and forth)", iterations) << "\n"; rename_bench.cleanup(); } @@ -462,6 +481,23 @@ int main(int argc, char* argv[]) { std::cout << "\nTest files kept in: " << tmp << "\n"; } + // CSV output + if (csv_output) { + std::cout << format_timing_csv_header() << "\n"; + std::cout << format_timing_csv("metadata", "mkdir_single", mkdir_single_result, iterations) << "\n"; + if (mkdir_batch_done) { + std::cout << format_timing_csv("metadata", "mkdir_batch", mkdir_batch_result, iterations) << "\n"; + } + std::cout << format_timing_csv("metadata", "stat_all_files", stat_result, iterations * num_files) << "\n"; + std::cout << format_timing_csv("metadata", "readdir", readdir_result, iterations) << "\n"; + std::cout << format_timing_csv("metadata", "symlink_create", symlink_create_result, iterations) << "\n"; + if (symlink_batch_done) { + std::cout << format_timing_csv("metadata", "symlink_batch_create", symlink_batch_result, iterations) << "\n"; + } + std::cout << format_timing_csv("metadata", "symlink_resolve", symlink_resolve_result, iterations) << "\n"; + std::cout << format_timing_csv("metadata", "rename", rename_result, iterations) << "\n"; + } + std::cout << "\n=== Metadata benchmark complete ===\n"; return 
0; -} \ No newline at end of file +} diff --git a/perf_tests/run_all_benchmarks.py b/perf_tests/run_all_benchmarks.py new file mode 100755 index 0000000000000000000000000000000000000000..8e7a327e72f1dd9211cf8920a5d67a03b6fbf4f4 --- /dev/null +++ b/perf_tests/run_all_benchmarks.py @@ -0,0 +1,437 @@ +#!/usr/bin/env python3 +""" +run_all_benchmarks.py - Master benchmark runner for GekkoFS performance tests + +Runs all benchmarks and aggregates results into CSV files suitable for +spreadsheet (Excel/Google Sheets) or statistical tools (R, Python/pandas). + +Usage: + ./run_all_benchmarks.py --mountdir /mnt/gekkofs + ./run_all_benchmarks.py --mountdir /mnt/gekkofs --iterations 100 --output-dir ./results + LD_PRELOAD=/path/to/gkfs_syscall_intercept.so ./run_all_benchmarks.py --mountdir /mnt/gekkofs + +Output files (in results/ directory): + benchmark_results.csv - All individual benchmark results (one row per operation) + summary.csv - Aggregated summary by benchmark type + benchmark_type_means.csv - Mean time per benchmark type (for quick comparison) + run_info.json - Run configuration and metadata +""" + +import argparse +import csv +import json +import os +import re +import shutil +import subprocess +import sys +import time +from datetime import datetime +from pathlib import Path + + +# Benchmark definitions +BENCHMARKS = [ + { + "name": "metadata", + "executable": "perf_metadata", + "default_args": ["--iterations", "50", "--files", "100", "--warmup", "3"], + "description": "Metadata operations (mkdir, stat, readdir, symlink, rename)" + }, + { + "name": "data", + "executable": "perf_data", + "default_args": ["--iterations", "50", "--warmup", "3", "--filesize", "1048576"], + "description": "Data operations (seq write, seq read, rand write, rand read)" + }, + { + "name": "delete", + "executable": "perf_delete", + "default_args": ["--iterations", "50", "--warmup", "3", "--files", "100"], + "description": "Delete operations (file, directory, recursive, symlink)" + }, + { + 
"name": "find", + "executable": "perf_find", + "default_args": ["--iterations", "5", "--warmup", "2", "--dir-count", "50", "--files-per-dir", "100"], + "description": "Find/search operations (opendir/readdir, fstatat, recursive traversal)" + }, + { + "name": "directory", + "executable": "perf_directory", + "default_args": ["--iterations", "50", "--warmup", "3"], + "description": "Directory operations (mkdir, rmdir, rename, symlink)" + }, +] + + +# Regex for one timing block in the benchmarks' text output: a '--- section ---' header, a rule of 60 '=' characters, the label line, a rule of 60 '-' characters, then the statistics lines. NOTE(review): the '=' rule width is assumed to mirror the '-' rule - confirm against format_timing() in perf_common.hpp. +TIMING_PATTERN = re.compile( + r'---\s+(.+?)\s*---\s+' + r'={60}\s*\n' + r'\s+(.+?)\s*\n' + r'-{60}\s*\n' + r'\s+Iterations:\s+(\d+)\s*\n' + r'(?:\s+Total ops:\s+(\d+)\s*\n)?' + r'(?:\s+Throughput:\s+([\d.]+)\s+ops/sec\s*\n)?' + r'\s+Total time:\s+([\d.]+)\s+ms\s*\n' + r'\s+Min:\s+([\d.]+)\s+ms\s*\n' + r'\s+Max:\s+([\d.]+)\s+ms\s*\n' + r'\s+Mean:\s+([\d.]+)\s+ms\s*\n' + r'\s+Median:\s+([\d.]+)\s+ms\s*\n' + r'\s+Std dev:\s+([\d.]+)\s+ms\s*\n' + r'\s+p50:\s+([\d.]+)\s+ms\s*\n' + r'\s+p90:\s+([\d.]+)\s+ms\s*\n' + r'\s+p95:\s+([\d.]+)\s+ms\s*\n' + r'\s+p99:\s+([\d.]+)\s+ms\s*' + , re.MULTILINE +) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Run all GekkoFS performance benchmarks and aggregate CSV results", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run with FUSE mount + ./run_all_benchmarks.py --mountdir /mnt/gekkofs + + # Run with LD_PRELOAD (syscall interception) + LD_PRELOAD=/path/to/gkfs_syscall_intercept.so ./run_all_benchmarks.py --mountdir /mnt/gekkofs + + # Custom iterations and output directory + ./run_all_benchmarks.py --mountdir /mnt/gekkofs --iterations 100 --output-dir ./my_results + + # Run only specific benchmarks + ./run_all_benchmarks.py --mountdir /mnt/gekkofs --benchmarks metadata data + +Output files (in results/ directory): + benchmark_results.csv - All individual benchmark results (one row per operation) + summary.csv - Aggregated summary statistics + benchmark_type_means.csv - 
Mean time per benchmark type for quick comparison + run_info.json - Run configuration and metadata +""" + ) + parser.add_argument("--mountdir", "-m", required=True, help="Mount point of GekkoFS") + parser.add_argument("--iterations", "-i", type=int, default=None, + help="Override iterations for all benchmarks") + parser.add_argument("--output-dir", "-o", default=None, + help="Output directory for results (default: results/)") + parser.add_argument("--benchmarks", "-b", nargs="+", + help="Run only specific benchmarks (e.g., 'metadata data')") + parser.add_argument("--csv-stdout", action="store_true", + help="Output combined CSV to stdout") + parser.add_argument("--verbose", "-v", action="store_true", + help="Print detailed output") + return parser.parse_args() + + +def parse_timing_output(output_text): + """Parse timing results from benchmark text output.""" + results = [] + for match in TIMING_PATTERN.finditer(output_text): + op_name = match.group(1).strip() + op_name = re.sub(r'\s*\(.*\)', '', op_name) # Remove parenthetical notes + iterations = int(match.group(3)) + total_ms = float(match.group(6)) + min_ms = float(match.group(7)) + max_ms = float(match.group(8)) + mean_ms = float(match.group(9)) + median_ms = float(match.group(10)) + stddev_ms = float(match.group(11)) + p50_ms = float(match.group(12)) + p90_ms = float(match.group(13)) + p95_ms = float(match.group(14)) + p99_ms = float(match.group(15)) + + throughput = 0 + total_ops = 0 + if match.group(5): + throughput = float(match.group(5)) + total_ops = int(match.group(4)) if match.group(4) else iterations + + results.append({ + "operation": op_name, + "iterations": iterations, + "total_ops": total_ops, + "throughput_ops_sec": throughput, + "min_ms": min_ms, + "max_ms": max_ms, + "mean_ms": mean_ms, + "median_ms": median_ms, + "stddev_ms": stddev_ms, + "p50_ms": p50_ms, + "p90_ms": p90_ms, + "p95_ms": p95_ms, + "p99_ms": p99_ms, + "total_ms": total_ms, + }) + return results + + +def 
run_benchmark(bench_config, mountdir, iterations_override=None, verbose=False): + """Run a single benchmark and parse its output.""" + name = bench_config["name"] + exe = bench_config["executable"] + default_args = bench_config["default_args"] + + # Find executable + exe_path = shutil.which(exe) + if exe_path is None: + candidate = os.path.join(os.path.dirname(os.path.abspath(__file__)), exe) + if os.path.isfile(candidate): + exe_path = candidate + else: + print(f" ERROR: {exe} not found in PATH or perf_tests/") + return [] + + # Build command + args = list(default_args) + if iterations_override: + for i, arg in enumerate(args): + if arg == "--iterations" and i + 1 < len(args): + args[i + 1] = str(iterations_override) + break + else: + args.extend(["--iterations", str(iterations_override)]) + + cmd = [exe_path, "--mountdir", mountdir] + args + + if verbose: + print(f" Running: {' '.join(cmd)}") + + env = os.environ.copy() + ld_preload = os.environ.get("LD_PRELOAD") + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=3600, + env=env + ) + combined_output = result.stdout + + if result.returncode != 0: + print(f" ERROR: {name} failed (exit code {result.returncode})") + if result.stderr: + print(f" Stderr: {result.stderr[:200]}") + return [] + + # Parse results + parsed = parse_timing_output(combined_output) + if parsed: + print(f" ✓ {name}: {len(parsed)} operations recorded") + else: + print(f" ! 
{name}: no timing results found in output") + + return parsed + + except subprocess.TimeoutExpired: + print(f" ERROR: {name} timed out after 3600s") + return [] + except Exception as e: + print(f" ERROR: {name}: {e}") + return [] + + +def write_csv_output(results, output_path): + """Write all results to a CSV file.""" + if not results: + return + + fieldnames = [ + "benchmark", "operation", "iterations", "total_ops", + "throughput_ops_sec", "min_ms", "max_ms", "mean_ms", + "median_ms", "stddev_ms", "p50_ms", "p90_ms", "p95_ms", + "p99_ms", "total_ms" + ] + + with open(output_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore') + writer.writeheader() + for row in results: + writer.writerow(row) + + +def write_summary_csv(results, output_path): + """Write summary CSV with aggregated stats by benchmark type.""" + if not results: + return + + # Group by benchmark + operation + groups = {} + for r in results: + key = (r["benchmark"], r["operation"]) + if key not in groups: + groups[key] = [] + groups[key].append(r) + + fieldnames = [ + "benchmark", "operation", "num_runs", "mean_of_means_ms", + "stddev_of_means_ms", "overall_min_ms", "overall_max_ms", + "mean_iterations" + ] + + with open(output_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + + for (bench, op), runs in sorted(groups.items()): + means = [r["mean_ms"] for r in runs] + mins = [r["min_ms"] for r in runs] + maxs = [r["max_ms"] for r in runs] + iters = [r["iterations"] for r in runs] + + mean_of_means = sum(means) / len(means) + variance = sum((m - mean_of_means) ** 2 for m in means) / len(means) + stddev = variance ** 0.5 + + writer.writerow({ + "benchmark": bench, + "operation": op, + "num_runs": len(runs), + "mean_of_means_ms": round(mean_of_means, 6), + "stddev_of_means_ms": round(stddev, 6), + "overall_min_ms": round(min(mins), 6), + "overall_max_ms": round(max(maxs), 6), + "mean_iterations": 
round(sum(iters) / len(iters)), + }) + + +def write_type_means_csv(results, output_path): + """Write one row per benchmark type with overall mean.""" + if not results: + return + + groups = {} + for r in results: + bench = r["benchmark"] + if bench not in groups: + groups[bench] = [] + groups[bench].append(r["mean_ms"]) + + with open(output_path, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["benchmark_type", "mean_time_ms", "stddev_ms", "num_operations"]) + for bench_name, means in sorted(groups.items()): + mean_val = sum(means) / len(means) + variance = sum((m - mean_val) ** 2 for m in means) / len(means) + stddev = variance ** 0.5 + writer.writerow([ + bench_name, + round(mean_val, 6), + round(stddev, 6), + len(means) + ]) + + +def write_run_info(output_path, args, benchmarks_run): + """Write run metadata as JSON.""" + info = { + "timestamp": datetime.now().isoformat(), + "mountdir": args.mountdir, + "iterations_override": args.iterations, + "benchmarks_run": benchmarks_run, + "ld_preload": os.environ.get("LD_PRELOAD", "none"), + } + try: + import socket + info["hostname"] = socket.gethostname() + except Exception: + info["hostname"] = "unknown" + with open(output_path, "w") as f: + json.dump(info, f, indent=2) + + +def main(): + args = parse_args() + + # Setup output directory + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + if args.output_dir: + base_dir = Path(args.output_dir) + else: + base_dir = Path("results") / timestamp + base_dir.mkdir(parents=True, exist_ok=True) + + print(f"====== GekkoFS Performance Benchmark Suite ======") + print(f" Mount dir: {args.mountdir}") + print(f" Output dir: {base_dir}") + print(f" LD_PRELOAD: {os.environ.get('LD_PRELOAD', 'none')}") + if args.iterations: + print(f" Iterations: {args.iterations} (override)") + print() + + # Validate mountdir + if not os.path.isdir(args.mountdir): + print(f"ERROR: Mount directory does not exist: {args.mountdir}") + sys.exit(1) + if not os.access(args.mountdir, 
os.W_OK): + print(f"ERROR: Mount directory is not writable: {args.mountdir}") + sys.exit(1) + + # Determine which benchmarks to run + if args.benchmarks: + benchmarks = [b for b in BENCHMARKS if b["name"] in args.benchmarks] + else: + benchmarks = BENCHMARKS + + # Run all benchmarks + all_results = [] + benchmarks_run = [] + + for bench in benchmarks: + print(f"--- {bench['name']}: {bench['description']} ---") + success = run_benchmark(bench, args.mountdir, args.iterations, args.verbose) + if success: + benchmarks_run.append(bench["name"]) + for r in success: + r["benchmark"] = bench["name"] + all_results.extend(success) + else: + print(f" SKIPPED {bench['name']}") + print() + + # Write output files + if all_results: + csv_path = base_dir / "benchmark_results.csv" + summary_path = base_dir / "summary.csv" + type_means_path = base_dir / "benchmark_type_means.csv" + info_path = base_dir / "run_info.json" + + write_csv_output(all_results, str(csv_path)) + write_summary_csv(all_results, str(summary_path)) + write_type_means_csv(all_results, str(type_means_path)) + write_run_info(str(info_path), args, benchmarks_run) + + print(f"====== All benchmarks complete! 
======") + print(f" Results: {csv_path}") + print(f" Summary: {summary_path}") + print(f" Type means: {type_means_path}") + print(f" Run info: {info_path}") + + # Show type means summary + print(f"\n--- Benchmark Type Means ---") + with open(type_means_path, "r") as f: + reader = csv.reader(f) + for row in reader: + print(f" {row[0]:20s} mean={row[1]:>10s}ms stddev={row[2]:>10s}ms ops={row[3]}") + else: + print("ERROR: No benchmarks ran successfully.") + sys.exit(1) + + # CSV to stdout if requested + if args.csv_stdout and all_results: + print(f"\n=== Combined CSV Output ===") + print("benchmark,operation,iterations,throughput_ops_sec,min_ms,max_ms,mean_ms,median_ms,stddev_ms") + for r in all_results: + print(f"{r['benchmark']},{r['operation']},{r['iterations']},{r['throughput_ops_sec']},{r['min_ms']},{r['max_ms']},{r['mean_ms']},{r['median_ms']},{r['stddev_ms']}") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/src/daemon/backend/metadata/rocksdb_backend.cpp b/src/daemon/backend/metadata/rocksdb_backend.cpp index c1cc8124f96102890a3a2b427af07ca2ffb0e8a8..94c817220ffe9cd8ad0b83b7aa5fcf2c32d984b5 100644 --- a/src/daemon/backend/metadata/rocksdb_backend.cpp +++ b/src/daemon/backend/metadata/rocksdb_backend.cpp @@ -139,7 +139,7 @@ RocksDBBackend::RocksDBBackend(const std::string& path) { options_.OptimizeLevelStyleCompaction(); options_.create_if_missing = true; options_.merge_operator.reset(new MetadataMergeOperator); - options_.write_buffer_size = 128 * 1024 * 1024; + options_.write_buffer_size = 64 * 1024 * 1024; options_.max_write_buffer_number = 4; options_.compression = rocksdb::kNoCompression; options_.max_background_jobs = 4; @@ -840,7 +840,7 @@ RocksDBBackend::db_size_impl() const { */ void RocksDBBackend::optimize_database_impl() { - options_.max_successive_merges = 128; + options_.max_successive_merges = 32; }