Commit 5e840d64 authored by Ramon Nou's avatar Ramon Nou
Browse files

perf csv

parent 2ddf6ab1
Loading
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -136,8 +136,11 @@ add_custom_target(run-all-perf-with-script
# Install targets
install(TARGETS perf_metadata perf_data perf_delete perf_find perf_directory
        RUNTIME DESTINATION bin)
install(FILES run_benchmarks.sh
install(FILES run_benchmarks.sh run_all_benchmarks.py
        DESTINATION bin)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/
        DESTINATION share/gekkofs/perf_tests
        FILES_MATCHING PATTERN "*.sh")
        FILES_MATCHING PATTERN "*.sh"
                 PATTERN "*.py"
                 PATTERN "*.hpp"
                 PATTERN "README.md")
+78 −8
Original line number Diff line number Diff line
@@ -209,19 +209,89 @@ Metric: Value (unit)

## Comparing Results Across Changes

### Method 1: Manual Comparison
### Method 1: Using the Python Runner (recommended for spreadsheet analysis)

1. Run benchmarks before and after your changes
2. Results are saved to `perf_tests/results/` with timestamps
3. Compare the output files manually
This is the easiest way to get CSV output that can be opened in Excel, Google Sheets, R, or Python/pandas.

```bash
# Compare two result files
diff <(grep "Avg:" results/metadata_20260119_120000.txt) \
     <(grep "Avg:" results/metadata_20260119_130000.txt)
# Run all benchmarks and produce CSV files
./run_all_benchmarks.py --mountdir /mnt/gekkofs

# Or with custom iterations
./run_all_benchmarks.py --mountdir /mnt/gekkofs --iterations 100

# Or with LD_PRELOAD
LD_PRELOAD=/path/to/gkfs_syscall_intercept.so ./run_all_benchmarks.py --mountdir /mnt/gekkofs

# Run only specific benchmarks
./run_all_benchmarks.py --mountdir /mnt/gekkofs --benchmarks metadata data

# Custom output directory
./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./my_results
```

This produces the following files in `results/<timestamp>/`:

| File | Description |
|------|-------------|
| `benchmark_results.csv` | All individual benchmark results (one row per operation) |
| `summary.csv` | Aggregated summary with mean-of-means per operation |
| `benchmark_type_means.csv` | One row per benchmark type with overall mean (best for quick comparison) |
| `run_info.json` | Run metadata (timestamp, mountdir, iterations, hostname) |

#### **benchmark_type_means.csv** (for quick comparison)

```csv
benchmark_type,mean_time_ms,stddev_ms,num_operations
data,5.432,2.345,6
delete,3.210,1.876,4
directory,1.234,0.567,4
metadata,0.234,0.156,6
find,12.345,5.678,5
```

Import into:
- **Excel/Google Sheets**: Data → Get Data → From Text/CSV
- **R**: `results <- read.csv("benchmark_type_means.csv")`
- **Python/pandas**: `results = pd.read_csv("benchmark_type_means.csv")`

#### **Comparing two runs side-by-side**

```bash
# Run baseline
./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./baseline

# ... make your code changes ...

# Run after changes
./run_all_benchmarks.py --mountdir /mnt/gekkofs --output-dir ./after

# Compare benchmark_type_means.csv side-by-side
python3 - <<'EOF'
import csv


def load_means(path):
    """Return {benchmark_type: mean_time_ms} read from a benchmark_type_means.csv file."""
    # 'with' closes the file; newline='' is what the csv module expects for its input.
    with open(path, newline='') as fh:
        return {row['benchmark_type']: float(row['mean_time_ms']) for row in csv.DictReader(fh)}


baseline = load_means('baseline/benchmark_type_means.csv')
after = load_means('after/benchmark_type_means.csv')
print(f"{'Type':<15} {'Baseline':>10} {'After':>10} {'Change':>10}")
for t, old in baseline.items():
    new = after.get(t)
    if new is None:
        # Benchmark absent from the second run: say so instead of reporting a fake -100% change.
        print(f"{t:<15} {old:>10.4f} {'n/a':>10} {'n/a':>10}")
        continue
    pct = ((new - old) / old) * 100 if old else 0.0
    # The '+' flag keeps the sign attached to the number inside the right-aligned column.
    print(f"{t:<15} {old:>10.4f} {new:>10.4f} {pct:>+9.2f}%")
EOF
```

Output:
```
Type              Baseline      After     Change
data                5.4320     4.8910     -9.96%
delete              3.2100     3.0500     -4.98%
directory           1.2340     1.1890     -3.65%
metadata            0.2340     0.1980    -15.38%
find               12.3450    11.2300     -9.03%
```

### Method 2: Automated Comparison Script
### Method 2: Using the Shell Runner

```bash
# Run baseline
+37 −0
Original line number Diff line number Diff line
@@ -87,6 +87,43 @@ inline TimingResult finalize_timing(const std::vector<double>& intervals_ms) {
    return result;
}

// ============================================================================
// CSV output helpers for spreadsheet/statistical tool integration
// ============================================================================

/// Build the CSV header line naming the columns, in the order format_timing_csv()
/// writes them. The returned text carries no trailing newline.
inline std::string format_timing_csv_header() {
    std::string columns;
    // Identification of the measurement.
    columns += "benchmark,operation,iterations,";
    // Basic statistics.
    columns += "min_ms,max_ms,mean_ms,median_ms,stddev_ms,";
    // Percentiles.
    columns += "p50_ms,p90_ms,p95_ms,p99_ms,";
    // Totals.
    columns += "total_ms,throughput_ops_sec";
    return columns;
}

/**
 * @brief Serialize one benchmark measurement as a single CSV data row.
 *
 * Columns are written in the order announced by format_timing_csv_header().
 * Timing columns use fixed notation with six decimals; the throughput column
 * uses two decimals.
 *
 * @param benchmark_name Value for the "benchmark" column.
 * @param op_name        Value for the "operation" column.
 * @param result         Timing statistics to serialize.
 * @param total_ops      Operation count used to derive ops/sec. When it is 0,
 *                       or when result.total_ms is not positive, the
 *                       throughput column is written as a literal 0.
 * @return The row text. It is already terminated by '\n', so callers should
 *         not append another line break (doing so yields blank CSV lines).
 *
 * Text fields are quoted per RFC 4180 only when they contain a comma, a double
 * quote, or a line break; plain names are emitted unchanged.
 */
inline std::string format_timing_csv(const std::string& benchmark_name,
                                      const std::string& op_name,
                                      const TimingResult& result,
                                      size_t total_ops = 0) {
    // Without quoting, a name containing ',' would shift every later column.
    const auto csv_field = [](const std::string& text) -> std::string {
        if (text.find_first_of(",\"\r\n") == std::string::npos) {
            return text;
        }
        std::string quoted = "\"";
        for (const char ch : text) {
            if (ch == '"') {
                quoted += '"';  // embedded quotes are doubled
            }
            quoted += ch;
        }
        quoted += '"';
        return quoted;
    };

    std::ostringstream oss;
    oss << csv_field(benchmark_name) << ","
        << csv_field(op_name) << ","
        << result.iterations << ","
        << std::fixed << std::setprecision(6)
        << result.min_ms << ","
        << result.max_ms << ","
        << result.mean_ms << ","
        << result.median_ms << ","
        << result.std_dev_ms << ","
        << result.p50_ms << ","
        << result.p90_ms << ","
        << result.p95_ms << ","
        << result.p99_ms << ","
        << result.total_ms;
    if (total_ops > 0 && result.total_ms > 0) {
        // total_ms is in milliseconds; divide by 1000 to get ops per second.
        oss << "," << std::setprecision(2)
            << (total_ops / (result.total_ms / 1000.0));
    } else {
        oss << ",0";
    }
    oss << "\n";
    return oss.str();
}

// ============================================================================
// Results formatting
// ============================================================================
+6 −1
Original line number Diff line number Diff line
@@ -340,6 +340,7 @@ void print_usage(const char* prog) {
              << "  --small-files <N>      Number of small files (default: 100)\n"
              << "  --warmup <N>           Number of warmup iterations (default: 5)\n"
              << "  --no-cleanup           Don't cleanup test files after benchmark\n"
              << "  --csv-file <path>      Write CSV results to file (for aggregation)\n"
              << "  --help                 Show this help message\n";
}

@@ -355,6 +356,8 @@ int main(int argc, char* argv[]) {
    int warmup = 5;
    bool cleanup = true;
    
    std::string csv_file_path;
    
    static struct option long_options[] = {
        {"mountdir", required_argument, 0, 'm'},
        {"iterations", required_argument, 0, 'i'},
@@ -362,12 +365,13 @@ int main(int argc, char* argv[]) {
        {"small-files", required_argument, 0, 's'},
        {"warmup", required_argument, 0, 'w'},
        {"no-cleanup", no_argument, 0, 'n'},
        {"csv-file", required_argument, 0, 'c'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    
    int opt;
    while ((opt = getopt_long(argc, argv, "m:i:f:s:w:nh", long_options, nullptr)) != -1) {
    while ((opt = getopt_long(argc, argv, "m:i:f:s:w:nc:h", long_options, nullptr)) != -1) {
        switch (opt) {
            case 'm': mountdir = optarg; break;
            case 'i': iterations = std::atoi(optarg); break;
@@ -375,6 +379,7 @@ int main(int argc, char* argv[]) {
            case 's': small_files = std::atoi(optarg); break;
            case 'w': warmup = std::atoi(optarg); break;
            case 'n': cleanup = false; break;
            case 'c': csv_file_path = optarg; break;
            case 'h': print_usage(argv[0]); return 0;
            default: print_usage(argv[0]); return 1;
        }
+54 −18
Original line number Diff line number Diff line
@@ -280,6 +280,7 @@ void print_usage(const char* prog) {
              << "  --warmup <N>           Number of warmup iterations (default: 10)\n"
              << "  --batch                Use batch mode for mkdir test\n"
              << "  --no-cleanup           Don't cleanup test files after benchmark\n"
              << "  --csv                  Output results in CSV format (to stdout)\n"
              << "  --help                 Show this help message\n";
}

@@ -295,6 +296,8 @@ int main(int argc, char* argv[]) {
    bool batch_mode = false;
    bool cleanup = true;
    
    bool csv_output = false;
    
    static struct option long_options[] = {
        {"mountdir", required_argument, 0, 'm'},
        {"iterations", required_argument, 0, 'i'},
@@ -302,12 +305,13 @@ int main(int argc, char* argv[]) {
        {"warmup", required_argument, 0, 'w'},
        {"batch", no_argument, 0, 'b'},
        {"no-cleanup", no_argument, 0, 'n'},
        {"csv", no_argument, 0, 'c'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    
    int opt;
    while ((opt = getopt_long(argc, argv, "m:i:f:w:bnh", long_options, nullptr)) != -1) {
    while ((opt = getopt_long(argc, argv, "m:i:f:w:bnch", long_options, nullptr)) != -1) {
        switch (opt) {
            case 'm': mountdir = optarg; break;
            case 'i': iterations = std::atoi(optarg); break;
@@ -315,6 +319,7 @@ int main(int argc, char* argv[]) {
            case 'w': warmup = std::atoi(optarg); break;
            case 'b': batch_mode = true; break;
            case 'n': cleanup = false; break;
            case 'c': csv_output = true; break;
            case 'h': print_usage(argv[0]); return 0;
            default: print_usage(argv[0]); return 1;
        }
@@ -336,24 +341,37 @@ int main(int argc, char* argv[]) {
    
    std::cout << "Test directory: " << tmp << "\n\n";
    
    // Timing results stored for CSV output
    TimingResult mkdir_single_result;
    TimingResult mkdir_batch_result;
    TimingResult stat_result;
    TimingResult readdir_result;
    TimingResult symlink_create_result;
    TimingResult symlink_batch_result;
    TimingResult symlink_resolve_result;
    TimingResult rename_result;
    bool mkdir_batch_done = false;
    bool symlink_batch_done = false;
    
    // ---- 1. Mkdir benchmark ----
    {
        MkdirBenchmark mkdir_bench(tmp, num_files);
        mkdir_bench.setup();
        
        std::cout << "--- Mkdir Performance ---\n";
        TimingResult single = run_benchmark(
        mkdir_single_result = run_benchmark(
            [&]() {
                        std::string d = std::string(tmp) + "/single_" + std::to_string(rand());
                mkdir(d.c_str(), 0755);
            },
            warmup, iterations
        );
        std::cout << format_timing(single, "Single mkdir (individual)", iterations) << "\n";
        std::cout << format_timing(mkdir_single_result, "Single mkdir (individual)", iterations) << "\n";
        
        if (batch_mode) {
            TimingResult batch = mkdir_bench.batch_run(iterations);
            std::cout << format_timing(batch, "Batch mkdir (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
            mkdir_batch_result = mkdir_bench.batch_run(iterations);
            std::cout << format_timing(mkdir_batch_result, "Batch mkdir (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
            mkdir_batch_done = true;
        }
        
        mkdir_bench.cleanup();
@@ -365,7 +383,7 @@ int main(int argc, char* argv[]) {
        stat_bench.setup();
        
        std::cout << "--- Stat Performance ---\n";
        TimingResult result = run_benchmark(
        stat_result = run_benchmark(
            [&]() {
                for (int i = 0; i < num_files; ++i) {
                    struct stat st;
@@ -374,7 +392,7 @@ int main(int argc, char* argv[]) {
            },
            warmup, iterations
        );
        std::cout << format_timing(result, "Stat all files (" + std::to_string(num_files) + " files)", 
        std::cout << format_timing(stat_result, "Stat all files (" + std::to_string(num_files) + " files)", 
                      iterations * num_files) << "\n";
        
        stat_bench.cleanup();
@@ -386,7 +404,7 @@ int main(int argc, char* argv[]) {
        readdir_bench.setup();
        
        std::cout << "--- Readdir Performance ---\n";
        TimingResult result = run_benchmark(
        readdir_result = run_benchmark(
            [&]() {
                DIR* dir = opendir(tmp);
                if (dir) {
@@ -397,7 +415,7 @@ int main(int argc, char* argv[]) {
            },
            warmup, iterations
        );
        std::cout << format_timing(result, "Readdir (" + std::to_string(num_files) + " entries)", iterations) << "\n";
        std::cout << format_timing(readdir_result, "Readdir (" + std::to_string(num_files) + " entries)", iterations) << "\n";
        
        readdir_bench.cleanup();
    }
@@ -408,29 +426,30 @@ int main(int argc, char* argv[]) {
        symlink_bench.setup();
        
        std::cout << "--- Symlink Create Performance ---\n";
        TimingResult create_result = run_benchmark(
        symlink_create_result = run_benchmark(
            [&]() {
                std::string link = std::string(tmp) + "/link_" + std::to_string(rand());
                symlink((std::string(tmp) + "/target_0").c_str(), link.c_str());
            },
            warmup, iterations
        );
        std::cout << format_timing(create_result, "Symlink create (individual)", iterations) << "\n";
        std::cout << format_timing(symlink_create_result, "Symlink create (individual)", iterations) << "\n";
        
        if (batch_mode) {
            TimingResult batch = symlink_bench.batch_create(iterations);
            std::cout << format_timing(batch, "Symlink batch create (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
            symlink_batch_result = symlink_bench.batch_create(iterations);
            std::cout << format_timing(symlink_batch_result, "Symlink batch create (" + std::to_string(num_files) + " files/batch)", iterations) << "\n";
            symlink_batch_done = true;
        }
        
        std::cout << "\n--- Symlink Resolve Performance ---\n";
        TimingResult resolve_result = run_benchmark(
        symlink_resolve_result = run_benchmark(
            [&]() {
                char buf[4096];
                readlink((std::string(tmp) + "/link_0").c_str(), buf, sizeof(buf) - 1);
            },
            warmup, iterations
        );
        std::cout << format_timing(resolve_result, "Symlink resolve (readlink)", iterations) << "\n";
        std::cout << format_timing(symlink_resolve_result, "Symlink resolve (readlink)", iterations) << "\n";
        
        symlink_bench.cleanup();
    }
@@ -441,7 +460,7 @@ int main(int argc, char* argv[]) {
        rename_bench.setup();
        
        std::cout << "--- Rename Performance ---\n";
        TimingResult result = run_benchmark(
        rename_result = run_benchmark(
            [&]() {
                std::string temp = std::string(tmp) + "/temp_" + std::to_string(rand());
                rename((std::string(tmp) + "/orig_0").c_str(), temp.c_str());
@@ -449,7 +468,7 @@ int main(int argc, char* argv[]) {
            },
            warmup, iterations
        );
        std::cout << format_timing(result, "Rename (back and forth)", iterations) << "\n";
        std::cout << format_timing(rename_result, "Rename (back and forth)", iterations) << "\n";
        
        rename_bench.cleanup();
    }
@@ -462,6 +481,23 @@ int main(int argc, char* argv[]) {
        std::cout << "\nTest files kept in: " << tmp << "\n";
    }
    
    // CSV output
    if (csv_output) {
        std::cout << format_timing_csv_header() << "\n";
        std::cout << format_timing_csv("metadata", "mkdir_single", mkdir_single_result, iterations) << "\n";
        if (mkdir_batch_done) {
            std::cout << format_timing_csv("metadata", "mkdir_batch", mkdir_batch_result, iterations) << "\n";
        }
        std::cout << format_timing_csv("metadata", "stat_all_files", stat_result, iterations * num_files) << "\n";
        std::cout << format_timing_csv("metadata", "readdir", readdir_result, iterations) << "\n";
        std::cout << format_timing_csv("metadata", "symlink_create", symlink_create_result, iterations) << "\n";
        if (symlink_batch_done) {
            std::cout << format_timing_csv("metadata", "symlink_batch_create", symlink_batch_result, iterations) << "\n";
        }
        std::cout << format_timing_csv("metadata", "symlink_resolve", symlink_resolve_result, iterations) << "\n";
        std::cout << format_timing_csv("metadata", "rename", rename_result, iterations) << "\n";
    }
    
    std::cout << "\n=== Metadata benchmark complete ===\n";
    return 0;
}
Loading