1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
#!/usr/bin/env python3
"""Visualize benchmark results from Google Benchmark JSON output."""
import json
import sys
from pathlib import Path
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
def load_benchmark_json(filepath):
    """Read *filepath* and return the parsed JSON document."""
    with open(filepath, "r") as handle:
        return json.load(handle)
def extract_benchmark_data(json_data):
    """Extract mean timings from Google Benchmark JSON output.

    Returns a nested mapping: implementation -> dataset -> workload ->
    mean CPU time in milliseconds. Entries are identified via the
    benchmark's "label" field, expected to hold comma-separated
    key=value pairs (implementation=..., dataset=..., workload=...).
    """
    results = defaultdict(lambda: defaultdict(dict))
    # .get() with a default: a JSON without a "benchmarks" array yields an
    # empty result instead of a KeyError.
    for bench in json_data.get("benchmarks", []):
        # Non-aggregate entries (plain iteration runs, or any run when no
        # repetitions were configured) carry no "aggregate_name" key at all,
        # so direct indexing would raise KeyError. Use .get() and keep only
        # the "mean" aggregates.
        if bench.get("aggregate_name") != "mean":
            continue
        label = bench.get("label", "")
        # Parse the comma-separated "key=value" pairs out of the label.
        parts = {}
        for item in label.split(","):
            if "=" in item:
                key, value = item.split("=", 1)
                parts[key] = value
        implementation = parts.get("implementation", "unknown")
        dataset = parts.get("dataset", "unknown")
        workload = parts.get("workload", "unknown")
        # Convert nanoseconds to milliseconds.
        # NOTE(review): assumes the benchmark's "time_unit" is "ns" (the
        # Google Benchmark default) — confirm against the producing binary.
        time_ms = bench["cpu_time"] / 1_000_000
        results[implementation][dataset][workload] = time_ms
    return results
def create_comparison_chart(all_results, output_path):
    """Render a 1x3 grid of grouped bar charts (one subplot per dataset).

    Each subplot compares every implementation across the four workloads;
    bar heights are mean times in milliseconds (lower is better). The
    figure is written to *output_path* as a PNG.
    """
    workload_order = ("Insert", "LookupHit", "LookupMiss", "Erase")
    dataset_order = ("Scalar", "HandlePayload", "CompositeKey")
    # One fixed color per workload so bars are comparable across subplots.
    workload_colors = {
        "Insert": "#3498db",
        "LookupHit": "#2ecc71",
        "LookupMiss": "#e74c3c",
        "Erase": "#f39c12",
    }
    impl_names = list(all_results.keys())
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    title = "HashMap Benchmark Comparison (1M entries, lower is better)"
    fig.suptitle(title, fontsize=16, fontweight="bold")
    positions = np.arange(len(impl_names))
    bar_width = 0.2
    for ax, dataset in zip(axes, dataset_order):
        for slot, workload in enumerate(workload_order):
            # Missing dataset/workload combinations plot as zero-height bars.
            heights = [
                all_results[impl].get(dataset, {}).get(workload, 0)
                for impl in impl_names
            ]
            # Center the 4-bar group on each implementation's tick.
            ax.bar(
                positions + bar_width * (slot - 1.5),
                heights,
                bar_width,
                label=workload,
                color=workload_colors[workload],
            )
        ax.set_ylabel("Time (ms)", fontsize=12)
        ax.set_title(f"{dataset} Dataset", fontsize=14, fontweight="bold")
        ax.set_xticks(positions)
        ax.set_xticklabels(impl_names, rotation=15, ha="right")
        ax.legend()
        ax.grid(axis="y", alpha=0.3)
    plt.tight_layout()
    plt.savefig(output_path, dpi=300, bbox_inches="tight")
    print(f"Chart saved to: {output_path}")
def main():
    """Load each benchmark JSON from the repo root and generate the chart."""
    script_dir = Path(__file__).parent
    root_dir = script_dir.parent
    # Fallback display name -> expected JSON path (relative to repo root).
    sources = {
        "cheesemap": root_dir / "cheesemap.json",
        "std::unordered_map": root_dir / "unordered.json",
        "tidwall": root_dir / "tidwall.json",
        "absl::flat_hash_map": root_dir / "abseil.json",
    }
    all_results = {}
    for fallback_name, filepath in sources.items():
        if not filepath.exists():
            print(f"Warning: {filepath} not found, skipping...")
            continue
        print(f"Loading {filepath}...")
        parsed = extract_benchmark_data(load_benchmark_json(filepath))
        if parsed:
            # Prefer the implementation name embedded in the benchmark labels.
            impl_name = next(iter(parsed))
            all_results[impl_name] = parsed[impl_name]
        else:
            all_results[fallback_name] = {}
    if not all_results:
        print("Error: No valid benchmark data found!")
        sys.exit(1)
    print("\nGenerating chart...")
    create_comparison_chart(all_results, root_dir / "benchmarks.png")
    print("\nDone!")
if __name__ == "__main__":
main()
|