1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
#!/usr/bin/env python3
"""Visualize benchmark results from Google Benchmark JSON output."""
import json
import sys
from pathlib import Path
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
def load_benchmark_json(filepath):
    """Read *filepath* and return the parsed JSON document."""
    with open(filepath, "r") as handle:
        return json.load(handle)
def extract_benchmark_data(json_data):
    """Extract mean timings from Google Benchmark JSON output.

    Returns a nested mapping: implementation -> dataset -> workload ->
    mean CPU time in milliseconds. Entries are identified via the
    benchmark's "label" field, expected to hold comma-separated
    key=value pairs (implementation=..., dataset=..., workload=...).
    """
    results = defaultdict(lambda: defaultdict(dict))
    # .get() with a default: a JSON without a "benchmarks" array yields an
    # empty result instead of a KeyError.
    for bench in json_data.get("benchmarks", []):
        # Non-aggregate entries (plain iteration runs, or any run when no
        # repetitions were configured) carry no "aggregate_name" key at all,
        # so direct indexing would raise KeyError. Use .get() and keep only
        # the "mean" aggregates.
        if bench.get("aggregate_name") != "mean":
            continue
        label = bench.get("label", "")
        # Parse the comma-separated "key=value" pairs out of the label.
        parts = {}
        for item in label.split(","):
            if "=" in item:
                key, value = item.split("=", 1)
                parts[key] = value
        implementation = parts.get("implementation", "unknown")
        dataset = parts.get("dataset", "unknown")
        workload = parts.get("workload", "unknown")
        # Convert nanoseconds to milliseconds.
        # NOTE(review): assumes the benchmark's "time_unit" is "ns" (the
        # Google Benchmark default) — confirm against the producing binary.
        time_ms = bench["cpu_time"] / 1_000_000
        results[implementation][dataset][workload] = time_ms
    return results
def create_comparison_chart(all_results, output_path):
    """Render a 1x3 grid of grouped bar charts (one subplot per dataset).

    Each subplot compares every implementation across the four workloads;
    bar heights are mean times in milliseconds (lower is better). The
    figure is written to *output_path* as a PNG.
    """
    workload_order = ("Insert", "LookupHit", "LookupMiss", "Erase")
    dataset_order = ("Scalar", "HandlePayload", "CompositeKey")
    # One fixed color per workload so bars are comparable across subplots.
    workload_colors = {
        "Insert": "#3498db",
        "LookupHit": "#2ecc71",
        "LookupMiss": "#e74c3c",
        "Erase": "#f39c12",
    }
    impl_names = list(all_results.keys())
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    title = "HashMap Benchmark Comparison (1M entries, lower is better)"
    fig.suptitle(title, fontsize=16, fontweight="bold")
    positions = np.arange(len(impl_names))
    bar_width = 0.2
    for ax, dataset in zip(axes, dataset_order):
        for slot, workload in enumerate(workload_order):
            # Missing dataset/workload combinations plot as zero-height bars.
            heights = [
                all_results[impl].get(dataset, {}).get(workload, 0)
                for impl in impl_names
            ]
            # Center the 4-bar group on each implementation's tick.
            ax.bar(
                positions + bar_width * (slot - 1.5),
                heights,
                bar_width,
                label=workload,
                color=workload_colors[workload],
            )
        ax.set_ylabel("Time (ms)", fontsize=12)
        ax.set_title(f"{dataset} Dataset", fontsize=14, fontweight="bold")
        ax.set_xticks(positions)
        ax.set_xticklabels(impl_names, rotation=15, ha="right")
        ax.legend()
        ax.grid(axis="y", alpha=0.3)
    plt.tight_layout()
    plt.savefig(output_path, dpi=300, bbox_inches="tight")
    print(f"Chart saved to: {output_path}")
def main():
    """Load each benchmark JSON from the repo root and generate the chart."""
    script_dir = Path(__file__).parent
    root_dir = script_dir.parent
    # Fallback display name -> expected JSON path (relative to repo root).
    sources = {
        "cheesemap": root_dir / "cheesemap.json",
        "std::unordered_map": root_dir / "unordered.json",
        "tidwall": root_dir / "tidwall.json",
        "absl::flat_hash_map": root_dir / "abseil.json",
    }
    all_results = {}
    for fallback_name, filepath in sources.items():
        if not filepath.exists():
            print(f"Warning: {filepath} not found, skipping...")
            continue
        print(f"Loading {filepath}...")
        parsed = extract_benchmark_data(load_benchmark_json(filepath))
        if parsed:
            # Prefer the implementation name embedded in the benchmark labels.
            impl_name = next(iter(parsed))
            all_results[impl_name] = parsed[impl_name]
        else:
            all_results[fallback_name] = {}
    if not all_results:
        print("Error: No valid benchmark data found!")
        sys.exit(1)
    print("\nGenerating chart...")
    create_comparison_chart(all_results, root_dir / "benchmarks.png")
    print("\nDone!")
if __name__ == "__main__":
main()
|