aboutsummaryrefslogtreecommitdiffstats
path: root/README.md
blob: a6ae2f4323e4b5f5a11db184f4172f7301f53225 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
Cheesemap
=========

Cheesemap is a Swiss-table HashMap implementation in C. It is designed to be fast, easy to understand, and easy to embed - the entire implementation is a single `.c` / `.h` pair with no dependencies.

The map is open-addressing with optional SIMD-accelerated control-byte matching (SSE2). It may use slightly more memory than necessary immediately after a resize and does not yet provide a shrink method. Not yet production-tested but fully functional.

## Example

```c
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cheesemap.h"

_Noreturn void panic_impl(const char* file, cm_u32 line, const char* fmt, ...) {
  fprintf(stderr, "Panic at %s:%u: ", file, line);
  va_list args;
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  fprintf(stderr, "\n");
  abort();
}

#define countof(arr) (sizeof(arr) / sizeof(*(arr)))

cm_u64 hash_string(const cm_u8* key, cm_u8* user) {
  (void)user;
  const char* str = *(const char**)key;
  cm_u64 hash = 5381;
  int c;
  while ((c = *str++)) hash = ((hash << 5) + hash) + c;
  return hash;
}

bool compare_string(const cm_u8* key1, const cm_u8* key2, cm_u8* user) {
  (void)user;
  return strcmp(*(const char**)key1, *(const char**)key2) == 0;
}

void* default_alloc(cm_usize size, cm_usize align, cm_u8* user) {
  (void)user;
  return aligned_alloc(align, size);
}

void default_dealloc(void* ptr, cm_u8* user) {
  (void)user;
  free(ptr);
}

int main(void) {
  struct cheesemap map;
  cm_init_(&map, const char*, int, NULL, hash_string, compare_string,
           default_alloc, default_dealloc);

  const char* words[] = {"hello", "world", "hello", "cheesemap", "world", "hello"};
  for (size_t i = 0; i < countof(words); i++) {
    int* count;
    if (cm_lookup_(&map, words[i], &count)) {
      (*count)++;
    } else {
      int initial = 1;
      cm_insert_(&map, words[i], initial);
    }
  }

  printf("Word frequencies:\n");
  struct cheesemap_iter iter;
  cm_iter_init(&iter, &map);
  const char** word;
  int* count;
  while (cm_iter_next_(&iter, &map, &word, &count)) {
    printf("  %s: %d\n", *word, *count);
  }

  const char* search = "hello";
  if (cm_lookup_(&map, search, &count))
    printf("\n'%s' appears %d times\n", search, *count);

  const char* remove = "world";
  cm_remove_(&map, remove, NULL);
  printf("Removed '%s'\n", remove);

  if (!cm_lookup_(&map, remove, &count))
    printf("'%s' no longer in map\n", remove);

  cm_drop(&map);
  return 0;
}
```

## Benchmarks

Comprehensive comparison of 1,000,000 entry operations (Insert, LookupHit, LookupMiss, Erase) across multiple data types. All implementations use the same xxh_avalanche hash function. Times shown are mean values across 5 repetitions with SSE2 enabled.

![Benchmark Comparison](benchmarks.png)

**Notes:**
- absl::flat_hash_map is Google's reference Swiss table implementation
- std::unordered_map uses chained hashing (separate chaining with linked lists)
- tidwall_hashmap is a C implementation using open addressing
- All times in milliseconds (lower is better)

---

Copyright © 2026 Fabrice