From d9413395d91f80ec7d70eb2e4667db672c763584 Mon Sep 17 00:00:00 2001 From: Fabrice Date: Sat, 21 Mar 2026 10:39:07 +0100 Subject: working on reserving memory adding npow2 working on reserve full allocations --- cheesemap.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- cheesemap.h | 22 +++++++++++-- 2 files changed, 123 insertions(+), 6 deletions(-) diff --git a/cheesemap.c b/cheesemap.c index e610a84..cd4048f 100644 --- a/cheesemap.c +++ b/cheesemap.c @@ -16,10 +16,38 @@ static inline uintptr_t cm_ctz(uintptr_t val) { #error "unknown word width" #endif #else -#error "ctz not implemented for this compiler" +#error "ctz not implemented" #endif } +static inline uintptr_t cm_clz(uintptr_t val) { + assert(val != 0); +#if defined(__GNUC__) || defined(__clang__) +#if UINTPTR_MAX == UINT64_MAX + return (uintptr_t)__builtin_clzll(val); +#elif UINTPTR_MAX == UINT32_MAX + return (uintptr_t)__builtin_clz(val); +#else +#error "unknown word width" +#endif +#else +#error "clz not implemented" +#endif +} + +#define cm_max(x, y) x > y ? x : y +#define cm_ispow2(x) (((x) & ((x) - 1)) == 0) + +static inline uintptr_t cm_align_up(uintptr_t value, uintptr_t alignment) { + assert(cm_ispow2(alignment)); + return (value + alignment - 1) & ~(alignment - 1); +} + +static inline uintptr_t cm_npow2(uintptr_t v) { + if (v <= 1) return 1; + return (uintptr_t)1 << (CM_WORD_WIDTH - cm_clz(v - 1)); +} + struct sequence { uintptr_t pos; uintptr_t stride; @@ -71,6 +99,24 @@ const uint8_t CM_CTRL_STATIC_EMPTY[CM_GROUP_SIZE] = {[0 ... CM_GROUP_SIZE - 1] = CM_CTRL_EMPTY}; /* hashmap implementation */ +static inline uintptr_t cm_buckets_to_capacity(uintptr_t bucket_mask) { + uintptr_t num_buckets = bucket_mask + 1; + return (num_buckets / CM_LOAD_DENOM) * CM_LOAD_NUM; +} + +static inline uintptr_t cm_capacity_to_buckets(uintptr_t capacity) { + uintptr_t min_buckets = + (capacity * CM_LOAD_DENOM + CM_LOAD_NUM - 1) / CM_LOAD_NUM; + uintptr_t buckets = cm_npow2(min_buckets); + return cm_max(buckets, CM_GROUP_SIZE); +} + +static inline uintptr_t cm_ctrl_offset(uintptr_t buckets, + uintptr_t entry_size) { + uintptr_t offset = entry_size * buckets; + return cm_align_up(offset, CM_GROUP_SIZE); +} + static inline uint8_t* cm_raw_elem_at(const struct cheesemap_raw* map, uintptr_t index, uintptr_t entry_size) { assert(map != NULL); @@ -82,7 +128,9 @@ static inline uint8_t* cm_raw_elem_at(const struct cheesemap_raw* map, static inline uint8_t* cm_raw_origin(const struct cheesemap_raw* map, uintptr_t entry_size) { assert(map != NULL); - return cm_raw_elem_at(map, map->bucket_mask + 1, entry_size); + uintptr_t buckets = map->bucket_mask + 1; + uintptr_t ctrl_offset = cm_ctrl_offset(buckets, entry_size); + return map->ctrl - ctrl_offset; } static inline void cm_raw_ctrl_set(struct cheesemap_raw* map, uintptr_t index, @@ -146,6 +194,57 @@ static void cm_raw_insert_at(struct cheesemap_raw* map, cm_hash_t hash, map->count += 1; } +static bool cm_raw_resize(struct cheesemap_raw* map, + const struct cheesemap_fns* fns, uintptr_t key_size, + uintptr_t value_size, uintptr_t new_capacity) { + assert(map != NULL && fns != NULL); + + struct cheesemap_raw new_map; + bool success = + cm_raw_new_with(&new_map, fns, key_size, value_size, new_capacity); + if (!success) return success; + + return true; +} + +bool cm_raw_new_with(struct cheesemap_raw* map, const struct cheesemap_fns* fns, + uintptr_t key_size, uintptr_t value_size, + uintptr_t initial_capacity) { + assert(map != NULL); + memset(map, 0, sizeof(*map)); + + uintptr_t buckets = cm_capacity_to_buckets(initial_capacity); + uintptr_t entry_size = key_size + value_size; + uintptr_t ctrl_offset = cm_ctrl_offset(buckets, entry_size); + uintptr_t size = ctrl_offset + buckets + CM_GROUP_SIZE; + + uint8_t* ptr = fns->malloc(size, fns->mem_usr); + if (ptr == NULL) return false; + + uint8_t* ctrl = ptr + ctrl_offset; + memset(ctrl, CM_CTRL_EMPTY, buckets); + memcpy(ctrl + buckets, ctrl, CM_GROUP_SIZE); + + map->bucket_mask = buckets - 1; + map->growth_left = cm_buckets_to_capacity(map->bucket_mask); + map->ctrl = ctrl; + + return true; +} + +bool cm_raw_reserve(struct cheesemap_raw* map, const struct cheesemap_fns* fns, + uintptr_t key_size, uintptr_t value_size, + uintptr_t additional) { + assert(map != NULL && fns != NULL); + + if (map->growth_left >= additional) return true; + // TODO: inplace rehash + uintptr_t needed = map->count + additional; + uintptr_t capacity = cm_buckets_to_capacity(map->bucket_mask); + uintptr_t new_capacity = cm_max(needed, capacity + 1); + return cm_raw_resize(map, fns, key_size, value_size, new_capacity); +} + bool cm_raw_insert(struct cheesemap_raw* map, const struct cheesemap_fns* fns, uintptr_t key_size, const uint8_t* key, uintptr_t value_size, const uint8_t* value) { @@ -157,7 +256,9 @@ bool cm_raw_insert(struct cheesemap_raw* map, const struct cheesemap_fns* fns, uint8_t old_ctrl = map->ctrl[index]; if (map->growth_left == 0 && cm_ctrl_is_empty(old_ctrl)) { - // TODO: do resize + bool success = cm_raw_reserve(map, fns, key_size, value_size, 1); + if (!success) return success; + index = cm_raw_find_insert_index(map, hash); } cm_raw_insert_at(map, hash, index, key_size, key, value_size, value); diff --git a/cheesemap.h b/cheesemap.h index a4b007b..8002fbb 100644 --- a/cheesemap.h +++ b/cheesemap.h @@ -29,8 +29,7 @@ enum { typedef uint64_t cm_hash_t; /* memory methods */ -typedef void* (*cm_malloc_fn)(uintptr_t size, uintptr_t alignment, - uint8_t* user); +typedef void* (*cm_malloc_fn)(uintptr_t size, uint8_t* user); typedef void (*cm_free_fn)(void* ptr, uint8_t* user); /* hash and compare methods */ @@ -57,7 +56,12 @@ struct cheesemap_fns { // [entries...][control bits...][mirror first CM_GROUP_SIZE bits] enum { + // cheesemap config CM_INITIAL_CAPACITY = CM_GROUP_SIZE, + CM_LOAD_DENOM = 8, + CM_LOAD_NUM = 7, + // + // ctrl ops // -1 as i8, all bits set, top bit = 1 CM_CTRL_EMPTY = 0xFF, // 0b1111_1111 // -128 as i8, top bit = 1 @@ -66,7 +70,13 @@ enum { CM_H2_MASK = 0x7F, // 0b0111_1111 // Mask to get bottom bit CM_CTRL_END = 0x01, // 0b0000_0001 - CM_FP_SIZE = 7 + // Number of fingerprint bytes + CM_FP_SIZE = 7, + // + // aux + // Size of a word in bits + CM_WORD_WIDTH = sizeof(uintptr_t) * CHAR_BIT, + }; static inline uintptr_t cm_h1(cm_hash_t hash) { @@ -94,9 +104,15 @@ struct cheesemap_raw { #define cm_raw_new() \ ((struct cheesemap_raw){.ctrl = (uint8_t*)CM_CTRL_STATIC_EMPTY}) +bool cm_raw_new_with(struct cheesemap_raw* map, const struct cheesemap_fns* fns, + uintptr_t key_size, uintptr_t value_size, + uintptr_t initial_capacity); bool cm_raw_insert(struct cheesemap_raw* map, const struct cheesemap_fns* fns, uintptr_t key_size, const uint8_t* key, uintptr_t value_size, const uint8_t* value); +bool cm_raw_reserve(struct cheesemap_raw* map, const struct cheesemap_fns* fns, + uintptr_t key_size, uintptr_t value_size, + uintptr_t additional); void cm_raw_drop(struct cheesemap_raw* map, uintptr_t key_size, uintptr_t value_size, const struct cheesemap_fns* fns); -- cgit v1.2.3