diff options
| author | Fabrice <fabrice@schaub-dev.xyz> | 2026-03-23 08:12:21 +0100 |
|---|---|---|
| committer | Fabrice <fabrice@schaub-dev.xyz> | 2026-03-23 08:12:21 +0100 |
| commit | 276d79b1fa139756477e4acdb6b5fac51f2acb90 (patch) | |
| tree | 280f823b844341074520c7dceea6427294853e45 | |
| parent | 17f1bd79e251bd570ad79da2249f8e0bdaf5aa9f (diff) | |
adding sse2 path
| -rw-r--r-- | cheesemap.c | 46 | ||||
| -rw-r--r-- | cheesemap.h | 21 | ||||
| -rw-r--r-- | makefile | 5 |
3 files changed, 58 insertions, 14 deletions
diff --git a/cheesemap.c b/cheesemap.c index ade0659..b0a7b62 100644 --- a/cheesemap.c +++ b/cheesemap.c @@ -6,6 +6,11 @@ #include <stdlib.h> #include <string.h> +#include CM_OPT_ASSERT_PATH +#ifndef assert +#error "assert is not defined" +#endif + #define CM_ATTR(...) __attribute__((__VA_ARGS__)) CM_ATTR(hot) static inline uintptr_t cm_ctz(uintptr_t val) { @@ -34,7 +39,7 @@ CM_ATTR(hot) static inline uintptr_t cm_bitmask_lowest_set_bit(bitmask_t mask) { #if CM_GROUP_SIZE == 8 return cm_ctz(mask) / CHAR_BIT; #elif CM_GROUP_SIZE == 16 - return cm_ctz(mask) + return cm_ctz(mask); #else #error "unknown group size" #endif @@ -105,7 +110,33 @@ static inline group_t cm_group_load(const uint8_t* ctrl); static inline bitmask_t cm_group_match_empty_or_deleted(group_t group); static inline bitmask_t cm_group_match_full(group_t group); +/* sse2 implementation */ +#ifdef CM_SSE2 +static inline group_t cm_group_load(const uint8_t* ctrl) { + assert(ctrl != NULL); + return _mm_loadu_si128((const group_t*)ctrl); +} + +static inline bitmask_t cm_group_match_tag(group_t group, uint8_t tag) { + __m128i cmp = _mm_cmpeq_epi8(group, _mm_set1_epi8(tag)); + return _mm_movemask_epi8(cmp); +} + +static inline bitmask_t cm_group_match_empty_or_deleted(group_t group) { + return _mm_movemask_epi8(group); +} + +static inline bitmask_t cm_group_match_empty(group_t group) { + return cm_group_match_tag(group, CM_CTRL_EMPTY); +} + +static inline bitmask_t cm_group_match_full(group_t group) { + return ~cm_group_match_empty_or_deleted(group); +} +#endif + /* scalar implementation */ +#ifndef CM_NO_FALLBACK static inline group_t cm_group_repeat(uint8_t v) { return (group_t)v * (((group_t)-1) / (uint8_t)~0); } @@ -136,8 +167,19 @@ static inline bitmask_t cm_group_match_tag(group_t group, uint8_t tag) { return (cmp - cm_group_repeat(CM_CTRL_END)) & ~cmp & cm_group_repeat(CM_CTRL_DELETED); } +#endif + +/* ctrl's n stuff */ + +static inline uintptr_t cm_h1(cm_hash_t hash) { + return (uintptr_t)(hash >> CM_FP_SIZE); +} + +static inline uint8_t cm_h2(cm_hash_t hash) { + uintptr_t top = hash >> (sizeof(cm_hash_t) * CHAR_BIT - CM_FP_SIZE); + return (uint8_t)(top & CM_H2_MASK); +} -/* static ctrl's */ const uint8_t CM_CTRL_STATIC_EMPTY[CM_GROUP_SIZE] = {[0 ... CM_GROUP_SIZE - 1] = CM_CTRL_EMPTY}; diff --git a/cheesemap.h b/cheesemap.h index 7cdb2de..5d87a6c 100644 --- a/cheesemap.h +++ b/cheesemap.h @@ -13,14 +13,20 @@ extern "C" { #include <stdbool.h> #include <stdint.h> -#include CM_OPT_ASSERT_PATH -#ifndef assert -#error "assert is not defined" +#ifdef CM_SSE2 +#include <emmintrin.h> + +typedef __m128i group_t; +typedef uint16_t bitmask_t; +#define CM_GROUP_SIZE 16 +#define CM_NO_FALLBACK #endif +#ifndef CM_NO_FALLBACK typedef uintptr_t group_t; typedef group_t bitmask_t; #define CM_GROUP_SIZE __SIZEOF_POINTER__ +#endif //////////////////////////////// // cheesemap callback functions @@ -63,15 +69,6 @@ enum { }; -static inline uintptr_t cm_h1(cm_hash_t hash) { - return (uintptr_t)(hash >> CM_FP_SIZE); -} - -static inline uint8_t cm_h2(cm_hash_t hash) { - uintptr_t top = hash >> (sizeof(cm_hash_t) * CHAR_BIT - CM_FP_SIZE); - return (uint8_t)(top & CM_H2_MASK); -} - extern const uint8_t CM_CTRL_STATIC_EMPTY[CM_GROUP_SIZE]; struct cheesemap_raw { @@ -5,6 +5,7 @@ CM_OPT_ASSERT_PATH ?= <assert.h> CM_OPT_RELEASE ?= 1 CM_OPT_ENABLE_UBSAN ?= 0 CM_OPT_ENABLE_ASAN ?= 0 +CM_OPT_ENABLE_SSE2 ?= 0 CC ?= gcc @@ -39,6 +40,10 @@ ifeq ($(CM_OPT_ENABLE_ASAN),1) CM_CC_FLAGS += -fsanitize=address endif +ifeq ($(CM_OPT_ENABLE_SSE2),1) + CM_CC_FLAGS += -DCM_SSE2=1 +endif + .PHONY: all all: $(CM_OBJECT) $(CM_DEMO) |
