diff --git a/README.md b/README.md index aaf23c59..1c727998 100644 --- a/README.md +++ b/README.md @@ -832,6 +832,8 @@ List of added, changed. removed features: * algorithm: Added shuffle, iter_swap, reverse, reverse_range, lexicographical_compare, is_sorted, is_sorted_until. Requires now INCLUDE_ALGORITHM + * unordered_set and children: removed the hash and equal init arguments and struct fields. + Both functions must now be declared statically beforehand as `T_hash` and `T_equal`. * array: Added difference, intersection, symmetric_difference, assign_range. * set: Added includes, includes_range. * string: Added find_if, find_if_not, find_if_range, find_if_not_range, includes, diff --git a/api.lst b/api.lst index ea5f9ead..6d78ba7b 100644 --- a/api.lst +++ b/api.lst @@ -343,7 +343,7 @@ ctl/unordered_set.h: max_bucket_count (A *self) ctl/unordered_set.h: load_factor (A *self) ctl/unordered_set.h: _reserve (A *self, const size_t new_size) ctl/unordered_set.h: reserve (A *self, size_t desired_count) -ctl/unordered_set.h: init (size_t (*_hash)(T *), int (*_equal)(T *, T *)) +ctl/unordered_set.h: init (void) ctl/unordered_set.h: init_from (A *copy) ctl/unordered_set.h: rehash (A *self, size_t desired_count) ctl/unordered_set.h: _rehash (A *self, size_t count) diff --git a/ctl/bits/integral.h b/ctl/bits/integral.h index d6aa9aa7..9a392b4d 100644 --- a/ctl/bits/integral.h +++ b/ctl/bits/integral.h @@ -1,4 +1,5 @@ -/* Type utilities, to apply default equal, compare, hash methods for intergral types. +/* Type utilities, to apply default equal and compare methods for integral types, + plus the default hash functions. See MIT LICENSE. */ @@ -14,7 +15,47 @@ _define_integral_compare(long) #undef _define_integral_compare */ +#include + +#ifndef CTL_HASH_DEFAULTS +#define CTL_HASH_DEFAULTS +static inline uint32_t ctl_int32_hash(uint32_t key) +{ + key = ((key >> 16) ^ key) * 0x45d9f3b; + key = ((key >> 16) ^ key) * 0x45d9f3b; + key = (key >> 16) ^ key; + return key; +} +/* FNV1a. 
Eventually wyhash or o1hash */ +static inline size_t ctl_string_hash(const char* key) +{ + size_t h; + h = 2166136261u; + for (size_t i = 0; key[i] != '\0'; i++) /* walk to the NUL once; no strlen() rescan per iteration */ + { + h ^= (unsigned char)key[i]; + h *= 16777619; + } + return h; +} + #if defined(POD) && !defined(NOT_INTEGRAL) +static inline int JOIN(T, equal)(T *a, T *b) +{ + return *a == *b; +} +#endif + +#endif //CTL_HASH_DEFAULTS + +#if defined(POD) && !defined(NOT_INTEGRAL) + +#ifdef CTL_USET +static inline size_t _JOIN(A, _default_integral_hash)(T *a) +{ + return ctl_int32_hash((uint32_t)*a); +} +#endif //USET static inline int _JOIN(A, _default_integral_compare3)(T *a, T *b) { @@ -34,30 +75,6 @@ static inline int _JOIN(A, _default_integral_equal)(T *a, T *b) */ } -static inline size_t _JOIN(A, _default_integral_hash)(T *a) -{ - return (size_t)*a; -} - -#include - -#if defined str || defined u8string || defined charp || defined u8ident || defined ucharp - -static inline size_t _JOIN(A, _default_string_hash)(T *key) -{ - size_t h; - /* FNV1a, not wyhash */ - h = 2166136261u; - for (unsigned i = 0; i < strlen((char *)key); i++) - { - h ^= (unsigned char)key[i]; - h *= 16777619; - } - return h; -} - -#endif - #define CTL_STRINGIFY_HELPER(n) #n #define CTL_STRINGIFY(n) CTL_STRINGIFY_HELPER(n) #define _strEQcc(s1c, s2c) !strcmp(s1c "", s2c "") @@ -83,47 +100,29 @@ static inline bool _JOIN(A, _type_is_integral)(void) _strEQcc(CTL_STRINGIFY(T), "llong"); } -// not C++ -#ifndef __cplusplus -#define __set_str_hash(self, t) \ - { \ - typeof(t) tmp = (x); \ - if (__builtin_types_compatible_p(typeof(t), char *)) \ - self->hash = _JOIN(A, _default_string_hash); \ - else if (__builtin_types_compatible_p(typeof(t), unsigned char *)) \ - self->hash = _JOIN(A, _default_string_hash); \ - } -#else -#define __set_str_hash(self, t) self->hash = _JOIN(A, _default_string_hash) -#endif - static inline void _JOIN(A, _set_default_methods)(A *self) { #if !defined CTL_STR #if defined str || defined u8string || defined charp || 
defined u8ident || defined ucharp { -#ifdef CTL_USET - if (!self->hash) - __set_str_hash(self, T); -#else +#ifndef CTL_USET if (!self->compare) self->compare = str_key_compare; -#endif if (!self->equal) self->equal = str_equal; +#endif } else #endif #endif -#ifdef CTL_USET - if (!self->hash) - self->hash = _JOIN(A, _default_integral_hash); -#else +#ifndef CTL_USET if (!self->compare) self->compare = _JOIN(A, _default_integral_compare); -#endif if (!self->equal) self->equal = _JOIN(A, _default_integral_equal); +#else + (void)self; +#endif } #else diff --git a/ctl/unordered_set.h b/ctl/unordered_set.h index 5a69fd01..7abc9bf6 100644 --- a/ctl/unordered_set.h +++ b/ctl/unordered_set.h @@ -118,8 +118,6 @@ typedef struct A float max_load_factor; void (*free)(T *); T (*copy)(T *); - size_t (*hash)(T *); - int (*equal)(T *, T *); #if CTL_USET_SECURITY_COLLCOUNTING == 4 bool is_sorted_vector; #elif CTL_USET_SECURITY_COLLCOUNTING == 5 @@ -144,11 +142,11 @@ static inline size_t JOIN(A, bucket_count)(A *self) static inline size_t JOIN(I, index)(A *self, T value) { #ifdef CTL_USET_GROWTH_POWER2 - return self->hash(&value) & self->bucket_max; + return JOIN(T, hash)(&value) & self->bucket_max; #elif __WORDSIZE == 127 - return ((uint64_t) self->hash(&value) * ((uint64_t) self->bucket_max + 1)) >> 32; + return ((uint64_t) JOIN(T, hash)(&value) * ((uint64_t) self->bucket_max + 1)) >> 32; #else - return self->hash(&value) % (self->bucket_max + 1); + return JOIN(T, hash)(&value) % (self->bucket_max + 1); #endif } @@ -322,10 +320,12 @@ JOIN(I, range)(A* container, I* begin, I* end) } */ +// needed for algorithm static inline int JOIN(A, _equal)(A *self, T *a, T *b) { - ASSERT(self->equal || !"equal undefined"); - return self->equal(a, b); + //ASSERT(JOIN(T, equal) || !"equal undefined"); + (void)self; + return JOIN(T, equal)(a, b); } static inline A JOIN(A, init_from)(A *copy); @@ -518,7 +518,7 @@ static inline B **JOIN(A, _bucket_hash)(A *self, size_t hash) static inline B **JOIN(A, 
_bucket)(A *self, T value) { const size_t hash = JOIN(I, index)(self, value); - //LOG ("_bucket %lx %% %lu => %zu\n", self->hash(&value), self->bucket_max + 1, hash); + //LOG ("_bucket %lx %% %lu => %zu\n", JOIN(T, hash)(&value), self->bucket_max + 1, hash); return &self->buckets[hash]; } #endif @@ -526,7 +526,7 @@ static inline B **JOIN(A, _bucket)(A *self, T value) static inline size_t JOIN(A, bucket)(A *self, T value) { const size_t hash = JOIN(I, index)(self, value); - //LOG ("bucket %lx %% %lu => %zu\n", self->hash(&value), self->bucket_max + 1, hash); + //LOG ("bucket %lx %% %lu => %zu\n", JOIN(T, hash)(&value), self->bucket_max + 1, hash); return hash; } @@ -613,12 +613,10 @@ static inline void JOIN(A, reserve)(A *self, size_t desired_count) JOIN(A, _rehash)(self, new_size); } -static inline A JOIN(A, init)(size_t (*_hash)(T *), int (*_equal)(T *, T *)) +static inline A JOIN(A, init)(void) { static A zero; A self = zero; - self.hash = _hash; - self.equal = _equal; #ifdef POD self.copy = JOIN(A, implicit_copy); _JOIN(A, _set_default_methods)(&self); @@ -633,16 +631,8 @@ static inline A JOIN(A, init)(size_t (*_hash)(T *), int (*_equal)(T *, T *)) static inline A JOIN(A, init_from)(A *copy) { - static A zero; - A self = zero; -#ifdef POD - self.copy = JOIN(A, implicit_copy); -#else - self.free = JOIN(T, free); - self.copy = JOIN(T, copy); -#endif - self.hash = copy->hash; - self.equal = copy->equal; + A self = JOIN(A, init)(); + JOIN(A, _reserve)(&self, copy->bucket_max + 1); return self; } @@ -650,7 +640,7 @@ static inline void JOIN(A, rehash)(A *self, size_t desired_count) { if (desired_count == (self->bucket_max + 1)) return; - A rehashed = JOIN(A, init)(self->hash, self->equal); + A rehashed = JOIN(A, init)(); JOIN(A, reserve)(&rehashed, desired_count); if (LIKELY(self->buckets && self->size)) // if desired_count 0 { @@ -681,7 +671,7 @@ static inline void JOIN(A, _rehash)(A *self, size_t count) // we do allow shrink here if (count == self->bucket_max + 1) 
return; - A rehashed = JOIN(A, init)(self->hash, self->equal); + A rehashed = JOIN(A, init)(); //LOG("_rehash %zu => %zu\n", self->size, count); JOIN(A, _reserve)(&rehashed, count); @@ -714,7 +704,7 @@ static inline B *JOIN(A, find_node)(A *self, T value) if (self->size) { #ifdef CTL_USET_CACHED_HASH - size_t hash = self->hash(&value); + size_t hash = JOIN(T, hash)(&value); B **buckets = JOIN(A, _bucket_hash)(self, hash); #else B **buckets = JOIN(A, _bucket)(self, value); @@ -739,7 +729,7 @@ static inline B *JOIN(A, find_node)(A *self, T value) if (n->cached_hash != hash) continue; #endif - if (self->equal(&value, &n->value)) + if (JOIN(T, equal)(&value, &n->value)) { #if 0 // not yet // speedup subsequent read accesses? @@ -802,7 +792,7 @@ static inline B **JOIN(A, push_cached)(A *self, T *value) #endif #ifdef CTL_USET_CACHED_HASH - size_t hash = self->hash(value); + size_t hash = JOIN(T, hash)(value); B **buckets = JOIN(A, _bucket_hash)(self, hash); JOIN(B, push)(buckets, JOIN(B, init_cached)(*value, hash)); #else @@ -899,7 +889,7 @@ static inline I JOIN(A, emplace_hint)(I *pos, T *value) if (!JOIN(I, done)(pos)) { #ifdef CTL_USET_CACHED_HASH - size_t hash = self->hash(value); + size_t hash = JOIN(T, hash)(value); B **buckets = JOIN(A, _bucket_hash)(self, hash); #else B **buckets = JOIN(A, _bucket)(self, *value); @@ -924,7 +914,7 @@ static inline I JOIN(A, emplace_hint)(I *pos, T *value) if (n->cached_hash != hash) continue; #endif - if (self->equal(value, &n->value)) + if (JOIN(T, equal)(value, &n->value)) { FREE_VALUE(self, *value); return JOIN(I, iter)(self, n); @@ -1060,7 +1050,7 @@ static inline void JOIN(A, _linked_erase)(A *self, B **bucket, B *n, B *prev, B static inline void JOIN(A, erase)(A *self, T value) { #ifdef CTL_USET_CACHED_HASH - size_t hash = self->hash(&value); + size_t hash = JOIN(T, hash)(&value); B **buckets = JOIN(A, _bucket_hash)(self, hash); #else B **buckets = JOIN(A, _bucket)(self, value); @@ -1078,7 +1068,7 @@ static inline void 
JOIN(A, erase)(A *self, T value) continue; } #endif - if (self->equal(&value, &n->value)) + if (JOIN(T, equal)(&value, &n->value)) { JOIN(A, _linked_erase)(self, buckets, n, prev, next); break; @@ -1115,7 +1105,7 @@ static inline size_t JOIN(A, erase_if)(A *self, int (*_match)(T *)) static inline A JOIN(A, copy)(A *self) { // LOG ("copy\norig size: %lu\n", self->size); - A other = JOIN(A, init)(self->hash, self->equal); + A other = JOIN(A, init)(); JOIN(A, _reserve)(&other, self->bucket_max + 1); foreach (A, self, it) { @@ -1154,7 +1144,7 @@ static inline void JOIN(A, erase_generic)(A* self, GI *range) static inline A JOIN(A, union)(A *a, A *b) { - A self = JOIN(A, init)(a->hash, a->equal); + A self = JOIN(A, init)(); JOIN(A, _reserve)(&self, 1 + MAX(a->bucket_max, b->bucket_max)); foreach (A, a, it1) JOIN(A, insert)(&self, self.copy(it1.ref)); @@ -1182,7 +1172,7 @@ static inline A JOIN(A, union_range)(I *r1, GI *r2) static inline A JOIN(A, intersection)(A *a, A *b) { - A self = JOIN(A, init)(a->hash, a->equal); + A self = JOIN(A, init)(); foreach (A, a, it) if (JOIN(A, find_node)(b, *it.ref)) JOIN(A, insert)(&self, self.copy(it.ref)); @@ -1192,7 +1182,7 @@ static inline A JOIN(A, intersection)(A *a, A *b) static inline A JOIN(A, intersection_range)(I *r1, GI *r2) { A *a = r1->container; - A self = JOIN(A, init)(a->hash, a->equal); + A self = JOIN(A, init)(); void (*next2)(struct I*) = r2->vtable.next; T* (*ref2)(struct I*) = r2->vtable.ref; int (*done2)(struct I*) = r2->vtable.done; @@ -1214,7 +1204,7 @@ static inline A JOIN(A, intersection_range)(I *r1, GI *r2) static inline A JOIN(A, difference)(A *a, A *b) { - A self = JOIN(A, init)(a->hash, a->equal); + A self = JOIN(A, init)(); foreach (A, a, it) if (!JOIN(A, find_node)(b, *it.ref)) JOIN(A, insert)(&self, self.copy(it.ref)); diff --git a/docs/hashtable.md b/docs/hashtable.md index 6c3fff95..8e6948e6 100644 --- a/docs/hashtable.md +++ b/docs/hashtable.md @@ -1,6 +1,6 @@ # hashtable - CTL - C Container Template 
library -Defined in header ****, CTL prefix **htbl**, +Defined in header ****, CTL prefix **hmap**, parent of [unordered_map](unordered_map.md) No implementation yet. @@ -13,7 +13,7 @@ No implementation yet. } charint; static inline size_t - charint_hash(charint *a) { return FNV1a(a->key); } + charint_hash(charint *a) { return ctl_string_hash(a->key); } static inline int charint_equal(charint *a, charint *b) { return strcmp(a->key, b->key) == 0; } @@ -35,19 +35,19 @@ No implementation yet. #define T charint #include - htbl_charint a = htbl_charint_init(1000, charint_hash, charint_equal); + hmap_charint a = hmap_charint_init(); char c_char[36]; for (int i=0; i<1000; i++) { snprintf(c_char, 36, "%c%d", 48 + (rand() % 74), rand()); //str s = (str){.value = c_char}; - htbl_charint_insert(&a, charint_copy(&(charint){ c_char, i })); + hmap_charint_insert(&a, charint_copy(&(charint){ c_char, i })); } - foreach(htbl_charint, &a, it) { strcpy (c_char, it.ref->key); } + foreach(hmap_charint, &a, it) { strcpy (c_char, it.ref->key); } printf("last key \"%s\", ", c_char); - foreach(htbl_charint, &a, it) { htbl_charint_bucket_size(it.node); } - printf("htbl_charint load_factor: %f\n", htbl_charint_load_factor(&a)); - htbl_charint_free(&a); + foreach(hmap_charint, &a, it) { hmap_charint_bucket_size(it.node); } + printf("hmap_charint load_factor: %f\n", hmap_charint_load_factor(&a)); + hmap_charint_free(&a); ## DESCRIPTION @@ -55,8 +55,8 @@ No implementation yet. that contains a set of unique objects of type Key. Search, insertion, and removal have average constant-time complexity. -The function names are composed of the prefix **htbl_**, the user-defined type -**T** and the method name. E.g `htbl_charint` with `#define T charint`. The type +The function names are composed of the prefix **hmap_**, the user-defined type +**T** and the method name. E.g `hmap_charint` with `#define T charint`. The type must be a custom struct. 
Internally, the elements are not sorted in any particular order, but organized @@ -83,19 +83,19 @@ It has the same API as unordered_map, just no Bucket interface. ## Member functions -[init](htbl/init.md) `(bucket_count, T_hash(T*), T_equal(T*, T*))` +[init](hmap/init.md) `(bucket_count, T_hash(T*), T_equal(T*, T*))` constructs the hash table. -[free](htbl/free.md) `(A* self)` +[free](hmap/free.md) `(A* self)` destructs the hash table. -[assign](htbl/assign.md) `(A* self, A* other)` +[assign](hmap/assign.md) `(A* self, A* other)` replaces the contents of the container. -[copy](htbl/copy.md) `(A* self)` +[copy](hmap/copy.md) `(A* self)` returns a copy of the container. @@ -122,15 +122,15 @@ See [iterators](iterators.md) for more. ## Capacity -[empty](htbl/empty.md) `(A* self)` +[empty](hmap/empty.md) `(A* self)` checks whether the container is empty -[size](htbl/size.md) `(A* self)` +[size](hmap/size.md) `(A* self)` returns the number of non-empty and non-deleted elements -[capacity](htbl/size.md) `(A* self)` +[capacity](hmap/size.md) `(A* self)` returns the size of the array @@ -140,15 +140,15 @@ returns the maximum possible number of elements ## Modifiers -[clear](htbl/clear.md) `(A* self)` +[clear](hmap/clear.md) `(A* self)` clears the contents -[insert](htbl/insert.md) `(A* self, T value)` +[insert](hmap/insert.md) `(A* self, T value)` inserts the element `(C++17)` -[emplace](htbl/emplace.md) `(A* self, T values...)` +[emplace](hmap/emplace.md) `(A* self, T values...)` constructs elements in-place. (NYI) @@ -156,89 +156,89 @@ constructs elements in-place. (NYI) constructs elements in-place at position. 
(NYI) -[erase](htbl/erase.md) `(A* self, T key)` +[erase](hmap/erase.md) `(A* self, T key)` erases the element by key -[erase_it](htbl/erase.md) `(A* self, I* pos)` +[erase_it](hmap/erase.md) `(A* self, I* pos)` erases the element at pos -[erase_range](htbl/erase.md) `(A* self, I* first, I* last)` +[erase_range](hmap/erase.md) `(A* self, I* first, I* last)` erases elements -[swap](htbl/swap.md) `(A* self, A* other)` +[swap](hmap/swap.md) `(A* self, A* other)` swaps the contents -[extract](htbl/extract.md) `(A* self, T key)` -[extract_it](htbl/extract.md) `(A* self, I* pos)` +[extract](hmap/extract.md) `(A* self, T key)` +[extract_it](hmap/extract.md) `(A* self, I* pos)` extracts a node from the container. NYI -[merge](htbl/merge.md) `(A* self)` +[merge](hmap/merge.md) `(A* self)` splices nodes from another container ## Lookup -[count](htbl/count.md) `(A* self)` +[count](hmap/count.md) `(A* self)` returns the number of elements matching specific key -[find](htbl/find.md) `(A* self, T key)` +[find](hmap/find.md) `(A* self, T key)` finds element with specific key -[contains](htbl/contains.md) `(A* self, T key)` +[contains](hmap/contains.md) `(A* self, T key)` checks if the container contains element with specific key. (C++20) -[equal_range](htbl/equal_range.md) `(A* self)` +[equal_range](hmap/equal_range.md) `(A* self)` returns range of elements matching a specific key. (NYI) ## Hash policy -[load_factor](htbl/load_factor.md) `(A* self)` +[load_factor](hmap/load_factor.md) `(A* self)` returns average number of elements per bucket -[max_load_factor](htbl/max_load_factor.md) `()` -[set_max_load_factor](htbl/max_load_factor.md) `(A* self, float factor)` +[max_load_factor](hmap/max_load_factor.md) `()` +[set_max_load_factor](hmap/max_load_factor.md) `(A* self, float factor)` manages maximum average number of elements per bucket. 
defaults to 0.70 -[rehash](htbl/rehash.md) `(A* self, size_t bucket_count)` +[rehash](hmap/rehash.md) `(A* self, size_t bucket_count)` reserves at least the specified number of buckets. This regenerates the hash table. -[reserve](htbl/reserve.md) `(A* self, size_t desired_size)` +[reserve](hmap/reserve.md) `(A* self, size_t desired_size)` reserves space for at least the specified number of elements. This regenerates the hash table. ## Non-member functions -[swap](htbl/swap.md) `(A* self)` +[swap](hmap/swap.md) `(A* self)` specializes the swap algorithm -[remove_if](htbl/remove_if.md) `(A* self, int T_match(T*))` +[remove_if](hmap/remove_if.md) `(A* self, int T_match(T*))` Removes all elements satisfying specific criteria. -[erase_if](htbl/erase_if.md) `(A* self, int T_match(T*))` +[erase_if](hmap/erase_if.md) `(A* self, int T_match(T*))` erases all elements satisfying specific criteria (C++20) -[intersection](htbl/intersection.md) `(A* self, A* other)` +[intersection](hmap/intersection.md) `(A* self, A* other)` -[union](htbl/union.md) `(A* self, A* other)` +[union](hmap/union.md) `(A* self, A* other)` -[difference](htbl/difference.md) `(A* self, A* other)` +[difference](hmap/difference.md) `(A* self, A* other)` -[symmetric_difference](htbl/symmetric_difference.md) `(A* self, A* other)` +[symmetric_difference](hmap/symmetric_difference.md) `(A* self, A* other)` diff --git a/docs/images/uset.log.png b/docs/images/uset.log.png index dc762c0e..d3540f67 100644 Binary files a/docs/images/uset.log.png and b/docs/images/uset.log.png differ diff --git a/docs/images/uset_cached.log.png b/docs/images/uset_cached.log.png index 1dc8bb01..cdfe8604 100644 Binary files a/docs/images/uset_cached.log.png and b/docs/images/uset_cached.log.png differ diff --git a/docs/images/uset_pow2.log.png b/docs/images/uset_pow2.log.png index fd4b0aa3..113f3f04 100644 Binary files a/docs/images/uset_pow2.log.png and b/docs/images/uset_pow2.log.png differ diff --git a/docs/index.md b/docs/index.md 
index 14d5c613..5ad04729 100644 --- a/docs/index.md +++ b/docs/index.md @@ -834,6 +834,8 @@ List of added, changed. removed features: * algorithm: Added shuffle, iter_swap, reverse, reverse_range, lexicographical_compare, is_sorted, is_sorted_until. Requires now INCLUDE_ALGORITHM + * unordered_set and children: removed the hash and equal init arguments and struct fields. + Both functions must now be declared statically beforehand as `T_hash` and `T_equal`. * array: Added difference, intersection, symmetric_difference, assign_range. * set: Added includes, includes_range. * string: Added find_if, find_if_not, find_if_range, find_if_not_range, includes, diff --git a/docs/unordered_map.md b/docs/unordered_map.md index 4c7aac75..885dab6d 100644 --- a/docs/unordered_map.md +++ b/docs/unordered_map.md @@ -13,7 +13,7 @@ Implementation in work still. Esp. lookup should be key only. } charint; static inline size_t - charint_hash(charint *a) { return FNV1a(a->key); } + charint_hash(charint *a) { return ctl_string_hash(a->key); } static inline int charint_equal(charint *a, charint *b) { return strcmp(a->key, b->key) == 0; } @@ -35,8 +35,8 @@ Implementation in work still. Esp. lookup should be key only. 
#define T charint #include - umap_charint a = umap_charint_init(1000, charint_hash, charint_equal); - + umap_charint a = umap_charint_init(); + char c_char[36]; for (int i=0; i<1000; i++) { snprintf(c_char, 36, "%c%d", 48 + (rand() % 74), rand()); diff --git a/docs/unordered_set.md b/docs/unordered_set.md index 691c8f34..c039a687 100644 --- a/docs/unordered_set.md +++ b/docs/unordered_set.md @@ -5,14 +5,14 @@ parent of [unordered_map](unordered_map.md) # SYNOPSIS - size_t int_hash(int* x) { return abs(*x); } + size_t int_hash(int* x) { return ctl_int32_hash(*x); } int int_eq(int* a, int* b) { return *a == *b; } #define POD #define T int #include - uset_int a = uset_int_init(int_hash, int_eq); + uset_int a = uset_int_init(); for (int i=0; i < 120; i++) uset_int_insert(&a, rand()); @@ -58,11 +58,9 @@ element's hash and corrupt the container. ## Member functions - A init (T_hash(T*), T_equal(T*, T*)) + A init () constructs the hash table. -With INTEGRAL types the members may be NULL, and are then set to default -methods. free (A* self) @@ -332,3 +330,11 @@ be worse, so it's not yet enabled. `CTL_USET_CACHED_HASH` is faster with high load factors. Overall the STL `unordered_set` performance is bad, and our is also not much better. That's why we will add better hash tables and a btree, with less stability guarantees. + +# Changes + +* 202104: + Starting with CTL_VERSION 202104 there are no dynamic hash and equal fields + anymore. + They must be defined statically as `T_hash` and `T_equal` methods beforehand. 
+ diff --git a/tests/func/digi.hh b/tests/func/digi.hh index 2e7be7f1..1df823bf 100644 --- a/tests/func/digi.hh +++ b/tests/func/digi.hh @@ -59,19 +59,10 @@ digi_equal(digi* a, digi* b) return *a->value == *b->value; } -static inline uint32_t -int_hash_func (uint32_t key) -{ - key = ((key >> 16) ^ key) * 0x45d9f3b; - key = ((key >> 16) ^ key) * 0x45d9f3b; - key = (key >> 16) ^ key; - return key; -} - static inline size_t digi_hash(digi* a) { - return (size_t)int_hash_func(*a->value); + return (size_t)ctl_int32_hash(*a->value); } struct DIGI @@ -137,7 +128,7 @@ struct DIGI } size_t hash(const DIGI& a) const { - return (size_t)int_hash_func(*a.value); + return (size_t)ctl_int32_hash(*a.value); } }; @@ -145,7 +136,7 @@ class DIGI_hash { public: size_t operator()(const DIGI& a) const { - return (size_t)int_hash_func(*a.value); + return (size_t)ctl_int32_hash(*a.value); } }; diff --git a/tests/func/strint.hh b/tests/func/strint.hh index 6edf0be3..58ebe693 100644 --- a/tests/func/strint.hh +++ b/tests/func/strint.hh @@ -31,18 +31,6 @@ strint_free(strint* self) str_free(&self->key); } -static size_t -FNV1a(const char *key) -{ - size_t h; - h = 2166136261u; - for (unsigned i = 0; i < strlen(key); i++) { - h ^= (unsigned char)key[i]; - h *= 16777619; - } - return h; -} - static inline int strint_compare(strint* a, strint* b) { @@ -53,7 +41,7 @@ static inline size_t strint_hash(strint* a) { const char* key = a->key.vector; - return key && *key ? (size_t)FNV1a(key) : 0UL; + return key && *key ? 
ctl_string_hash(key) : 0UL; } static inline strint diff --git a/tests/func/test_c11.c b/tests/func/test_c11.c index 4a9eaff1..b768723e 100644 --- a/tests/func/test_c11.c +++ b/tests/func/test_c11.c @@ -8,6 +8,11 @@ #error "POD leftover from ctl/string.h" #endif +static inline size_t +int_hash(int* a) +{ + return (size_t)ctl_int32_hash((uint32_t)*a); +} #define POD #define T int #include @@ -16,16 +21,6 @@ #error "POD leftover" #endif -size_t int_hash(int *x) -{ - return abs(*x); -} - -int int_equal(int *a, int *b) -{ - return *a == *b; -} - size_t FNV1a(const char *key) { size_t h; diff --git a/tests/func/test_container_composing.cc b/tests/func/test_container_composing.cc index 568094bb..55e65de5 100644 --- a/tests/func/test_container_composing.cc +++ b/tests/func/test_container_composing.cc @@ -25,6 +25,12 @@ #define T deq_int #include +static inline size_t +deq_int_hash(deq_int* a) +{ + return (size_t)ctl_int32_hash((uint32_t)*deq_int_front(a)); +} + #define T deq_int #include @@ -53,6 +59,11 @@ #define T list_int #include +static inline size_t +list_int_hash(list_int* a) +{ + return a->head ? (size_t)ctl_int32_hash((uint32_t)*list_int_front(a)) : 0; +} #define T list_int #include @@ -81,6 +92,11 @@ #define T slist_int #include +static inline size_t +slist_int_hash(slist_int* a) +{ + return a->head ? (size_t)ctl_int32_hash((uint32_t)*slist_int_front(a)) : 0; +} #define T slist_int #include @@ -109,6 +125,11 @@ #define T pqu_int #include +static inline size_t +pqu_int_hash(pqu_int* a) +{ + return a->size ? (size_t)ctl_int32_hash((uint32_t)*pqu_int_top(a)) : 0; +} #define T pqu_int #include @@ -138,6 +159,11 @@ #define T queue_int #include +static inline size_t +queue_int_hash(queue_int* a) +{ + return a->size ? (size_t)ctl_int32_hash((uint32_t)*queue_int_front(a)) : 0; +} #define T queue_int #include @@ -166,6 +192,12 @@ #define T set_int #include +static inline size_t +set_int_hash(set_int* a) +{ + set_int_node *node = set_int_first(a); + return a->size ? 
(size_t)ctl_int32_hash((uint32_t)node->value) : 0; +} #define T set_int #include @@ -194,6 +226,11 @@ #define T stack_int #include +static inline size_t +stack_int_hash(stack_int* a) +{ + return a->size ? (size_t)ctl_int32_hash((uint32_t)*stack_int_top(a)) : 0; +} #define T stack_int #include @@ -222,9 +259,18 @@ #define T vec_int #include +static inline size_t +vec_int_hash(vec_int* a) +{ + return a->size ? (size_t)ctl_int32_hash((uint32_t)*vec_int_front(a)) : 0; +} #define T vec_int #include +static inline size_t int_hash(int *a) +{ + return (size_t)ctl_int32_hash((uint32_t)*a); +} #define POD #define T int #include @@ -279,6 +325,11 @@ #define T arr128_int #include +static inline size_t +arr128_int_hash(arr128_int* a) +{ + return (size_t)ctl_int32_hash((uint32_t)*arr128_int_front(a)); +} #define T arr128_int #include diff --git a/tests/func/test_generic_iter.cc b/tests/func/test_generic_iter.cc index bacd159f..561eadd1 100644 --- a/tests/func/test_generic_iter.cc +++ b/tests/func/test_generic_iter.cc @@ -38,6 +38,7 @@ OLD_MAIN #define T int #define INCLUDE_ALGORITHM #include +static inline size_t int_hash(int* a) { return (size_t)ctl_int32_hash((uint32_t)*a); } #define POD #define T int #define INCLUDE_ALGORITHM diff --git a/tests/func/test_generic_iter.h b/tests/func/test_generic_iter.h index 79584b8f..1ef7ebc3 100644 --- a/tests/func/test_generic_iter.h +++ b/tests/func/test_generic_iter.h @@ -338,7 +338,7 @@ void print_uset(uset_int *a) print_set(&aa) #define SETUP_USET1 \ - uset_int a = uset_int_init(NULL, NULL); \ + uset_int a = uset_int_init(); \ std::unordered_set b; \ for (int i = 0; i < 1 + TEST_RAND(TEST_MAX_SIZE - 1); i++) \ { \ @@ -348,7 +348,7 @@ void print_uset(uset_int *a) } \ print_uset(&a) #define SETUP_USET2 \ - uset_int aa = uset_int_init(NULL, NULL); \ + uset_int aa = uset_int_init(); \ std::unordered_set bb; \ for (int i = 0; i < TEST_RAND(25); i++) \ { \ diff --git a/tests/func/test_generic_iter2.cc b/tests/func/test_generic_iter2.cc index 
fc850902..08abc291 100644 --- a/tests/func/test_generic_iter2.cc +++ b/tests/func/test_generic_iter2.cc @@ -38,6 +38,7 @@ OLD_MAIN #define T int #define INCLUDE_ALGORITHM #include +static inline size_t int_hash(int* a) { return (size_t)ctl_int32_hash((uint32_t)*a); } #define POD #define T int #define INCLUDE_ALGORITHM diff --git a/tests/func/test_integral.cc b/tests/func/test_integral.cc index 4cfe0283..fdae892f 100644 --- a/tests/func/test_integral.cc +++ b/tests/func/test_integral.cc @@ -84,6 +84,19 @@ typedef unsigned long ulong; #define T uint8_t #include +static inline int long_equal(long *a, long *b) +{ + return *a == *b; +} +static inline size_t long_hash(long *a) +{ + unsigned long key = (unsigned long)*a; /* unsigned: shifts and wrap-around are fully defined */ + key = ((key >> 16) ^ key) * 0x45d9f3bul; + key = ((key >> 16) ^ key) * 0x45d9f3bul; + key = (key >> 16) ^ key; + return (size_t)key; +} + #define POD #define T long #include @@ -141,7 +154,7 @@ int main(void) } TEST_LIST(vec_double, 1.0, 2.0); { - uset_long a = uset_long_init(NULL, NULL); + uset_long a = uset_long_init(); uset_long_insert(&a, 1L); // hash uset_long_it found = uset_long_find(&a, 1L); // equal assert(!uset_long_it_done(&found)); // equal diff --git a/tests/func/test_unordered_set.cc b/tests/func/test_unordered_set.cc index d13d6eb4..43fc4443 100644 --- a/tests/func/test_unordered_set.cc +++ b/tests/func/test_unordered_set.cc @@ -6,8 +6,8 @@ OLD_MAIN #include "digi.hh" -#define USE_INTERNAL_VERIFY #define T digi +#define USE_INTERNAL_VERIFY #define INCLUDE_ALGORITHM #include @@ -152,7 +152,7 @@ static void setup_sets(uset_digi *a, std::unordered_set &b) { size_t size = TEST_RAND(TEST_MAX_SIZE); LOG("\nsetup_uset %lu\n", size); - *a = uset_digi_init(digi_hash, digi_equal); + *a = uset_digi_init(); uset_digi_rehash(a, size); for (size_t inserts = 0; inserts < size; inserts++) { @@ -164,7 +164,7 @@ static void setup_sets(uset_digi *a, std::unordered_set &b) static void test_small_size(void) { - uset_digi a = uset_digi_init(digi_hash, digi_equal); + uset_digi a = 
uset_digi_init(); uset_digi_insert(&a, digi_init(1)); uset_digi_insert(&a, digi_init(2)); print_uset(&a); @@ -323,7 +323,7 @@ int main(void) } case TEST_SWAP: { aa = uset_digi_copy(&a); - aaa = uset_digi_init(digi_hash, digi_equal); + aaa = uset_digi_init(); bb = b; uset_digi_swap(&aaa, &aa); std::swap(bb, bbb); @@ -648,7 +648,6 @@ int main(void) break; } case TEST_MERGE: { - aa = uset_digi_init_from(&a); setup_sets(&aa, bb); print_uset(&a); print_uset(&aa); @@ -672,7 +671,6 @@ int main(void) uset_digi_it range_a1, range_a2; //std::unordered_set::iterator first_b1, last_b1, first_b2, last_b2; //get_random_iters(&a, &range_a1, b, first_b1, last_b1); - aa = uset_digi_init_from(&a); setup_sets(&aa, bb); range_a1 = uset_digi_begin(&a); range_a2 = uset_digi_begin(&aa); diff --git a/tests/func/test_unordered_set_sleep.c b/tests/func/test_unordered_set_sleep.c index ab7b26ec..e904314a 100644 --- a/tests/func/test_unordered_set_sleep.c +++ b/tests/func/test_unordered_set_sleep.c @@ -1,6 +1,13 @@ // Test that we detect DDOS attacks #include "../test.h" +static inline size_t +int_hash(int* a) +{ + (void)a; + return 0; +} + // default CTL_USET_SECURITY_COLLCOUNTING 2 #define POD #define T int @@ -26,18 +33,19 @@ inline void Sleep(uint32_t milliseconds) } #endif -static inline size_t -broken_hash(int* a) +/*static inline int +int_equal(int* a, int* b) { (void)a; + (void)b; return 0; -} +}*/ int main(void) { srand(0xbebe); const int size = 148; - uset_int a = uset_int_init(broken_hash, NULL); + uset_int a = uset_int_init(); uset_int_rehash(&a, size); for (int i = 0; i < size; i++) { diff --git a/tests/perf/perf_compile_c11.c b/tests/perf/perf_compile_c11.c index f37ac4ca..eb13d22d 100644 --- a/tests/perf/perf_compile_c11.c +++ b/tests/perf/perf_compile_c11.c @@ -58,6 +58,7 @@ #endif #include +static inline size_t int_hash(int *a) { return (size_t)ctl_int32_hash((uint32_t)*a); } #define POD #define T int #ifdef ALGORITHM @@ -177,16 +178,6 @@ static int compare_int(int *a, int 
*b) return *a < *b; } -static size_t hash_int(int *a) -{ - return *a; -} - -static int equal_int(int *a, int *b) -{ - return *a == *b; -} - static int compare_key_short(short *a, short *b) { return (*a == *b) ? 0 : (*a < *b) ? -1 : 1; @@ -231,7 +222,7 @@ void A(void) str g = str_init("test"); pqu_int i = pqu_int_init(compare_int); arr1024_int j = arr1024_int_init(); - uset_int k = uset_int_init(hash_int, equal_int); + uset_int k = uset_int_init(); for (int el = 0; el < N; el++) { diff --git a/tests/perf/uset/perf_uset_erase.c b/tests/perf/uset/perf_uset_erase.c index 19be8213..2b308256 100644 --- a/tests/perf/uset/perf_uset_erase.c +++ b/tests/perf/uset/perf_uset_erase.c @@ -1,19 +1,19 @@ #include "../../test.h" +static size_t int_hash(int* a) { return *a; } +//static int int_equal(int* a, int* b) { return *a == *b; } + #define POD #define T int #include -static size_t int_hash(int* a) { return *a; } -static int int_equal(int* a, int* b) { return *a == *b; } - int main(void) { puts(__FILE__); srand(0xbeef); for(int run = 0; run < TEST_PERF_RUNS; run++) { - uset_int c = uset_int_init(int_hash, int_equal); + uset_int c = uset_int_init(); int elems = TEST_PERF_CHUNKS * run; for(int elem = 0; elem < elems; elem++) uset_int_insert(&c, rand() % elems); diff --git a/tests/perf/uset/perf_uset_insert.c b/tests/perf/uset/perf_uset_insert.c index e243d834..80ca4350 100644 --- a/tests/perf/uset/perf_uset_insert.c +++ b/tests/perf/uset/perf_uset_insert.c @@ -1,21 +1,21 @@ #include "../../test.h" +static size_t int_hash(int* a) { return *a; } +//static int int_equal(int* a, int* b) { return *a == *b; } + #define POD #define T int #include #include -static size_t int_hash(int* a) { return *a; } -static int int_equal(int* a, int* b) { return *a == *b; } - int main(void) { puts(__FILE__); srand(0xbeef); for(int run = 0; run < TEST_PERF_RUNS; run++) { - uset_int c = uset_int_init(int_hash, int_equal); + uset_int c = uset_int_init(); int elems = TEST_PERF_CHUNKS * run; long t0 = 
TEST_TIME(); for(int elem = 0; elem < elems; elem++) diff --git a/tests/perf/uset/perf_uset_iterate.c b/tests/perf/uset/perf_uset_iterate.c index 17d68e3c..233a57f9 100644 --- a/tests/perf/uset/perf_uset_iterate.c +++ b/tests/perf/uset/perf_uset_iterate.c @@ -1,19 +1,19 @@ #include "../../test.h" +static size_t int_hash(int* a) { return *a; } +//static int int_equal(int* a, int* b) { return *a == *b; } + #define POD #define T int #include #include -static size_t int_hash(int* a) { return *a; } -static int int_equal(int* a, int* b) { return *a == *b; } - int main(void) { puts(__FILE__); srand(0xbeef); - uset_int c = uset_int_init(int_hash, int_equal); + uset_int c = uset_int_init(); for(int run = 0; run < TEST_PERF_RUNS; run++) { unsigned int elems = TEST_PERF_CHUNKS * run;