Changed phashmap_t to dynamically resize hash table. (nw)

This commit is contained in:
couriersud 2015-07-19 20:52:24 +02:00
parent c0c76edc28
commit bf7c6d585b

View File

@ -12,6 +12,7 @@
#include <cstring>
#include <algorithm>
#include <cmath>
#include "palloc.h"
#include "pstring.h"
@ -66,6 +67,11 @@ public:
/* Returns the stored capacity value, m_capacity. */
ATTR_HOT std::size_t size() const { return m_capacity; }
/* Public resize entry point: forwards new_size unchanged to set_capacity(). */
void resize(const std::size_t new_size)
{
set_capacity(new_size);
}
protected:
ATTR_COLD void set_capacity(const std::size_t new_capacity)
{
@ -614,13 +620,15 @@ public:
/*
 * Default hash functor: the hash of a key is the key itself converted to
 * unsigned.
 *
 * C must be convertible to unsigned with a C-style cast. The cast is kept
 * as-is so every key type that compiled before still compiles.
 */
template <class C>
struct phash_functor
{
/*
 * Returns v converted to unsigned.
 *
 * Only the const overload is kept: it is callable on const and non-const
 * functor objects alike, so a separate non-const overload with an identical
 * body was redundant.
 */
unsigned hash(const C &v) const { return (unsigned) v; }
};
template <>
struct phash_functor<pstring>
{
unsigned hash(const pstring &v)
#if 1
#if 1
unsigned hash(const pstring &v) const
{
const char *string = v.cstr();
unsigned result = *string++;
@ -628,19 +636,53 @@ struct phash_functor<pstring>
result = (result*33) ^ c;
return result;
}
#else
unsigned hash(const pstring &v) const
{
/* FowlerNollVo hash - FNV-1 */
const char *string = v.cstr();
unsigned result = 2166136261;
for (UINT8 c = *string++; c != 0; c = *string++)
result = (result * 16777619) ^ c;
// result = (result ^ c) * 16777619; FNV 1a
return result;
}
#endif
#else
unsigned hash(const pstring &v) const
{
/*
 * Jenkins one-at-a-time hash over the bytes of the key's C string.
 *
 * Each byte is read as unsigned char before it is mixed in. Adding a plain
 * char would sign-extend bytes >= 0x80 where char is signed, making the
 * hash value differ between platforms for the same key.
 */
unsigned result = 0;
for (const char *p = v.cstr(); *p != 0; ++p)
{
result += static_cast<unsigned char>(*p);
result += (result << 10);
result ^= (result >> 6);
}
/* final avalanche step of the algorithm */
result += (result << 3);
result ^= (result >> 11);
result += (result << 15);
return result;
}
#endif
};
/* some primes 53, 97, 193, 389, 769, 1543, 3079, 6151 */
template <class K, class V, class H = phash_functor<K> >
class phashmap_t
{
public:
phashmap_t() : m_hash(389)
phashmap_t()
: m_hash(17)
{
/* Start with 17 slots and fill every one with -1, the value add() compares a slot against. */
unsigned slot = 0;
while (slot < m_hash.size())
{
m_hash[slot] = -1;
++slot;
}
}
/* Empty destructor body: no explicit cleanup is performed here. */
~phashmap_t()
{
}
struct element_t
{
element_t() { }
@ -655,6 +697,18 @@ public:
void clear()
{
if (0)
{
unsigned cnt = 0;
for (unsigned i=0; i<m_hash.size(); i++)
if (m_hash[i] >= 0)
cnt++;
const unsigned s = m_values.size();
if (s>0)
printf("phashmap: %d elements %d hashsize, percent in overflow: %d\n", s, (unsigned) m_hash.size(), (s - cnt) * 100 / s);
else
printf("phashmap: No elements .. \n");
}
m_values.clear();
for (unsigned i=0; i<m_hash.size(); i++)
m_hash[i] = -1;
@ -674,7 +728,22 @@ public:
bool add(const K &key, const V &value)
{
H h;
/*
* The new table size comes from Euler's prime-generating polynomial
*
* n * n + n + 41, which is prime for 0 <= n <= 39
*
* and we accept that for larger n the result may not be a prime.
*
*/
if (m_values.size() > m_hash.size())
{
unsigned n = std::sqrt( 2 * m_hash.size());
n = n * n + n + 41;
m_hash.resize(n);
rebuild();
}
const H h;
const unsigned hash=h.hash(key);
const unsigned pos = hash % m_hash.size();
if (m_hash[pos] == -1)
@ -740,6 +809,18 @@ private:
return -1;
}
/*
 * Recomputes all bucket heads from the stored elements: every bucket is
 * reset to -1, then each element is linked in at the front of the chain
 * selected by (its stored hash % current table size).
 */
void rebuild()
{
unsigned bucket = 0;
while (bucket < m_hash.size())
m_hash[bucket++] = -1;

for (unsigned idx = 0; idx < m_values.size(); ++idx)
{
const unsigned target = m_values[idx].m_hash % m_hash.size();
/* the previous head (or -1) becomes this element's successor */
m_values[idx].m_next = m_hash[target];
m_hash[target] = idx;
}
}
plist_t<element_t> m_values;
parray_t<int> m_hash;
};