/* * String hash computation (interning). */ #include "duk_internal.h" /* constants for duk_hashstring() */ #define DUK__STRHASH_SHORTSTRING 4096L #define DUK__STRHASH_MEDIUMSTRING (256L * 1024L) #define DUK__STRHASH_BLOCKSIZE 256L DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, duk_uint8_t *str, duk_size_t len) { /* * Sampling long strings by byte skipping (like Lua does) is potentially * a cache problem. Here we do 'block skipping' instead for long strings: * hash an initial part, and then sample the rest of the string with * reasonably sized chunks. * * Skip should depend on length and bound the total time to roughly * logarithmic. * * With current values: * * 1M string => 256 * 241 = 61696 bytes (0.06M) of hashing * 1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing * * After an initial part has been hashed, an offset is applied before * starting the sampling. The initial offset is computed from the * hash of the initial part of the string. The idea is to avoid the * case that all long strings have certain offset ranges that are never * sampled. */ /* note: mixing len into seed improves hashing when skipping */ duk_uint32_t str_seed = heap->hash_seed ^ len; if (len <= DUK__STRHASH_SHORTSTRING) { return duk_util_hashbytes(str, len, str_seed); } else { duk_uint32_t hash; duk_size_t off; duk_size_t skip; if (len <= DUK__STRHASH_MEDIUMSTRING) { skip = (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE); } else { skip = (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE); } hash = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, str_seed); off = DUK__STRHASH_SHORTSTRING + (skip * (hash % 256)) / 256; /* XXX: inefficient loop */ while (off < len) { duk_size_t left = len - off; duk_size_t now = (duk_size_t) (left > DUK__STRHASH_BLOCKSIZE ? DUK__STRHASH_BLOCKSIZE : left); hash ^= duk_util_hashbytes(str + off, now, str_seed); off += skip; } return hash; } }