core: factor out HashTableSettings

This commit is contained in:
Alain Zscheile 2023-01-08 09:04:46 +01:00
parent 3ad174908b
commit ac037f5a78

View file

@ -1,21 +1,25 @@
use xxhash_rust::xxh64::xxh64;
/// Parameters used by `HashTableSettings::translate_key` to turn a key into
/// its hash and its bloom-filter mask.
#[derive(Clone, Copy, Debug)]
pub struct HashTableSettings {
/// Seed passed to `xxh64` when hashing a key.
pub seed: u64,
/// Right-shift applied to the hash before selecting the second bloom-mask bit.
pub blshift: u8,
}
// NOTE(review): this listing carries both the standalone `blshift`/`seed` fields and
// `settings`, which bundles the same two values — presumably diff residue from the
// refactoring; confirm which set is current.
/// Values decoded from the hash table's header bytes.
#[derive(Clone, Copy, Debug)]
pub struct HashTableHeader {
/// Read big-endian from header bytes 0..4; used as the start offset of the string table.
pub strtab_link: u32,
/// Read big-endian from header bytes 4..8; the bucket area is sized as 4 bytes per bucket.
pub nbuckets: u32,
/// Read big-endian from header bytes 8..10; the bloom area is sized as 8 bytes per unit.
pub nblf: u16,
/// Taken from header byte 10.
pub blshift: u8,
/// Hash seed; where the parser obtains it is not visible in this excerpt.
pub seed: u64,
/// Hashing parameters (`blshift` from header byte 10, plus the seed).
pub settings: HashTableSettings,
}
// NOTE(review): this listing carries both the standalone `seed`/`blshift` fields and
// `settings`, which bundles the same two values — presumably diff residue from the
// refactoring; confirm which set is current.
/// Borrowed view of a parsed hash table; `lookup` reads its slices in place.
pub struct HashTableRef<'a> {
/// Hash seed copied from the header.
seed: u64,
/// String table bytes, starting at the header's `strtab_link` offset.
strtab: &'a [u8],
/// Bloom filter bytes; `lookup` reads them as 8-byte big-endian words.
bloom: &'a [u8],
/// Bucket bytes; `lookup` reads 4-byte big-endian entries giving a chain start index.
buckets: &'a [u8],
/// Chain bytes; their layout is not visible in this excerpt.
chains: &'a [u8],
/// Bloom shift copied from the header.
blshift: u8,
/// Hashing parameters copied from the header.
settings: HashTableSettings,
}
impl HashTableHeader {
@ -29,8 +33,10 @@ impl HashTableHeader {
strtab_link: u32::from_be_bytes(data[0..4].try_into().unwrap()),
nbuckets: u32::from_be_bytes(data[4..8].try_into().unwrap()),
nblf: u16::from_be_bytes(data[8..10].try_into().unwrap()),
blshift: data[10],
seed,
settings: HashTableSettings {
blshift: data[10],
seed,
},
})
}
@ -45,6 +51,11 @@ pub fn trunc_key_at0(key: &[u8]) -> &[u8] {
&key[..key_end]
}
/// Hash -> index conversion helper.
///
/// Treats a region of `items` bytes as `items / div` slots of `div` bytes each,
/// reduces `h` modulo the slot count and returns the byte offset of the chosen
/// slot (always a multiple of `div`).
///
/// # Panics
///
/// Panics if `div` is zero, or if `items < div` (no complete slot, so the
/// modulo would divide by zero).
pub fn hash_trf(h: u64, items: usize, div: usize) -> usize {
    let slot_count = u64::try_from(items / div).unwrap();
    let slot = usize::try_from(h % slot_count).unwrap();
    div * slot
}
impl<'a> HashTableRef<'a> {
/// `location` should be the offset where the hash table is present (in units of 16 bytes)
pub fn parse(data: &'a [u8], location: u32, entsize: u16, entcount: u16) -> Option<Self> {
@ -61,8 +72,7 @@ impl<'a> HashTableRef<'a> {
let bloom_end: usize = 16 + 8 * usize::from(header.nblf);
let buckets_end = bloom_end + 4 * uf(header.nbuckets).unwrap();
Some(HashTableRef {
blshift: header.blshift,
seed: header.seed,
settings: header.settings,
strtab: alldata.get(usize::try_from(header.strtab_link).ok()?..)?,
bloom: &data[16..bloom_end],
buckets: &data[bloom_end..buckets_end],
@ -72,25 +82,18 @@ impl<'a> HashTableRef<'a> {
/// NOTE: the key is truncated after the first null byte
pub fn lookup(&self, key: &[u8]) -> Option<(u32, u64, u64)> {
// hash -> index conversion/transformation helper
fn htr(h: u64, items: usize, div: usize) -> usize {
div * usize::try_from(h % u64::try_from(items / div).unwrap()).unwrap()
}
let key = trunc_key_at0(key);
let h = xxh64(key, self.seed);
let (h, blmask) = self.settings.translate_key(key);
// check bloom filter
let blsel = htr(h / 64, self.bloom.len(), 8);
let blsel = hash_trf(h / 64, self.bloom.len(), 8);
let blword = u64::from_be_bytes(self.bloom[blsel..blsel + 8].try_into().unwrap());
let blmask: u64 = (1 << (h % 64)) | (1 << ((h >> self.blshift) % 64));
if (blword & blmask) != blmask {
return None;
}
// retrieve bucket/chain start index
let bkid = htr(h, self.buckets.len(), 4);
let bkid = hash_trf(h, self.buckets.len(), 4);
let chain_start = usize::try_from(u32::from_be_bytes(
self.buckets[bkid..bkid + 4].try_into().unwrap(),
))
@ -120,3 +123,12 @@ impl<'a> HashTableRef<'a> {
return None;
}
}
impl HashTableSettings {
    /// Hashes `key` and derives the bloom-filter mask that belongs to it.
    ///
    /// Returns `(h, blmask)`, where `h` is the `xxh64` hash of `key` seeded
    /// with `self.seed`, and `blmask` has bit `h % 64` and bit
    /// `(h >> blshift) % 64` set (the two bits may coincide).
    ///
    /// The shift amount is taken modulo 64, so a `blshift` of 64 or more never
    /// panics and gives the same result in every build profile.
    ///
    /// NOTE: the key must be truncated first using [`trunc_key_at0`]
    pub fn translate_key(&self, key: &[u8]) -> (u64, u64) {
        let h = xxh64(key, self.seed);
        // `blshift` is a plain `u8`, so values >= 64 are representable. A bare
        // `>>` would panic on them when overflow checks are enabled and mask
        // the shift amount when they are not; `wrapping_shr` always masks.
        let shifted = h.wrapping_shr(u32::from(self.blshift));
        let blmask = (1u64 << (h % 64)) | (1u64 << (shifted % 64));
        (h, blmask)
    }
}