hash tables: allow per-table custom entry size
This commit is contained in:
parent
a3c3228c1a
commit
aadedaa1a3
3 changed files with 56 additions and 46 deletions
|
@ -3,7 +3,8 @@ use xxhash_rust::xxh64::xxh64;
|
|||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Settings {
|
||||
pub seed: u64,
|
||||
pub seed: u32,
|
||||
pub entsize: u16,
|
||||
pub blshift: u8,
|
||||
}
|
||||
|
||||
|
@ -32,10 +33,16 @@ pub struct Iter<'a> {
|
|||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Value {
|
||||
pub struct Value<R> {
|
||||
pub typ: u32,
|
||||
pub lhs: u64,
|
||||
pub rhs: u64,
|
||||
pub rest: R,
|
||||
}
|
||||
|
||||
impl Settings {
|
||||
#[inline]
|
||||
pub fn chain_entry_size(&self) -> usize {
|
||||
16 * (1 + usize::from(self.entsize))
|
||||
}
|
||||
}
|
||||
|
||||
impl Header {
|
||||
|
@ -43,26 +50,24 @@ impl Header {
|
|||
if data.len() < 20 {
|
||||
return None;
|
||||
}
|
||||
let mut seed = u64::from(data[15]) << 32;
|
||||
seed += u64::from(u32::from_be_bytes(data[16..20].try_into().unwrap()));
|
||||
Some(Self {
|
||||
strtab_link: u32::from_be_bytes(data[0..4].try_into().unwrap()),
|
||||
nbuckets: u32::from_be_bytes(data[4..8].try_into().unwrap()),
|
||||
nchains: u32::from_be_bytes(data[8..12].try_into().unwrap()),
|
||||
nblf: u16::from_be_bytes(data[12..14].try_into().unwrap()),
|
||||
nblf: u16::from_be_bytes(data[14..16].try_into().unwrap()),
|
||||
settings: Settings {
|
||||
blshift: data[14],
|
||||
seed,
|
||||
entsize: u16::from_be_bytes(data[12..14].try_into().unwrap()),
|
||||
blshift: data[16],
|
||||
// to make this easier, we first fold the blshift into the seed,
|
||||
// and immediately drop that after the conversion
|
||||
seed: u32::from_be_bytes(data[16..20].try_into().unwrap()) & 0xffffff,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
pub fn tabsize(&self) -> usize {
|
||||
let tmp: usize = 4
|
||||
+ usize::try_from(self.nbuckets).unwrap()
|
||||
+ 8 * usize::try_from(self.nchains).unwrap()
|
||||
+ 2 * usize::from(self.nblf);
|
||||
4 * tmp
|
||||
let tmp: usize = 4 + usize::try_from(self.nbuckets).unwrap() + 2 * usize::from(self.nblf);
|
||||
4 * tmp + self.settings.chain_entry_size() * usize::try_from(self.nchains).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -71,15 +76,14 @@ pub fn hash_trf(h: u64, items: usize, div: usize) -> usize {
|
|||
div * usize::try_from(h % u64::try_from(items / div).unwrap()).unwrap()
|
||||
}
|
||||
|
||||
impl Value {
|
||||
pub fn parse(entry: &[u8]) -> Option<Self> {
|
||||
if entry.len() < 32 {
|
||||
impl<'a> Value<&'a [u8]> {
|
||||
pub fn parse(entry: &'a [u8]) -> Option<Self> {
|
||||
if entry.len() < 16 {
|
||||
return None;
|
||||
}
|
||||
Some(Self {
|
||||
typ: u32::from_be_bytes(entry[12..16].try_into().unwrap()),
|
||||
lhs: u64::from_be_bytes(entry[16..24].try_into().unwrap()),
|
||||
rhs: u64::from_be_bytes(entry[24..32].try_into().unwrap()),
|
||||
rest: &entry[16..],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -97,7 +101,10 @@ impl<'a> Ref<'a> {
|
|||
|
||||
let bloom_end: usize = 16 + 8 * usize::from(header.nblf);
|
||||
let buckets_end = bloom_end + 4 * uf(header.nbuckets).unwrap();
|
||||
let chains_end = buckets_end + 32 * uf(header.nchains).unwrap();
|
||||
let chains_end =
|
||||
buckets_end + header.settings.chain_entry_size() * uf(header.nchains).unwrap();
|
||||
assert_eq!(data.len(), chains_end);
|
||||
|
||||
Some(Ref {
|
||||
settings: header.settings,
|
||||
strtab: alldata.get(usize::try_from(header.strtab_link).ok()?..)?,
|
||||
|
@ -108,7 +115,7 @@ impl<'a> Ref<'a> {
|
|||
}
|
||||
|
||||
/// NOTE: the key is truncated after the first null byte
|
||||
pub fn lookup(&self, key: &[u8]) -> Option<Value> {
|
||||
pub fn lookup(&self, key: &[u8]) -> Option<Value<&'a [u8]>> {
|
||||
let key = trunc_key_at0(key);
|
||||
let (h, blmask) = self.settings.translate_key(key);
|
||||
|
||||
|
@ -126,7 +133,12 @@ impl<'a> Ref<'a> {
|
|||
))
|
||||
.ok()?;
|
||||
|
||||
for sel in self.chains.chunks(32).take(chain_start) {
|
||||
for sel in self
|
||||
.chains
|
||||
.chunks(self.settings.chain_entry_size())
|
||||
.take(chain_start)
|
||||
{
|
||||
assert!(sel.len() >= 16);
|
||||
let e_hash = u64::from_be_bytes(sel[0..8].try_into().unwrap());
|
||||
if (h | 1) == (e_hash | 1) {
|
||||
let e_name_ix =
|
||||
|
@ -134,7 +146,7 @@ impl<'a> Ref<'a> {
|
|||
let e_name = trunc_key_at0(self.strtab.get(e_name_ix..)?);
|
||||
|
||||
if e_name == key {
|
||||
return Value::parse(sel);
|
||||
return Some(Value::parse(sel).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -155,7 +167,7 @@ impl<'a> Ref<'a> {
|
|||
}
|
||||
|
||||
impl<'a> Iterator for Iter<'a> {
|
||||
type Item = (u64, &'a [u8], Value);
|
||||
type Item = (u64, &'a [u8], Value<&'a [u8]>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.chains.len() < 32 {
|
||||
|
@ -176,7 +188,7 @@ impl<'a> Iterator for Iter<'a> {
|
|||
impl Settings {
|
||||
/// NOTE: the key must be truncated first using [`trunc_key_at0`]
|
||||
pub fn translate_key(&self, key: &[u8]) -> (u64, u64) {
|
||||
let h = xxh64(key, self.seed);
|
||||
let h = xxh64(key, self.seed.into());
|
||||
let blmask = (1 << (h % 64)) | (1 << ((h >> self.blshift) % 64));
|
||||
(h, blmask)
|
||||
}
|
||||
|
|
|
@ -43,7 +43,12 @@ fn name_to_txt(name: &[u8]) -> String {
|
|||
if let Ok(name) = core::str::from_utf8(name) {
|
||||
format!("{:?}", name)
|
||||
} else {
|
||||
format!("{:?}", name)
|
||||
"0x".chars()
|
||||
.chain(
|
||||
name.iter()
|
||||
.flat_map(|i| format!("{:02x}", i).chars().collect::<Vec<_>>()),
|
||||
)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,31 +69,27 @@ fn main() {
|
|||
.expect("unable to parse table header");
|
||||
println!("location\tmeta\t\tname\t\ttyp\t\trest");
|
||||
for i in ltr {
|
||||
print!(
|
||||
"{:08x}\t{:08x}\t{}\t{}\t",
|
||||
println!(
|
||||
"{:08x}\t{:08x}\t{}\t{}\t{}",
|
||||
i.location,
|
||||
i.meta,
|
||||
name_to_txt(i.name),
|
||||
typ_to_txt(i.typ),
|
||||
name_to_txt(i.rest),
|
||||
);
|
||||
for j in i.rest {
|
||||
print!("{:02x}", j);
|
||||
}
|
||||
println!("");
|
||||
}
|
||||
}
|
||||
Command::Hash => {
|
||||
let ht = yglnk_core::hash_table::Ref::parse(data, cli.location)
|
||||
.expect("unable to parse table header");
|
||||
println!("hash\tL\tR\tname\t\ttyp");
|
||||
println!("hash\t\tname\t\ttyp\trest");
|
||||
for (hash, name, value) in ht.iter() {
|
||||
println!(
|
||||
"{:016x}\t{:016x}\t{:016x}\t{}\t{}",
|
||||
"{:016x}\t{}\t{}\t{}",
|
||||
hash,
|
||||
value.lhs,
|
||||
value.rhs,
|
||||
name_to_txt(name),
|
||||
typ_to_txt(value.typ)
|
||||
typ_to_txt(value.typ),
|
||||
name_to_txt(value.rest),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -97,12 +98,9 @@ fn main() {
|
|||
.expect("unable to parse table header");
|
||||
match ht.lookup(key.as_bytes()) {
|
||||
None => println!("(none)"),
|
||||
Some(value) => println!(
|
||||
"typ={} L={:016x} R={:016x}",
|
||||
typ_to_txt(value.typ),
|
||||
value.lhs,
|
||||
value.rhs
|
||||
),
|
||||
Some(value) => {
|
||||
println!("typ={} {}", typ_to_txt(value.typ), name_to_txt(value.rest),)
|
||||
}
|
||||
}
|
||||
}
|
||||
Command::X2dhc => unimplemented!(),
|
||||
|
|
|
@ -85,9 +85,9 @@ list entry := name[4b] type[4b] location[4b] meta[4b] rest[*]
|
|||
A hash table using 64-bit xxHash. Chains are traversed in order; the lowest bit of the first 8b of a chain entry indicates whether another entry follows (0 = last entry), and the remaining bits contain the hash.
|
||||
|
||||
```
|
||||
ht header := strtab_link[4b] nbuckets[4b] nchains[4b] nblf[2b] blshift[1b] seed[5b]
|
||||
ht body := bloom[8b * nlbf] buckets[4b * nbuckets] chains[32b * nchains]
|
||||
chain entry := hash[8b] name[4b] type[4b] value[16b]
|
||||
ht header := strtab_link[4b] nbuckets[4b] nchains[4b] entsize[2b] nblf[2b] blshift[1b] seed[3b]
|
||||
ht body := bloom[8b * nblf] buckets[4b * nbuckets] chains[16b * (1 + entsize) * nchains]
|
||||
chain entry := hash[8b] name[4b] type[4b] rest[16b * entsize]
|
||||
```
|
||||
|
||||
## 2D "hilbert curve" tables
|
||||
|
|
Loading…
Reference in a new issue