diff --git a/crates/yglnk-core/src/hash_table.rs b/crates/yglnk-core/src/hash_table.rs index 4d69388..a5f50d2 100644 --- a/crates/yglnk-core/src/hash_table.rs +++ b/crates/yglnk-core/src/hash_table.rs @@ -3,7 +3,8 @@ use xxhash_rust::xxh64::xxh64; #[derive(Clone, Copy, Debug)] pub struct Settings { - pub seed: u64, + pub seed: u32, + pub entsize: u16, pub blshift: u8, } @@ -32,10 +33,16 @@ pub struct Iter<'a> { } #[derive(Clone, Copy, Debug)] -pub struct Value { +pub struct Value { pub typ: u32, - pub lhs: u64, - pub rhs: u64, + pub rest: R, +} + +impl Settings { + #[inline] + pub fn chain_entry_size(&self) -> usize { + 16 * (1 + usize::from(self.entsize)) + } } impl Header { @@ -43,26 +50,24 @@ impl Header { if data.len() < 20 { return None; } - let mut seed = u64::from(data[15]) << 32; - seed += u64::from(u32::from_be_bytes(data[16..20].try_into().unwrap())); Some(Self { strtab_link: u32::from_be_bytes(data[0..4].try_into().unwrap()), nbuckets: u32::from_be_bytes(data[4..8].try_into().unwrap()), nchains: u32::from_be_bytes(data[8..12].try_into().unwrap()), - nblf: u16::from_be_bytes(data[12..14].try_into().unwrap()), + nblf: u16::from_be_bytes(data[14..16].try_into().unwrap()), settings: Settings { - blshift: data[14], - seed, + entsize: u16::from_be_bytes(data[12..14].try_into().unwrap()), + blshift: data[16], + // to make this easier, we first fold the blshift into the seed, + // and immediately drop that after the conversion + seed: u32::from_be_bytes(data[16..20].try_into().unwrap()) & 0xffffff, }, }) } pub fn tabsize(&self) -> usize { - let tmp: usize = 4 - + usize::try_from(self.nbuckets).unwrap() - + 8 * usize::try_from(self.nchains).unwrap() - + 2 * usize::from(self.nblf); - 4 * tmp + let tmp: usize = 4 + usize::try_from(self.nbuckets).unwrap() + 2 * usize::from(self.nblf); + 4 * tmp + self.settings.chain_entry_size() * usize::try_from(self.nchains).unwrap() } } @@ -71,15 +76,14 @@ pub fn hash_trf(h: u64, items: usize, div: usize) -> usize { div * 
usize::try_from(h % u64::try_from(items / div).unwrap()).unwrap() } -impl Value { - pub fn parse(entry: &[u8]) -> Option { - if entry.len() < 32 { +impl<'a> Value<&'a [u8]> { + pub fn parse(entry: &'a [u8]) -> Option { + if entry.len() < 16 { return None; } Some(Self { typ: u32::from_be_bytes(entry[12..16].try_into().unwrap()), - lhs: u64::from_be_bytes(entry[16..24].try_into().unwrap()), - rhs: u64::from_be_bytes(entry[24..32].try_into().unwrap()), + rest: &entry[16..], }) } } @@ -97,7 +101,10 @@ impl<'a> Ref<'a> { let bloom_end: usize = 16 + 8 * usize::from(header.nblf); let buckets_end = bloom_end + 4 * uf(header.nbuckets).unwrap(); - let chains_end = buckets_end + 32 * uf(header.nchains).unwrap(); + let chains_end = + buckets_end + header.settings.chain_entry_size() * uf(header.nchains).unwrap(); + assert_eq!(data.len(), chains_end); + Some(Ref { settings: header.settings, strtab: alldata.get(usize::try_from(header.strtab_link).ok()?..)?, @@ -108,7 +115,7 @@ impl<'a> Ref<'a> { } /// NOTE: the key is truncated after the first null byte - pub fn lookup(&self, key: &[u8]) -> Option { + pub fn lookup(&self, key: &[u8]) -> Option> { let key = trunc_key_at0(key); let (h, blmask) = self.settings.translate_key(key); @@ -126,7 +133,12 @@ impl<'a> Ref<'a> { )) .ok()?; - for sel in self.chains.chunks(32).take(chain_start) { + for sel in self + .chains + .chunks(self.settings.chain_entry_size()) + .take(chain_start) + { + assert!(sel.len() >= 16); let e_hash = u64::from_be_bytes(sel[0..8].try_into().unwrap()); if (h | 1) == (e_hash | 1) { let e_name_ix = @@ -134,7 +146,7 @@ impl<'a> Ref<'a> { let e_name = trunc_key_at0(self.strtab.get(e_name_ix..)?); if e_name == key { - return Value::parse(sel); + return Some(Value::parse(sel).unwrap()); } } @@ -155,7 +167,7 @@ impl<'a> Ref<'a> { } impl<'a> Iterator for Iter<'a> { - type Item = (u64, &'a [u8], Value); + type Item = (u64, &'a [u8], Value<&'a [u8]>); fn next(&mut self) -> Option { if self.chains.len() < 32 { @@ -176,7 
+188,7 @@ impl<'a> Iterator for Iter<'a> { impl Settings { /// NOTE: the key must be truncated first using [`trunc_key_at0`] pub fn translate_key(&self, key: &[u8]) -> (u64, u64) { - let h = xxh64(key, self.seed); + let h = xxh64(key, self.seed.into()); let blmask = (1 << (h % 64)) | (1 << ((h >> self.blshift) % 64)); (h, blmask) } diff --git a/crates/yglnk-ls/src/main.rs b/crates/yglnk-ls/src/main.rs index cc18b78..fe8559a 100644 --- a/crates/yglnk-ls/src/main.rs +++ b/crates/yglnk-ls/src/main.rs @@ -43,7 +43,12 @@ fn name_to_txt(name: &[u8]) -> String { if let Ok(name) = core::str::from_utf8(name) { format!("{:?}", name) } else { - format!("{:?}", name) + "0x".chars() + .chain( + name.iter() + .flat_map(|i| format!("{:02x}", i).chars().collect::>()), + ) + .collect() } } @@ -64,31 +69,27 @@ fn main() { .expect("unable to parse table header"); println!("location\tmeta\t\tname\t\ttyp\t\trest"); for i in ltr { - print!( - "{:08x}\t{:08x}\t{}\t{}\t", + println!( + "{:08x}\t{:08x}\t{}\t{}\t{}", i.location, i.meta, name_to_txt(i.name), typ_to_txt(i.typ), + name_to_txt(i.rest), ); - for j in i.rest { - print!("{:02x}", j); - } - println!(""); } } Command::Hash => { let ht = yglnk_core::hash_table::Ref::parse(data, cli.location) .expect("unable to parse table header"); - println!("hash\tL\tR\tname\t\ttyp"); + println!("hash\t\tname\t\ttyp\trest"); for (hash, name, value) in ht.iter() { println!( - "{:016x}\t{:016x}\t{:016x}\t{}\t{}", + "{:016x}\t{}\t{}\t{}", hash, - value.lhs, - value.rhs, name_to_txt(name), - typ_to_txt(value.typ) + typ_to_txt(value.typ), + name_to_txt(value.rest), ); } } @@ -97,12 +98,9 @@ fn main() { .expect("unable to parse table header"); match ht.lookup(key.as_bytes()) { None => println!("(none)"), - Some(value) => println!( - "typ={} L={:016x} R={:016x}", - typ_to_txt(value.typ), - value.lhs, - value.rhs - ), + Some(value) => { + println!("typ={} {}", typ_to_txt(value.typ), name_to_txt(value.rest),) + } } } Command::X2dhc => unimplemented!(), diff 
--git a/docs/index.gmi b/docs/index.gmi index 8d740df..539122a 100644 --- a/docs/index.gmi +++ b/docs/index.gmi @@ -85,9 +85,9 @@ list entry := name[4b] type[4b] location[4b] meta[4b] rest[*] A hash table, using 64bit-xxHash. chains are traversed in order, the last bit of the first 8b of a chain entry is used to indicate if another entry follows (0 = last entry), the rest contains the hash. ``` -ht header := strtab_link[4b] nbuckets[4b] nchains[4b] nblf[2b] blshift[1b] seed[5b] -ht body := bloom[8b * nlbf] buckets[4b * nbuckets] chains[32b * nchains] -chain entry := hash[8b] name[4b] type[4b] value[16b] +ht header := strtab_link[4b] nbuckets[4b] nchains[4b] entsize[2b] nblf[2b] blshift[1b] seed[3b] +ht body := bloom[8b * nblf] buckets[4b * nbuckets] chains[16b * (1 + entsize) * nchains] +chain entry := hash[8b] name[4b] type[4b] rest[16b * entsize] ``` ## 2D "hilbert curve" tables