get rid of the name-repetition in HashTable stuff

This commit is contained in:
Alain Zscheile 2023-01-09 09:30:47 +01:00
parent 9813001721
commit a3c3228c1a
3 changed files with 30 additions and 31 deletions

View file

@ -1,43 +1,44 @@
use crate::trunc_key_at0;
use xxhash_rust::xxh64::xxh64;
/// Hashing parameters of a hash table, carried inside the table header.
#[derive(Clone, Copy, Debug)]
pub struct HashTableSettings {
pub struct Settings {
/// Seed handed to `xxh64` when a key is hashed in `translate_key`.
pub seed: u64,
/// Read from header byte 14 by the header parser.
/// NOTE(review): presumably a shift amount used to derive the bloom mask — confirm in `translate_key`.
pub blshift: u8,
}
/// Header of a hash table; the multi-byte fields are decoded from big-endian bytes by `parse`.
#[derive(Clone, Copy, Debug)]
pub struct HashTableHeader {
pub struct Header {
/// Offset into the complete input at which the string table begins.
pub strtab_link: u32,
/// Number of bucket entries; the bucket region spans 4 bytes per entry.
pub nbuckets: u32,
/// Number of chain entries; the chain region spans 32 bytes per entry.
pub nchains: u32,
/// Number of bloom filter words; the bloom region spans 8 bytes per word.
pub nblf: u16,
/// Hashing parameters (seed and `blshift`) stored alongside the counts.
pub settings: HashTableSettings,
pub settings: Settings,
}
/// Borrowed view of a parsed hash table; every slice points into the input given to `parse`.
#[derive(Clone)]
pub struct HashTableRef<'a> {
pub struct Ref<'a> {
// Complete input starting at the header's `strtab_link` offset; entry names are read from here.
strtab: &'a [u8],
// Bloom region: the table bytes from offset 16 up to the end of the bloom words.
bloom: &'a [u8],
// NOTE(review): presumably the bucket region that follows the bloom region — confirm in `parse`.
buckets: &'a [u8],
// NOTE(review): presumably the chain region that follows the buckets — confirm in `parse`.
chains: &'a [u8],
// Hashing parameters copied from the parsed header.
settings: HashTableSettings,
settings: Settings,
}
/// Iterator over the entries of a hash table, created by `iter` on the table reference.
///
/// Each item is a `(hash, name, value)` triple.
#[derive(Clone)]
pub struct HashTableIter<'a> {
pub struct Iter<'a> {
// String table slice that entry names are resolved against.
strtab: &'a [u8],
// Chain bytes still to be visited; iteration stops once fewer than 32 bytes are left.
chains: &'a [u8],
}
/// Decoded payload of one hash table entry; `parse` needs at least 32 bytes of entry data.
#[derive(Clone, Copy, Debug)]
pub struct HashTableValue {
pub struct Value {
/// NOTE(review): presumably a type tag for the entry — confirm against `parse` and the format spec.
pub typ: u32,
/// NOTE(review): presumably the "L" column printed by the CLI — meaning not visible here.
pub lhs: u64,
/// NOTE(review): presumably the "R" column printed by the CLI — meaning not visible here.
pub rhs: u64,
}
impl HashTableHeader {
impl Header {
pub fn parse(data: &[u8]) -> Option<Self> {
if data.len() < 20 {
return None;
@ -49,7 +50,7 @@ impl HashTableHeader {
nbuckets: u32::from_be_bytes(data[4..8].try_into().unwrap()),
nchains: u32::from_be_bytes(data[8..12].try_into().unwrap()),
nblf: u16::from_be_bytes(data[12..14].try_into().unwrap()),
settings: HashTableSettings {
settings: Settings {
blshift: data[14],
seed,
},
@ -65,17 +66,12 @@ impl HashTableHeader {
}
}
/// Returns the part of `key` that precedes its first NUL (`0`) byte.
///
/// If `key` contains no NUL byte, the whole slice is returned.
pub fn trunc_key_at0(key: &[u8]) -> &[u8] {
    match key.iter().position(|&byte| byte == 0) {
        Some(nul_at) => &key[..nul_at],
        None => key,
    }
}
/// Hash -> index conversion/transformation helper.
///
/// Reduces `h` modulo the number of `div`-sized groups that fit into `items`
/// and returns the starting index of the selected group (a multiple of `div`).
///
/// # Panics
///
/// Panics when `div` is zero, when `items < div` (no complete group exists),
/// or when an intermediate value does not fit the target integer type.
pub fn hash_trf(h: u64, items: usize, div: usize) -> usize {
    let group_count = u64::try_from(items / div).unwrap();
    let group = usize::try_from(h % group_count).unwrap();
    div * group
}
impl HashTableValue {
impl Value {
pub fn parse(entry: &[u8]) -> Option<Self> {
if entry.len() < 32 {
return None;
@ -88,7 +84,7 @@ impl HashTableValue {
}
}
impl<'a> HashTableRef<'a> {
impl<'a> Ref<'a> {
/// `location` should be the offset where the hash table is present (in units of 16 bytes)
pub fn parse(data: &'a [u8], location: u32) -> Option<Self> {
let alldata = data;
@ -96,13 +92,13 @@ impl<'a> HashTableRef<'a> {
let offset = crate::decode_location(location);
let data = data.get(offset..)?;
let header = HashTableHeader::parse(data)?;
let header = Header::parse(data)?;
let data = data.get(..header.tabsize())?;
let bloom_end: usize = 16 + 8 * usize::from(header.nblf);
let buckets_end = bloom_end + 4 * uf(header.nbuckets).unwrap();
let chains_end = buckets_end + 32 * uf(header.nchains).unwrap();
Some(HashTableRef {
Some(Ref {
settings: header.settings,
strtab: alldata.get(usize::try_from(header.strtab_link).ok()?..)?,
bloom: &data[16..bloom_end],
@ -112,7 +108,7 @@ impl<'a> HashTableRef<'a> {
}
/// NOTE: the key is truncated after the first null byte
pub fn lookup(&self, key: &[u8]) -> Option<HashTableValue> {
pub fn lookup(&self, key: &[u8]) -> Option<Value> {
let key = trunc_key_at0(key);
let (h, blmask) = self.settings.translate_key(key);
@ -138,7 +134,7 @@ impl<'a> HashTableRef<'a> {
let e_name = trunc_key_at0(self.strtab.get(e_name_ix..)?);
if e_name == key {
return HashTableValue::parse(sel);
return Value::parse(sel);
}
}
@ -150,16 +146,16 @@ impl<'a> HashTableRef<'a> {
return None;
}
pub fn iter(&self) -> HashTableIter<'a> {
HashTableIter {
pub fn iter(&self) -> Iter<'a> {
Iter {
strtab: self.strtab,
chains: self.chains,
}
}
}
impl<'a> Iterator for HashTableIter<'a> {
type Item = (u64, &'a [u8], HashTableValue);
impl<'a> Iterator for Iter<'a> {
type Item = (u64, &'a [u8], Value);
fn next(&mut self) -> Option<Self::Item> {
if self.chains.len() < 32 {
@ -172,12 +168,12 @@ impl<'a> Iterator for HashTableIter<'a> {
Some((
u64::from_be_bytes(i[0..8].try_into().unwrap()),
e_name,
HashTableValue::parse(i).unwrap(),
Value::parse(i).unwrap(),
))
}
}
impl HashTableSettings {
impl Settings {
/// NOTE: the key must be truncated first using [`trunc_key_at0`]
pub fn translate_key(&self, key: &[u8]) -> (u64, u64) {
let h = xxh64(key, self.seed);

View file

@ -3,11 +3,9 @@
pub use int_enum::{IntEnum, IntEnumError};
pub mod hash_table;
pub mod hilbert;
mod ht;
pub use ht::*;
pub const MAGIC: [u8; 4] = [b'Y', b'g', b'L', b'n'];
#[derive(Clone, Copy, Debug, IntEnum)]
@ -74,6 +72,11 @@ pub enum Ntt01 {
Code = 0x0004,
}
/// Returns the part of `key` that precedes its first NUL (`0`) byte.
///
/// If `key` contains no NUL byte, the whole slice is returned.
pub fn trunc_key_at0(key: &[u8]) -> &[u8] {
    match key.iter().position(|&byte| byte == 0) {
        Some(nul_at) => &key[..nul_at],
        None => key,
    }
}
#[derive(Clone, Copy, Debug)]
pub struct LinearTableEntry<R> {
pub name: R,

View file

@ -78,7 +78,7 @@ fn main() {
}
}
Command::Hash => {
let ht = yglnk_core::HashTableRef::parse(data, cli.location)
let ht = yglnk_core::hash_table::Ref::parse(data, cli.location)
.expect("unable to parse table header");
println!("hash\tL\tR\tname\t\ttyp");
for (hash, name, value) in ht.iter() {
@ -93,7 +93,7 @@ fn main() {
}
}
Command::HashLookup { key } => {
let ht = yglnk_core::HashTableRef::parse(data, cli.location)
let ht = yglnk_core::hash_table::Ref::parse(data, cli.location)
.expect("unable to parse table header");
match ht.lookup(key.as_bytes()) {
None => println!("(none)"),