collate all; get rid of string_cache

This commit is contained in:
Erik Zscheile 2019-12-23 16:56:34 +01:00
parent 83b93866f2
commit 319dbc5a46
5 changed files with 92 additions and 156 deletions

41
Cargo.lock generated
View file

@ -330,12 +330,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40f005c60db6e03bae699e414c58bf9aa7ea02a2d0b9bfbcf19286cc4c82b30"
[[package]]
name = "num-integer"
version = "0.1.41"
@ -355,21 +349,6 @@ dependencies = [
"autocfg",
]
[[package]]
name = "phf_shared"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
dependencies = [
"siphasher",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "prettytable-rs"
version = "0.8.0"
@ -412,7 +391,6 @@ dependencies = [
"fixed",
"prettytable-rs",
"readfilez",
"string_cache",
"term",
"thiserror",
]
@ -539,25 +517,6 @@ version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449"
[[package]]
name = "siphasher"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83da420ee8d1a89e640d0948c646c1c088758d3a3c538f943bfa97bdac17929d"
[[package]]
name = "string_cache"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2940c75beb4e3bf3a494cef919a747a2cb81e52571e212bfbd185074add7208a"
dependencies = [
"lazy_static",
"new_debug_unreachable",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "syn"
version = "0.15.44"

View file

@ -9,11 +9,10 @@ csv = "1.1"
chrono = "0.4"
encoding = "0.2"
fixed = "0.5"
readfilez = "0.2"
string_cache = "0.8"
thiserror = "1.0"
prettytable = { version = "0.8", package = "prettytable-rs" }
readfilez = "0.2"
term = "0.5"
thiserror = "1.0"
[profile.release]
lto = true

View file

@ -30,13 +30,14 @@ fn main() {
}
let mut stdout = term::stdout().unwrap();
let mut dat = Vec::<transaction::TransactionLine>::new();
for i in args {
writeln!(&mut stdout, "F = {}", i).unwrap();
let fh = readfilez::read_from_file(std::fs::File::open(i)).expect("unable to open file");
let dat = encoding::all::ISO_8859_1
let tmp = encoding::all::ISO_8859_1
.decode(&*fh, encoding::types::DecoderTrap::Replace)
.expect("got invalid latin-1 data");
@ -46,7 +47,7 @@ fn main() {
.delimiter(b';')
.flexible(true)
.has_headers(false)
.from_reader(dat.as_bytes());
.from_reader(tmp.as_bytes());
let mut recsit = rdr.records().skip(8);
@ -69,8 +70,6 @@ fn main() {
]
);
let mut tmp = transaction::TransactionColl::new();
for result in recsit {
let record = result.expect("got invalid line");
let record_bak = record.clone();
@ -78,87 +77,92 @@ fn main() {
std::convert::TryInto::try_into(record);
match pres {
Ok(tl) => tmp.push(tl),
Ok(tl) => dat.push(tl),
Err(transaction::ParseError::Finalizer) => break,
Err(x) => panic!("got error '{}' @ {:?}", x, record_bak),
}
}
}
let dat = tmp.finish();
dat.sort();
let oldlen = dat.len();
dat.dedup();
println!("dropped {} duplicates", oldlen - dat.len());
let mut accu = BTreeMap::<
(i32, bool, string_cache::DefaultAtom),
(usize, transaction::TransactionValue, String, usize),
>::new();
println!("{:#?}", &dat);
let mut i_skipped = 0usize;
for i in dat {
if i.direction != simple_enums::TransactionDirection::Haben
|| i.waehrung != simple_enums::Waehrung::EUR
|| i.p_other.is_empty()
|| i.p_other.find(" ZINS BIS ").is_some()
{
i_skipped += 1;
continue;
}
let mut ent = accu
.entry((
i.d_buchungs.year(),
i.d_buchungs.month() > 6,
i.p_other.clone(),
))
.or_default();
ent.0 += 1;
ent.1 += i.umsatz;
}
let mut accu = BTreeMap::<
(i32, bool, String),
(usize, transaction::TransactionValue, String, usize),
>::new();
writeln!(&mut stdout, "skipped {} entries", i_skipped).unwrap();
if accu.is_empty() {
let mut i_skipped = 0usize;
for i in dat {
if i.direction != simple_enums::TransactionDirection::Haben
|| i.waehrung != simple_enums::Waehrung::EUR
|| i.p_other.is_empty()
|| i.p_other.find(" ZINS BIS ").is_some()
{
i_skipped += 1;
continue;
}
let mut pdsp = 0;
for i in accu.values_mut() {
i.2 = i.1.to_string();
i.3 = i.2.find('.').map(|x| i.2.len() - x).unwrap_or(0);
pdsp = std::cmp::max(pdsp, i.3);
}
let mut table = Table::new();
table.set_format(
format::FormatBuilder::new()
.column_separator('│')
.borders('│')
.separator(
format::LinePosition::Top,
format::LineSeparator::new('─', '┬', '┌', '┐'),
)
.separator(
format::LinePosition::Title,
format::LineSeparator::new('─', '┼', '├', '┤'),
)
.separator(
format::LinePosition::Bottom,
format::LineSeparator::new('─', '┴', '└', '┘'),
)
.padding(1, 1)
.build(),
);
table.set_titles(row!["Jahr", "Einzahler", "Zahlungen", "Summe"]);
for (k, v) in accu.iter_mut() {
let yearuhj = format!("{} {}", k.0, if k.1 { "II" } else { "I" });
v.2 += &std::iter::repeat(' ').take(pdsp - v.3).collect::<String>();
table.add_row(if highlight.contains(&*k.2) {
row![yearuhj, FYBdb-> k.2, r-> v.0.to_string(), Fgbr-> v.2]
} else {
row![yearuhj, k.2, r-> v.0.to_string(), r-> v.2]
});
}
table
.print_term(&mut *stdout)
.expect("unable to print table");
let mut ent = accu
.entry((
i.d_buchungs.year(),
i.d_buchungs.month() > 6,
i.p_other.clone(),
))
.or_default();
ent.0 += 1;
ent.1 += i.umsatz;
}
writeln!(&mut stdout, "skipped {} entries", i_skipped).unwrap();
if accu.is_empty() {
return;
}
let mut pdsp = 0;
for i in accu.values_mut() {
i.2 = i.1.to_string();
i.3 = i.2.find('.').map(|x| i.2.len() - x).unwrap_or(0);
pdsp = std::cmp::max(pdsp, i.3);
}
let mut table = Table::new();
table.set_format(
format::FormatBuilder::new()
.column_separator('│')
.borders('│')
.separator(
format::LinePosition::Top,
format::LineSeparator::new('─', '┬', '┌', '┐'),
)
.separator(
format::LinePosition::Title,
format::LineSeparator::new('─', '┼', '├', '┤'),
)
.separator(
format::LinePosition::Bottom,
format::LineSeparator::new('─', '┴', '└', '┘'),
)
.padding(1, 1)
.build(),
);
table.set_titles(row!["Jahr", "Einzahler", "Zahlungen", "Summe"]);
for (k, v) in accu.iter_mut() {
let yearuhj = format!("{} {}", k.0, if k.1 { "II" } else { "I" });
v.2 += &std::iter::repeat(' ').take(pdsp - v.3).collect::<String>();
table.add_row(if highlight.contains(&*k.2) {
row![yearuhj, FYBdb-> k.2, r-> v.0.to_string(), Fgbr-> v.2]
} else {
row![yearuhj, k.2, r-> v.0.to_string(), r-> v.2]
});
}
table
.print_term(&mut *stdout)
.expect("unable to print table");
}

View file

@ -26,7 +26,7 @@ pub mod parser_error {
use std::fmt;
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Waehrung {
EUR,
USD,
@ -50,7 +50,7 @@ impl std::str::FromStr for Waehrung {
}
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum TransactionDirection {
Haben, // H = Gutschrift
Soll, // S = Belastung

View file

@ -1,7 +1,5 @@
use crate::simple_enums::{TransactionDirection, Waehrung};
use chrono::naive::NaiveDate;
use std::collections::HashSet;
use string_cache::DefaultAtom;
use thiserror::Error;
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
@ -13,48 +11,22 @@ pub enum KontoDaten {
pub type TransactionValue = fixed::types::U21F11;
/// One parsed row of the semicolon-delimited transaction export.
///
/// Values are built from a `csv::StringRecord` by the `TryFrom` impl in this
/// file; the column numbers in the field comments below are the zero-based
/// record indices that impl reads.
#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct TransactionLine {
// Parsed from column 0 with the format "%d.%m.%Y".
pub d_buchungs: NaiveDate,
// Parsed from column 1 with the format "%d.%m.%Y".
pub d_valuta: NaiveDate,
// The 3rd column ("Auftraggeber/Zahlungsempfänger") is ignored,
// because it should be nearly identical for every row.
// Taken from column 3.
pub p_other: DefaultAtom,
pub p_other: String,
// Decoded from the emptiness pattern of several columns starting at column 4
// (only the beginning of that decoding is visible here — TODO confirm the full set).
pub konto_data: KontoDaten,
// Taken verbatim from column 8.
pub verwendungszw: String,
// Taken verbatim from column 9.
pub kref: String,
// The 10th column ("Kundenreferenz") is ignored;
// it should almost always be empty.
// Parsed from column 10 via `FromStr`.
pub waehrung: Waehrung,
// Parsed from column 11 after removing every '.' and replacing ',' with '.'
// (presumably thousands separators and a decimal comma — verify against the input format).
pub umsatz: TransactionValue,
// Parsed from column 12 via `FromStr`.
pub direction: TransactionDirection,
}
/// Collects [`TransactionLine`]s while keeping a set of every distinct
/// `p_other` atom seen so far.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct TransactionColl {
    /// Every distinct `p_other` value that has been pushed.
    pothc: HashSet<DefaultAtom>,
    /// The pushed lines, in insertion order.
    elems: Vec<TransactionLine>,
}

impl TransactionColl {
    /// Creates an empty collection.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `tl` to the collection.
    ///
    /// If an atom equal to `tl.p_other` is already stored in the set, the
    /// line's `p_other` is replaced by a clone of the stored atom; otherwise
    /// a clone of the line's atom is added to the set.
    pub fn push(&mut self, mut tl: TransactionLine) {
        match self.pothc.get(&tl.p_other).cloned() {
            Some(known) => tl.p_other = known,
            None => {
                self.pothc.insert(tl.p_other.clone());
            }
        }
        self.elems.push(tl);
    }

    /// Consumes the collection and returns the accumulated lines.
    #[inline]
    pub fn finish(self) -> Vec<TransactionLine> {
        let Self { elems, .. } = self;
        elems
    }
}
#[derive(Clone, Debug, Error)]
pub enum ParseError {
#[error("invalid date")]
@ -99,6 +71,9 @@ impl std::convert::TryFrom<csv::StringRecord> for TransactionLine {
return Err(ParseError::Finalizer);
}
// no "Kundenreferenz"
assert!(record[9].is_empty());
// decode KontoDaten
let r_e: &[u8] = &[
record[4].is_empty() as u8,
@ -130,10 +105,9 @@ impl std::convert::TryFrom<csv::StringRecord> for TransactionLine {
Ok(TransactionLine {
d_buchungs: NaiveDate::parse_from_str(&record[0], "%d.%m.%Y")?,
d_valuta: NaiveDate::parse_from_str(&record[1], "%d.%m.%Y")?,
p_other: DefaultAtom::from(&record[3]),
p_other: record[3].to_string(),
konto_data,
verwendungszw: record[8].to_string(),
kref: record[9].to_string(),
waehrung: record[10].parse()?,
umsatz: record[11].replace('.', "").replace(',', ".").parse()?,
direction: record[12].parse()?,