commit cf4dadd151b860cc6a057b1a1950e48693c3a21f Author: Erik Zscheile Date: Sun Dec 22 17:55:38 2019 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..c267b72 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,250 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "bstr" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d6c2c5b58ab920a4f5aeaaca34b4488074e8cc7596af94e6f8c6ff247c60245" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" + +[[package]] +name = "csv" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c" +dependencies = [ + "memchr", +] + +[[package]] +name = "delegate" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf5caf67ecbe612af35e1a9511e7959ecbc42b05f2c5145dbbf5f8fe817b1289" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "encoding" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" +dependencies = [ + "encoding-index-japanese", + "encoding-index-korean", + "encoding-index-simpchinese", + "encoding-index-singlebyte", + "encoding-index-tradchinese", +] + +[[package]] +name = "encoding-index-japanese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-korean" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-simpchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-singlebyte" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-tradchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding_index_tests" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" + +[[package]] +name = "itoa" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" + +[[package]] +name = "memchr" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" +dependencies = [ + "libc", +] + +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rdcsv" +version = "0.1.0" +dependencies = [ + "csv", + "encoding", + "readfilez", +] + +[[package]] +name = "readfilez" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e8cec9e94800df7eb42c8a22bf6e7c51d5893e2c76f202a182c52383c25fda" +dependencies = [ + "delegate", + "memmap", +] + +[[package]] +name = "regex-automata" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9" +dependencies = [ + "byteorder", +] + +[[package]] +name = "ryu" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" + +[[package]] +name = "serde" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449" + +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "winapi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..87e06eb --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "rdcsv" +version = "0.1.0" +authors = ["Erik Zscheile "] +edition = "2018" + +[dependencies] +csv = "1.1" +encoding = "0.2" +readfilez = "0.2" diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..98bfaa0 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,24 @@ +fn main() { + use encoding::types::Encoding; + + for i in std::env::args().skip(1) { + println!("F = {}", i); + + let fh = readfilez::read_from_file(std::fs::File::open(i)).expect("unable to open file"); + + let dat = encoding::all::ISO_8859_1 + .decode(&*fh, encoding::types::DecoderTrap::Replace) + .expect("got invalid latin-1 data"); + + let mut rdr = csv::ReaderBuilder::new() + .delimiter(b';') + .flexible(true) + .has_headers(false) + .from_reader(dat.as_bytes()); + + for result in rdr.records() { + let record = result.expect("got invalid line"); + println!("{:?}", record); + } + } +}