feat(dtmt): Add command to print the dictionary

This is mostly helpful to check/debug whether the internal dictionary
actually contains the expected data. For manually looking through the
entire dictionary, opening the CSV file is still more convenient.
This commit is contained in:
Lucas Schwiderski 2023-02-17 11:10:56 +01:00
parent 9077d791b2
commit 1d08498131
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8
5 changed files with 81 additions and 2 deletions

23
Cargo.lock generated
View file

@ -291,6 +291,28 @@ dependencies = [
"os_str_bytes",
]
[[package]]
name = "cli-table"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adfbb116d9e2c4be7011360d0c0bee565712c11e969c9609b25b619366dc379d"
dependencies = [
"cli-table-derive",
"termcolor",
"unicode-width",
]
[[package]]
name = "cli-table-derive"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2af3bfb9da627b0a6c467624fb7963921433774ed435493b5c08a3053e829ad4"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clipboard-win"
version = "4.4.2"
@ -654,6 +676,7 @@ name = "dtmt"
version = "0.2.0"
dependencies = [
"clap",
"cli-table",
"color-eyre",
"confy",
"csv-async",

View file

@ -26,6 +26,7 @@ confy = "0.5.1"
zip = "0.6.3"
string_template = "0.2.1"
promptly = "0.3.1"
cli-table = { version = "0.4.7", default-features = false, features = ["derive"] }
[dev-dependencies]
tempfile = "3.3.0"

View file

@ -1,8 +1,10 @@
use std::path::PathBuf;
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
use cli_table::{print_stdout, WithTitle};
use color_eyre::eyre::{Context, Result};
use color_eyre::{Help, SectionExt};
use sdk::murmur::{IdString64, Murmur32, Murmur64};
use tokio::fs::File;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio_stream::wrappers::LinesStream;
@ -27,6 +29,29 @@ impl From<HashGroup> for sdk::murmur::HashGroup {
}
}
// One row of the table printed by the `show` subcommand: a dictionary entry
// flattened into four owned, displayable columns.
//
// The `cli_table::Table` derive generates the table support used by
// `with_title()` / `print_stdout`; each `#[table(title = ...)]` attribute
// supplies the header text for its column.
// NOTE(review): column order presumably follows field declaration order --
// confirm against the cli-table documentation before reordering fields.
#[derive(cli_table::Table)]
struct TableRow {
// The entry's string value.
#[table(title = "Value")]
value: String,
// The entry's `Murmur64` hash.
#[table(title = "Murmur64")]
long: Murmur64,
// The entry's `Murmur32` hash.
#[table(title = "Murmur32")]
short: Murmur32,
// The hash group the entry is filed under.
#[table(title = "Group")]
group: sdk::murmur::HashGroup,
}
impl From<&sdk::murmur::Entry> for TableRow {
    /// Builds a table row from a borrowed dictionary entry.
    ///
    /// The entry is left untouched: its string value is copied into a new
    /// `String`, and the remaining fields are taken by value from the entry's
    /// getters.
    fn from(entry: &sdk::murmur::Entry) -> Self {
        // The row owns its data, so the borrowed string has to be duplicated.
        let value = entry.value().to_owned();
        let long = entry.long();
        let short = entry.short();
        let group = entry.group();

        Self {
            value,
            long,
            short,
            group,
        }
    }
}
pub(crate) fn command_definition() -> Command {
Command::new("dictionary")
.about("Manipulate a hash dictionary file.")
@ -67,6 +92,7 @@ pub(crate) fn command_definition() -> Command {
.value_parser(value_parser!(PathBuf)),
),
)
.subcommand(Command::new("show").about("Show the contents of the dictionary"))
.subcommand(Command::new("save").about(
"Save back the currently loaded dictionary, with hashes pre-computed. \
Pre-computing hashes speeds up loading large dictionaries, as they would \
@ -176,6 +202,14 @@ pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<(
.await
.wrap_err("Failed to write dictionary to disk")
}
Some(("show", _)) => {
let lookup = &ctx.lookup;
let rows: Vec<_> = lookup.entries().iter().map(TableRow::from).collect();
print_stdout(rows.with_title())?;
Ok(())
}
_ => unreachable!(
"clap is configured to require a subcommand, and they're all handled above"
),

View file

@ -55,6 +55,24 @@ pub struct Entry {
group: HashGroup,
}
impl Entry {
    /// Borrows the string stored in this entry.
    pub fn value(&self) -> &String {
        let Self { value, .. } = self;
        value
    }

    /// Returns the entry's `Murmur64` hash by value.
    pub fn long(&self) -> Murmur64 {
        let Self { long, .. } = self;
        *long
    }

    /// Returns the entry's `Murmur32` hash by value.
    pub fn short(&self) -> Murmur32 {
        let Self { short, .. } = self;
        *short
    }

    /// Returns the hash group this entry is filed under, by value.
    pub fn group(&self) -> HashGroup {
        let Self { group, .. } = self;
        *group
    }
}
/// A collection of [`Entry`] values.
pub struct Dictionary {
// Backing storage for the dictionary; kept private and exposed read-only
// through `entries()`.
entries: Vec<Entry>,
}
@ -172,4 +190,8 @@ impl Dictionary {
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
pub fn entries(&self) -> &Vec<Entry> {
&self.entries
}
}

View file

@ -13,8 +13,7 @@ mod murmurhash64;
pub const SEED: u32 = 0;
pub use dictionary::Dictionary;
pub use dictionary::HashGroup;
pub use dictionary::{Dictionary, Entry, HashGroup};
pub use murmurhash64::hash;
pub use murmurhash64::hash32;
pub use murmurhash64::hash_inverse as inverse;