feat: Show status when adding dictionary entries
This commit is contained in:
parent
61a80f71a7
commit
2c34d98193
3 changed files with 42 additions and 5 deletions
|
@ -2,6 +2,10 @@
|
||||||
|
|
||||||
== [Unreleased]
|
== [Unreleased]
|
||||||
|
|
||||||
|
=== Added
|
||||||
|
|
||||||
|
- show status after adding dictionary entries
|
||||||
|
|
||||||
== [v0.2.0] - 2022-12-28
|
== [v0.2.0] - 2022-12-28
|
||||||
|
|
||||||
=== Added
|
=== Added
|
||||||
|
|
|
@ -112,14 +112,25 @@ pub(crate) async fn run(ctx: Arc<RwLock<sdk::Context>>, matches: &ArgMatches) ->
|
||||||
BufReader::new(Box::new(f))
|
BufReader::new(Box::new(f))
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut added = 0;
|
||||||
|
let mut skipped = 0;
|
||||||
|
|
||||||
let lines: Vec<_> = LinesStream::new(r.lines()).collect().await;
|
let lines: Vec<_> = LinesStream::new(r.lines()).collect().await;
|
||||||
{
|
let total = {
|
||||||
let mut ctx = ctx.write().await;
|
let mut ctx = ctx.write().await;
|
||||||
for line in lines.into_iter() {
|
for line in lines.into_iter() {
|
||||||
ctx.lookup.add(line?, (*group).into());
|
let value = line?;
|
||||||
|
if ctx.lookup.find(&value, (*group).into()).is_some() {
|
||||||
|
skipped += 1;
|
||||||
|
} else {
|
||||||
|
ctx.lookup.add(value, (*group).into());
|
||||||
|
added += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx.lookup.len()
|
||||||
|
};
|
||||||
|
|
||||||
let out_path = matches
|
let out_path = matches
|
||||||
.get_one::<PathBuf>("dictionary")
|
.get_one::<PathBuf>("dictionary")
|
||||||
.expect("no default value for 'dictionary' parameter");
|
.expect("no default value for 'dictionary' parameter");
|
||||||
|
@ -139,7 +150,15 @@ pub(crate) async fn run(ctx: Arc<RwLock<sdk::Context>>, matches: &ArgMatches) ->
|
||||||
.lookup
|
.lookup
|
||||||
.to_csv(f)
|
.to_csv(f)
|
||||||
.await
|
.await
|
||||||
.wrap_err("Failed to write dictionary to disk")
|
.wrap_err("Failed to write dictionary to disk")?;
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
"Added {} entries, skipped {} duplicates. Total now {}.",
|
||||||
|
added,
|
||||||
|
skipped,
|
||||||
|
total
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
Some(("save", _)) => {
|
Some(("save", _)) => {
|
||||||
let out_path = matches
|
let out_path = matches
|
||||||
|
|
|
@ -48,7 +48,7 @@ struct Row {
|
||||||
group: HashGroup,
|
group: HashGroup,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Entry {
|
pub struct Entry {
|
||||||
value: String,
|
value: String,
|
||||||
long: Murmur64,
|
long: Murmur64,
|
||||||
short: Murmur32,
|
short: Murmur32,
|
||||||
|
@ -143,6 +143,12 @@ impl Dictionary {
|
||||||
self.entries.push(entry);
|
self.entries.push(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the first entry whose value and hash group both match, if any.
///
/// Callers can use this to check whether a value is already present in a
/// group before adding it again.
///
/// Takes `&self` and `&str`: the search only reads the entries, and a
/// `&String` argument still coerces to `&str` at existing call sites.
pub fn find(&self, value: &str, group: HashGroup) -> Option<&Entry> {
    self.entries
        .iter()
        .find(|e| e.value == value && e.group == group)
}
|
||||||
|
|
||||||
pub fn lookup(&self, hash: Murmur64, group: HashGroup) -> Option<&String> {
|
pub fn lookup(&self, hash: Murmur64, group: HashGroup) -> Option<&String> {
|
||||||
self.entries
|
self.entries
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -158,4 +164,12 @@ impl Dictionary {
|
||||||
.find(|e| e.short == hash)
|
.find(|e| e.short == hash)
|
||||||
.map(|e| &e.value)
|
.map(|e| &e.value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of entries currently held in `self.entries`.
pub fn len(&self) -> usize {
|
||||||
|
self.entries.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `self.entries` holds no entries.
pub fn is_empty(&self) -> bool {
|
||||||
|
self.entries.is_empty()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue