From d0fefee6672ec222d5260c7a44d7e568df8edadf Mon Sep 17 00:00:00 2001 From: Lucas Schwiderski Date: Tue, 1 Jul 2025 10:37:19 +0200 Subject: [PATCH] dtmt: Optimize dictionary creation Improve the way the dictionary can read and write its config files, as well as improve the shared access during runtime. --- crates/dtmt/src/cmd/dictionary.rs | 11 +++++++---- crates/dtmt/src/main.rs | 6 ++++-- lib/sdk/src/context.rs | 9 ++++++--- lib/sdk/src/murmur/dictionary.rs | 12 ++++++++---- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/crates/dtmt/src/cmd/dictionary.rs b/crates/dtmt/src/cmd/dictionary.rs index 4c54c34..8f0d32c 100644 --- a/crates/dtmt/src/cmd/dictionary.rs +++ b/crates/dtmt/src/cmd/dictionary.rs @@ -1,4 +1,5 @@ use std::path::PathBuf; +use std::sync::Arc; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum}; use cli_table::{print_stdout, WithTitle}; @@ -156,6 +157,8 @@ pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<( BufReader::new(Box::new(f)) }; + let lookup = Arc::make_mut(&mut ctx.lookup); + let group = sdk::murmur::HashGroup::from(*group); let mut added = 0; @@ -165,15 +168,15 @@ pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<( let total = { for line in lines.into_iter() { let value = line?; - if ctx.lookup.find(&value, group).is_some() { + if lookup.find(&value, group).is_some() { skipped += 1; } else { - ctx.lookup.add(value, group); + lookup.add(value, group); added += 1; } } - ctx.lookup.len() + lookup.len() }; let out_path = matches @@ -190,7 +193,7 @@ pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<( }) .with_section(|| out_path.display().to_string().header("Path:"))?; - ctx.lookup + lookup .to_csv(f) .await .wrap_err("Failed to write dictionary to disk")?; diff --git a/crates/dtmt/src/main.rs b/crates/dtmt/src/main.rs index e41e802..ef3d36b 100644 --- a/crates/dtmt/src/main.rs +++ b/crates/dtmt/src/main.rs @@ -12,6 +12,7 @@ use clap::value_parser; use clap::{command, Arg}; use color_eyre::eyre; use color_eyre::eyre::{Context, Result}; +use sdk::murmur::Dictionary; use serde::{Deserialize, Serialize}; use tokio::fs::File; use tokio::io::BufReader; @@ -107,8 +108,9 @@ async fn main() -> Result<()> { let r = BufReader::new(f); let mut ctx = ctx.write().await; - if let Err(err) = ctx.lookup.from_csv(r).await { - tracing::error!("{:#}", err); + match Dictionary::from_csv(r).await { + Ok(lookup) => ctx.lookup = Arc::new(lookup), + Err(err) => tracing::error!("{:#}", err), } }) }; diff --git a/lib/sdk/src/context.rs b/lib/sdk/src/context.rs index 1500290..8c10b3c 100644 --- a/lib/sdk/src/context.rs +++ b/lib/sdk/src/context.rs @@ -1,8 +1,11 @@ +use std::ffi::OsString; +use std::path::PathBuf; use std::process::Command; -use std::{ffi::OsString, path::PathBuf}; +use std::sync::Arc; use crate::murmur::{Dictionary, HashGroup, IdString64, Murmur32, Murmur64}; +#[derive(Clone)] pub struct CmdLine { cmd: OsString, args: Vec, @@ -52,7 +55,7 @@ impl From<&CmdLine> for Command { } pub struct Context { - pub lookup: Dictionary, + pub lookup: Arc, pub ljd: Option, pub revorb: Option, pub ww2ogg: Option, @@ -62,7 +65,7 @@ pub struct Context { impl Context { pub fn new() -> Self { Self { - lookup: Dictionary::new(), + lookup: Arc::new(Dictionary::new()), ljd: None, revorb: None, ww2ogg: None, diff --git a/lib/sdk/src/murmur/dictionary.rs b/lib/sdk/src/murmur/dictionary.rs index 267f0a4..c1b5636 100644 --- a/lib/sdk/src/murmur/dictionary.rs +++ b/lib/sdk/src/murmur/dictionary.rs @@ -48,6 +48,7 @@ struct Row { group: HashGroup, } +#[derive(Clone)] pub struct Entry { value: String, long: Murmur64, @@ -73,6 +74,7 @@ impl Entry { } } +#[derive(Clone)] pub struct Dictionary { entries: Vec, } @@ -88,10 +90,12 @@ impl Dictionary { Self { entries: vec![] } } - pub async fn from_csv(&mut self, r: R) -> Result<()> + pub async fn from_csv(r: R) -> Result where R: AsyncRead + std::marker::Unpin + std::marker::Send, { + let mut entries = vec![]; + let r = AsyncDeserializer::from_reader(r); let mut records = r.into_deserialize::(); @@ -112,10 +116,10 @@ impl Dictionary { group: record.group, }; - self.entries.push(entry); + entries.push(entry); } - Ok(()) + Ok(Self { entries }) } pub async fn to_csv(&self, w: W) -> Result<()> @@ -161,7 +165,7 @@ impl Dictionary { self.entries.push(entry); } - pub fn find(&mut self, value: &String, group: HashGroup) -> Option<&Entry> { + pub fn find(&self, value: &String, group: HashGroup) -> Option<&Entry> { self.entries .iter() .find(|e| e.value == *value && e.group == group)