From c514f36bcb0528138dcfb0bc138242493ddfa6e0 Mon Sep 17 00:00:00 2001 From: Lucas Schwiderski Date: Tue, 15 Nov 2022 15:10:59 +0100 Subject: [PATCH] fix: Fix parsing CSV dictionary `csv_async`'s deserializer seems to only emit a few specific types to the supplied visitor. Namely strings and 64 bit numbers. I was not able to figure out why some numbers were considered floats. So each visitor for the hash types needs to implement all of those data types and convert them, if necessary. --- src/bin/cmd/dictionary.rs | 4 ++-- src/bin/dtmt.rs | 6 ++--- src/bundle/mod.rs | 3 ++- src/murmur/mod.rs | 49 +++++++++++++++++++++++++++++++++------ 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/src/bin/cmd/dictionary.rs b/src/bin/cmd/dictionary.rs index af9ac89..d160def 100644 --- a/src/bin/cmd/dictionary.rs +++ b/src/bin/cmd/dictionary.rs @@ -16,7 +16,7 @@ pub(crate) fn command_definition() -> Command { .about("Manipulate a hash dictionary file.") .subcommand( Command::new("lookup") - .about("Lookup a hash in the dictionary") + .about("Lookup a hash in the dictionary.") .arg(Arg::new("hash").help("The hash to look up").required(true)) .arg( Arg::new("group") @@ -52,7 +52,7 @@ pub(crate) fn command_definition() -> Command { ), ) .subcommand(Command::new("save").about( - "Save back the currently loaded dictionary, with hashes pre-computed.\ + "Save back the currently loaded dictionary, with hashes pre-computed. 
\ Pre-computing hashes speeds up loading large dictionaries, as they would \ otherwise need to be computed on the fly.", )) diff --git a/src/bin/dtmt.rs b/src/bin/dtmt.rs index 08517a0..ed54934 100644 --- a/src/bin/dtmt.rs +++ b/src/bin/dtmt.rs @@ -8,7 +8,6 @@ use clap::parser::ValueSource; use clap::value_parser; use clap::{command, Arg}; use color_eyre::eyre::{Context, Result}; -use color_eyre::{Help, SectionExt}; use tokio::fs::File; use tokio::io::BufReader; use tokio::sync::RwLock; @@ -81,8 +80,7 @@ async fn main() -> Result<()> { let mut ctx = ctx.write().await; let res = File::open(&path) .await - .wrap_err("Failed to open dictionary file") - .with_section(|| path.display().to_string().header("Path:")); + .wrap_err_with(|| format!("failed to open dictionary file: {}", path.display())); let f = match res { Ok(f) => f, @@ -98,7 +96,7 @@ async fn main() -> Result<()> { let r = BufReader::new(f); if let Err(err) = ctx.lookup.from_csv(r).await { - tracing::error!("{}", err); + tracing::error!("{:?}", err); } }); } diff --git a/src/bundle/mod.rs b/src/bundle/mod.rs index fdd6714..0d429f8 100644 --- a/src/bundle/mod.rs +++ b/src/bundle/mod.rs @@ -113,7 +113,8 @@ impl Bundle { // `AsyncRead` and the bundle name separately. 
let path = path.as_ref(); let bundle_name = if let Some(name) = path.file_name() { - let hash = Murmur64::try_from(name.to_string_lossy().as_ref())?; + let hash = Murmur64::try_from(name.to_string_lossy().as_ref()) + .wrap_err_with(|| format!("failed to turn string into hash: {:?}", name))?; ctx.read().await.lookup_hash(hash, HashGroup::Filename) } else { eyre::bail!("Invalid path to bundle file: {}", path.display()); diff --git a/src/murmur/mod.rs b/src/murmur/mod.rs index 05a12ce..45c9dfa 100644 --- a/src/murmur/mod.rs +++ b/src/murmur/mod.rs @@ -64,12 +64,23 @@ impl<'de> Visitor<'de> for Murmur64 { fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str( - "an usinged 64 bit integer \ + "an usigned 64 bit integer \ or a string in hexadecimal format encoding such an integer", ) } - fn visit_u64(self, value: u64) -> Result { + fn visit_f64(self, value: f64) -> Result + where + E: serde::de::Error, + { + let bytes = value.to_le_bytes(); + self.visit_u64(u64::from_le_bytes(bytes)) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { Ok(Self::from(value)) } @@ -79,7 +90,10 @@ impl<'de> Visitor<'de> for Murmur64 { { match Murmur64::try_from(value) { Ok(hash) => Ok(hash), - Err(err) => Err(E::custom(err)), + Err(err) => Err(E::custom(format!( + "failed to convert '{}' to Murmur64: {}", + value, err + ))), } } } @@ -112,7 +126,7 @@ impl TryFrom<&str> for Murmur32 { type Error = ParseIntError; fn try_from(value: &str) -> Result { - u32::from_str_radix(value, 8).map(Self) + u32::from_str_radix(value, 16).map(Self) } } @@ -142,12 +156,30 @@ impl<'de> Visitor<'de> for Murmur32 { fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str( - "an usinged 32 bit integer \ + "an usigned 32 bit integer \ or a string in hexadecimal format encoding such an integer", ) } - fn visit_u32(self, value: u32) -> Result { + fn visit_f64(self, value: f64) -> Result + where + E: serde::de::Error, + 
{ + let bytes = value.to_le_bytes(); + self.visit_u32(u64::from_le_bytes(bytes) as u32) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + self.visit_u32(value as u32) + } + + fn visit_u32(self, value: u32) -> Result + where + E: serde::de::Error, + { Ok(Self::from(value)) } @@ -157,7 +189,10 @@ impl<'de> Visitor<'de> for Murmur32 { { match Murmur32::try_from(value) { Ok(hash) => Ok(hash), - Err(err) => Err(E::custom(err)), + Err(err) => Err(E::custom(format!( + "failed to convert '{}' to Murmur32: {}", + value, err + ))), } } }