fix: Fix parsing CSV dictionary

`csv_async`'s deserializer seems to only emit a few specific types
to the supplied visitor, namely strings and 64-bit numbers. I was
not able to figure out why some numbers were considered floats.

So each visitor for the hash types needs to implement all of those
data types and convert them, if necessary.
This commit is contained in:
Lucas Schwiderski 2022-11-15 15:10:59 +01:00
parent 18af90ec1f
commit c514f36bcb
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8
4 changed files with 48 additions and 14 deletions

View file

@ -16,7 +16,7 @@ pub(crate) fn command_definition() -> Command {
.about("Manipulate a hash dictionary file.") .about("Manipulate a hash dictionary file.")
.subcommand( .subcommand(
Command::new("lookup") Command::new("lookup")
.about("Lookup a hash in the dictionary") .about("Lookup a hash in the dictionary.")
.arg(Arg::new("hash").help("The hash to look up").required(true)) .arg(Arg::new("hash").help("The hash to look up").required(true))
.arg( .arg(
Arg::new("group") Arg::new("group")
@ -52,7 +52,7 @@ pub(crate) fn command_definition() -> Command {
), ),
) )
.subcommand(Command::new("save").about( .subcommand(Command::new("save").about(
"Save back the currently loaded dictionary, with hashes pre-computed.\ "Save back the currently loaded dictionary, with hashes pre-computed. \
Pre-computing hashes speeds up loading large dictionaries, as they would \ Pre-computing hashes speeds up loading large dictionaries, as they would \
otherwise need to be computed on the fly.", otherwise need to be computed on the fly.",
)) ))

View file

@ -8,7 +8,6 @@ use clap::parser::ValueSource;
use clap::value_parser; use clap::value_parser;
use clap::{command, Arg}; use clap::{command, Arg};
use color_eyre::eyre::{Context, Result}; use color_eyre::eyre::{Context, Result};
use color_eyre::{Help, SectionExt};
use tokio::fs::File; use tokio::fs::File;
use tokio::io::BufReader; use tokio::io::BufReader;
use tokio::sync::RwLock; use tokio::sync::RwLock;
@ -81,8 +80,7 @@ async fn main() -> Result<()> {
let mut ctx = ctx.write().await; let mut ctx = ctx.write().await;
let res = File::open(&path) let res = File::open(&path)
.await .await
.wrap_err("Failed to open dictionary file") .wrap_err_with(|| format!("failed to open dictionary file: {}", path.display()));
.with_section(|| path.display().to_string().header("Path:"));
let f = match res { let f = match res {
Ok(f) => f, Ok(f) => f,
@ -98,7 +96,7 @@ async fn main() -> Result<()> {
let r = BufReader::new(f); let r = BufReader::new(f);
if let Err(err) = ctx.lookup.from_csv(r).await { if let Err(err) = ctx.lookup.from_csv(r).await {
tracing::error!("{}", err); tracing::error!("{:?}", err);
} }
}); });
} }

View file

@ -113,7 +113,8 @@ impl Bundle {
// `AsyncRead` and the bundle name separately. // `AsyncRead` and the bundle name separately.
let path = path.as_ref(); let path = path.as_ref();
let bundle_name = if let Some(name) = path.file_name() { let bundle_name = if let Some(name) = path.file_name() {
let hash = Murmur64::try_from(name.to_string_lossy().as_ref())?; let hash = Murmur64::try_from(name.to_string_lossy().as_ref())
.wrap_err_with(|| format!("failed to turn string into hash: {:?}", name))?;
ctx.read().await.lookup_hash(hash, HashGroup::Filename) ctx.read().await.lookup_hash(hash, HashGroup::Filename)
} else { } else {
eyre::bail!("Invalid path to bundle file: {}", path.display()); eyre::bail!("Invalid path to bundle file: {}", path.display());

View file

@ -64,12 +64,23 @@ impl<'de> Visitor<'de> for Murmur64 {
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str( formatter.write_str(
"an usinged 64 bit integer \ "an usigned 64 bit integer \
or a string in hexadecimal format encoding such an integer", or a string in hexadecimal format encoding such an integer",
) )
} }
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> { fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let bytes = value.to_le_bytes();
self.visit_u64(u64::from_le_bytes(bytes))
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Self::from(value)) Ok(Self::from(value))
} }
@ -79,7 +90,10 @@ impl<'de> Visitor<'de> for Murmur64 {
{ {
match Murmur64::try_from(value) { match Murmur64::try_from(value) {
Ok(hash) => Ok(hash), Ok(hash) => Ok(hash),
Err(err) => Err(E::custom(err)), Err(err) => Err(E::custom(format!(
"failed to convert '{}' to Murmur64: {}",
value, err
))),
} }
} }
} }
@ -112,7 +126,7 @@ impl TryFrom<&str> for Murmur32 {
type Error = ParseIntError; type Error = ParseIntError;
fn try_from(value: &str) -> Result<Self, Self::Error> { fn try_from(value: &str) -> Result<Self, Self::Error> {
u32::from_str_radix(value, 8).map(Self) u32::from_str_radix(value, 16).map(Self)
} }
} }
@ -142,12 +156,30 @@ impl<'de> Visitor<'de> for Murmur32 {
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str( formatter.write_str(
"an usinged 32 bit integer \ "an usigned 32 bit integer \
or a string in hexadecimal format encoding such an integer", or a string in hexadecimal format encoding such an integer",
) )
} }
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E> { fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let bytes = value.to_le_bytes();
self.visit_u32(u64::from_le_bytes(bytes) as u32)
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
self.visit_u32(value as u32)
}
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Self::from(value)) Ok(Self::from(value))
} }
@ -157,7 +189,10 @@ impl<'de> Visitor<'de> for Murmur32 {
{ {
match Murmur32::try_from(value) { match Murmur32::try_from(value) {
Ok(hash) => Ok(hash), Ok(hash) => Ok(hash),
Err(err) => Err(E::custom(err)), Err(err) => Err(E::custom(format!(
"failed to convert '{}' to Murmur32: {}",
value, err
))),
} }
} }
} }