fix: Fix parsing CSV dictionary
`csv_async`'s deserializer seems to emit only a few specific types to the supplied visitor, namely strings and 64-bit numbers. I was not able to figure out why some numbers were considered floats. Each visitor for the hash types therefore needs to implement all of those data types and convert them, if necessary.
This commit is contained in:
parent
18af90ec1f
commit
c514f36bcb
4 changed files with 48 additions and 14 deletions
|
@ -16,7 +16,7 @@ pub(crate) fn command_definition() -> Command {
|
||||||
.about("Manipulate a hash dictionary file.")
|
.about("Manipulate a hash dictionary file.")
|
||||||
.subcommand(
|
.subcommand(
|
||||||
Command::new("lookup")
|
Command::new("lookup")
|
||||||
.about("Lookup a hash in the dictionary")
|
.about("Lookup a hash in the dictionary.")
|
||||||
.arg(Arg::new("hash").help("The hash to look up").required(true))
|
.arg(Arg::new("hash").help("The hash to look up").required(true))
|
||||||
.arg(
|
.arg(
|
||||||
Arg::new("group")
|
Arg::new("group")
|
||||||
|
@ -52,7 +52,7 @@ pub(crate) fn command_definition() -> Command {
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.subcommand(Command::new("save").about(
|
.subcommand(Command::new("save").about(
|
||||||
"Save back the currently loaded dictionary, with hashes pre-computed.\
|
"Save back the currently loaded dictionary, with hashes pre-computed. \
|
||||||
Pre-computing hashes speeds up loading large dictionaries, as they would \
|
Pre-computing hashes speeds up loading large dictionaries, as they would \
|
||||||
otherwise need to be computed on the fly.",
|
otherwise need to be computed on the fly.",
|
||||||
))
|
))
|
||||||
|
|
|
@ -8,7 +8,6 @@ use clap::parser::ValueSource;
|
||||||
use clap::value_parser;
|
use clap::value_parser;
|
||||||
use clap::{command, Arg};
|
use clap::{command, Arg};
|
||||||
use color_eyre::eyre::{Context, Result};
|
use color_eyre::eyre::{Context, Result};
|
||||||
use color_eyre::{Help, SectionExt};
|
|
||||||
use tokio::fs::File;
|
use tokio::fs::File;
|
||||||
use tokio::io::BufReader;
|
use tokio::io::BufReader;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
|
@ -81,8 +80,7 @@ async fn main() -> Result<()> {
|
||||||
let mut ctx = ctx.write().await;
|
let mut ctx = ctx.write().await;
|
||||||
let res = File::open(&path)
|
let res = File::open(&path)
|
||||||
.await
|
.await
|
||||||
.wrap_err("Failed to open dictionary file")
|
.wrap_err_with(|| format!("failed to open dictionary file: {}", path.display()));
|
||||||
.with_section(|| path.display().to_string().header("Path:"));
|
|
||||||
|
|
||||||
let f = match res {
|
let f = match res {
|
||||||
Ok(f) => f,
|
Ok(f) => f,
|
||||||
|
@ -98,7 +96,7 @@ async fn main() -> Result<()> {
|
||||||
|
|
||||||
let r = BufReader::new(f);
|
let r = BufReader::new(f);
|
||||||
if let Err(err) = ctx.lookup.from_csv(r).await {
|
if let Err(err) = ctx.lookup.from_csv(r).await {
|
||||||
tracing::error!("{}", err);
|
tracing::error!("{:?}", err);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,7 +113,8 @@ impl Bundle {
|
||||||
// `AsyncRead` and the bundle name separately.
|
// `AsyncRead` and the bundle name separately.
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
let bundle_name = if let Some(name) = path.file_name() {
|
let bundle_name = if let Some(name) = path.file_name() {
|
||||||
let hash = Murmur64::try_from(name.to_string_lossy().as_ref())?;
|
let hash = Murmur64::try_from(name.to_string_lossy().as_ref())
|
||||||
|
.wrap_err_with(|| format!("failed to turn string into hash: {:?}", name))?;
|
||||||
ctx.read().await.lookup_hash(hash, HashGroup::Filename)
|
ctx.read().await.lookup_hash(hash, HashGroup::Filename)
|
||||||
} else {
|
} else {
|
||||||
eyre::bail!("Invalid path to bundle file: {}", path.display());
|
eyre::bail!("Invalid path to bundle file: {}", path.display());
|
||||||
|
|
|
@ -64,12 +64,23 @@ impl<'de> Visitor<'de> for Murmur64 {
|
||||||
|
|
||||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||||
formatter.write_str(
|
formatter.write_str(
|
||||||
"an usinged 64 bit integer \
|
"an usigned 64 bit integer \
|
||||||
or a string in hexadecimal format encoding such an integer",
|
or a string in hexadecimal format encoding such an integer",
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
|
fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
|
||||||
|
where
|
||||||
|
E: serde::de::Error,
|
||||||
|
{
|
||||||
|
let bytes = value.to_le_bytes();
|
||||||
|
self.visit_u64(u64::from_le_bytes(bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
|
||||||
|
where
|
||||||
|
E: serde::de::Error,
|
||||||
|
{
|
||||||
Ok(Self::from(value))
|
Ok(Self::from(value))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,7 +90,10 @@ impl<'de> Visitor<'de> for Murmur64 {
|
||||||
{
|
{
|
||||||
match Murmur64::try_from(value) {
|
match Murmur64::try_from(value) {
|
||||||
Ok(hash) => Ok(hash),
|
Ok(hash) => Ok(hash),
|
||||||
Err(err) => Err(E::custom(err)),
|
Err(err) => Err(E::custom(format!(
|
||||||
|
"failed to convert '{}' to Murmur64: {}",
|
||||||
|
value, err
|
||||||
|
))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -112,7 +126,7 @@ impl TryFrom<&str> for Murmur32 {
|
||||||
type Error = ParseIntError;
|
type Error = ParseIntError;
|
||||||
|
|
||||||
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||||
u32::from_str_radix(value, 8).map(Self)
|
u32::from_str_radix(value, 16).map(Self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,12 +156,30 @@ impl<'de> Visitor<'de> for Murmur32 {
|
||||||
|
|
||||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||||
formatter.write_str(
|
formatter.write_str(
|
||||||
"an usinged 32 bit integer \
|
"an usigned 32 bit integer \
|
||||||
or a string in hexadecimal format encoding such an integer",
|
or a string in hexadecimal format encoding such an integer",
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E> {
|
fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
|
||||||
|
where
|
||||||
|
E: serde::de::Error,
|
||||||
|
{
|
||||||
|
let bytes = value.to_le_bytes();
|
||||||
|
self.visit_u32(u64::from_le_bytes(bytes) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
|
||||||
|
where
|
||||||
|
E: serde::de::Error,
|
||||||
|
{
|
||||||
|
self.visit_u32(value as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
|
||||||
|
where
|
||||||
|
E: serde::de::Error,
|
||||||
|
{
|
||||||
Ok(Self::from(value))
|
Ok(Self::from(value))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,7 +189,10 @@ impl<'de> Visitor<'de> for Murmur32 {
|
||||||
{
|
{
|
||||||
match Murmur32::try_from(value) {
|
match Murmur32::try_from(value) {
|
||||||
Ok(hash) => Ok(hash),
|
Ok(hash) => Ok(hash),
|
||||||
Err(err) => Err(E::custom(err)),
|
Err(err) => Err(E::custom(format!(
|
||||||
|
"failed to convert '{}' to Murmur32: {}",
|
||||||
|
value, err
|
||||||
|
))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue