From 036c20bd8cb4e5b15a96c09ae723eef399c60b1f Mon Sep 17 00:00:00 2001 From: Lucas Schwiderski Date: Fri, 17 Feb 2023 11:13:47 +0100 Subject: [PATCH] feat(lib): Implement IdString type This type is similar to an `Either` between a `Murmur64` hash and a `String`. This is necessary to be able to retain hash information where the hash is not in the dictionary, but at the same time allow string names where they are available. Up until now, when reading a bundle, all hashes would be converted to strings, which made sense for displaying those names. But when writing the same bundle back, those strings ended up being re-hashed, resulting in incorrect hashes. --- crates/dtmt/src/cmd/bundle/inject.rs | 8 +-- crates/dtmt/src/cmd/bundle/list.rs | 2 +- crates/dtmt/src/cmd/dictionary.rs | 16 +++-- lib/sdk/src/bundle/file.rs | 26 +++++--- lib/sdk/src/bundle/mod.rs | 8 ++- lib/sdk/src/context.rs | 8 +-- lib/sdk/src/filetype/package.rs | 5 +- lib/sdk/src/murmur/mod.rs | 90 ++++++++++++++++++++++++++++ 8 files changed, 137 insertions(+), 26 deletions(-) diff --git a/crates/dtmt/src/cmd/bundle/inject.rs b/crates/dtmt/src/cmd/bundle/inject.rs index 6d583b7..9c47686 100644 --- a/crates/dtmt/src/cmd/bundle/inject.rs +++ b/crates/dtmt/src/cmd/bundle/inject.rs @@ -58,14 +58,14 @@ pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> { Bundle::from_binary(&ctx, name, binary).wrap_err("Failed to open bundle file")? 
}; - if let Some(_name) = matches.get_one::("replace") { + if let Some(name) = matches.get_one::("replace") { let mut file = File::open(&file_path) .await .wrap_err_with(|| format!("failed to open '{}'", file_path.display()))?; if let Some(variant) = bundle .files_mut() - .filter(|file| file.matches_name(_name)) + .filter(|file| file.matches_name(name.clone())) // TODO: Handle file variants .find_map(|file| file.variants_mut().next()) { @@ -75,7 +75,7 @@ pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> { .wrap_err("failed to read input file")?; variant.set_data(data); } else { - let err = eyre::eyre!("No file '{}' in this bundle.", _name) + let err = eyre::eyre!("No file '{}' in this bundle.", name) .with_suggestion(|| { format!( "Run '{} bundle list {}' to list the files in this bundle.", @@ -87,7 +87,7 @@ pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> { format!( "Use '{} bundle inject --add {} {} {}' to add it as a new file", clap::crate_name!(), - _name, + name, bundle_path.display(), file_path.display() ) diff --git a/crates/dtmt/src/cmd/bundle/list.rs b/crates/dtmt/src/cmd/bundle/list.rs index ec869ba..a206af3 100644 --- a/crates/dtmt/src/cmd/bundle/list.rs +++ b/crates/dtmt/src/cmd/bundle/list.rs @@ -64,7 +64,7 @@ where let v = &f.variants()[0]; println!( "\t{}.{}: {} bytes", - f.base_name(), + f.base_name().display(), f.file_type().ext_name(), v.size() ); diff --git a/crates/dtmt/src/cmd/dictionary.rs b/crates/dtmt/src/cmd/dictionary.rs index e94e2ad..0a2491e 100644 --- a/crates/dtmt/src/cmd/dictionary.rs +++ b/crates/dtmt/src/cmd/dictionary.rs @@ -104,17 +104,23 @@ pub(crate) fn command_definition() -> Command { pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<()> { match matches.subcommand() { Some(("lookup", sub_matches)) => { - let hash = sub_matches - .get_one::("hash") - .expect("required argument not found"); + let hash = { + let s = sub_matches + 
.get_one::("hash") + .expect("required argument not found"); + + u64::from_str_radix(s, 16) + .wrap_err("failed to parse argument as hexadecimal string")? + }; let groups = sub_matches .get_many::("group") .unwrap_or_default(); for group in groups { - let value = ctx.lookup_hash(*hash, (*group).into()); - println!("{value}"); + if let IdString64::String(value) = ctx.lookup_hash(hash, (*group).into()) { + println!("{group}: {value}"); + } } Ok(()) diff --git a/lib/sdk/src/bundle/file.rs b/lib/sdk/src/bundle/file.rs index dd3d1a0..383f42a 100644 --- a/lib/sdk/src/bundle/file.rs +++ b/lib/sdk/src/bundle/file.rs @@ -9,7 +9,7 @@ use serde::Serialize; use crate::binary::sync::*; use crate::filetype::*; -use crate::murmur::{HashGroup, Murmur64}; +use crate::murmur::{HashGroup, IdString64, Murmur64}; use super::EntryHeader; @@ -499,7 +499,7 @@ bitflags! { pub struct BundleFile { file_type: BundleFileType, - name: String, + name: IdString64, variants: Vec, props: Properties, } @@ -508,7 +508,7 @@ impl BundleFile { pub fn new(name: String, file_type: BundleFileType) -> Self { Self { file_type, - name, + name: name.into(), variants: Vec::new(), props: Properties::empty(), } @@ -576,7 +576,7 @@ impl BundleFile { let mut w = Cursor::new(Vec::new()); w.write_u64(self.file_type.hash().into())?; - w.write_u64(Murmur64::hash(self.name.as_bytes()).into())?; + w.write_u64(self.name.to_murmur64().into())?; w.write_u32(self.variants.len() as u32)?; // TODO: Figure out what this is @@ -628,12 +628,12 @@ impl BundleFile { self.props } - pub fn base_name(&self) -> &String { + pub fn base_name(&self) -> &IdString64 { &self.name } pub fn name(&self, decompiled: bool, variant: Option) -> String { - let mut s = self.name.clone(); + let mut s = self.name.display().to_string(); s.push('.'); if let Some(variant) = variant { @@ -652,10 +652,18 @@ impl BundleFile { pub fn matches_name(&self, name: S) -> bool where - S: AsRef, + S: Into, { - let name = name.as_ref(); - self.name == name || 
self.name(false, None) == name || self.name(true, None) == name + let name = name.into(); + if self.name == name { + return true; + } + + if let IdString64::String(name) = name { + self.name(false, None) == name || self.name(true, None) == name + } else { + false + } } pub fn file_type(&self) -> BundleFileType { diff --git a/lib/sdk/src/bundle/mod.rs b/lib/sdk/src/bundle/mod.rs index b9534e4..d54ba33 100644 --- a/lib/sdk/src/bundle/mod.rs +++ b/lib/sdk/src/bundle/mod.rs @@ -67,7 +67,11 @@ impl Bundle { path.file_name() .and_then(|name| name.to_str()) .and_then(|name| Murmur64::try_from(name).ok()) - .map(|hash| ctx.lookup_hash(hash, HashGroup::Filename)) + .map(|hash| { + ctx.lookup_hash(hash, HashGroup::Filename) + .display() + .to_string() + }) .unwrap_or_else(|| path.display().to_string()) } @@ -220,7 +224,7 @@ impl Bundle { for file in self.files.iter() { w.write_u64(file.file_type().into())?; - w.write_u64(Murmur64::hash(file.base_name().as_bytes()).into())?; + w.write_u64(file.base_name().to_murmur64().into())?; w.write_u32(file.props().bits())?; } diff --git a/lib/sdk/src/context.rs b/lib/sdk/src/context.rs index 0116c4a..b0de6dc 100644 --- a/lib/sdk/src/context.rs +++ b/lib/sdk/src/context.rs @@ -1,6 +1,6 @@ use std::path::PathBuf; -use crate::murmur::{Dictionary, HashGroup, Murmur32, Murmur64}; +use crate::murmur::{Dictionary, HashGroup, IdString64, Murmur32, Murmur64}; pub struct Context { pub lookup: Dictionary, @@ -21,17 +21,17 @@ impl Context { } } - pub fn lookup_hash(&self, hash: M, group: HashGroup) -> String + pub fn lookup_hash(&self, hash: M, group: HashGroup) -> IdString64 where M: Into, { let hash = hash.into(); if let Some(s) = self.lookup.lookup(hash, group) { tracing::debug!(%hash, string = s, "Murmur64 lookup successful"); - s.to_owned() + s.to_string().into() } else { tracing::debug!(%hash, "Murmur64 lookup failed"); - format!("{hash:016X}") + hash.into() } } diff --git a/lib/sdk/src/filetype/package.rs b/lib/sdk/src/filetype/package.rs 
index 36e3575..8b42116 100644 --- a/lib/sdk/src/filetype/package.rs +++ b/lib/sdk/src/filetype/package.rs @@ -201,7 +201,10 @@ impl Package { let t = BundleFileType::from(r.read_u64()?); let hash = Murmur64::from(r.read_u64()?); let path = ctx.lookup_hash(hash, HashGroup::Filename); - inner.entry(t).or_default().insert(PathBuf::from(path)); + inner + .entry(t) + .or_default() + .insert(PathBuf::from(path.display().to_string())); } let pkg = Self { diff --git a/lib/sdk/src/murmur/mod.rs b/lib/sdk/src/murmur/mod.rs index 784a6df..d054b48 100644 --- a/lib/sdk/src/murmur/mod.rs +++ b/lib/sdk/src/murmur/mod.rs @@ -236,3 +236,93 @@ impl<'de> Deserialize<'de> for Murmur32 { deserializer.deserialize_any(Self(0)) } } + +// This type encodes the fact that when reading in a bundle, we don't always have a dictionary +// entry for every hash in there. So we do want to have the real string available when needed, +// but at the same time retain the original hash information for when we don't. +// This is especially important when wanting to write back the read bundle, as the hashes need to +// stay the same. +// The previous system of always turning hashes into strings worked well for the purpose of +// displaying hashes, but would have made it very hard to turn a stringified hash back into +// an actual hash. 
+#[derive(Clone, Debug, Eq)] +pub enum IdString64 { + Hash(Murmur64), + String(String), +} + +impl IdString64 { + pub fn to_murmur64(&self) -> Murmur64 { + match self { + Self::Hash(hash) => *hash, + Self::String(s) => Murmur64::hash(s.as_bytes()), + } + } + + pub fn display(&self) -> IdString64Display { + let s = match self { + IdString64::Hash(hash) => hash.to_string(), + IdString64::String(s) => s.clone(), + }; + + IdString64Display(s) + } + + pub fn is_string(&self) -> bool { + match self { + IdString64::Hash(_) => false, + IdString64::String(_) => true, + } + } + + pub fn is_hash(&self) -> bool { + match self { + IdString64::Hash(_) => true, + IdString64::String(_) => false, + } + } +} + +impl From for IdString64 { + fn from(value: String) -> Self { + Self::String(value) + } +} + +impl From for IdString64 { + fn from(value: Murmur64) -> Self { + Self::Hash(value) + } +} + +impl From for Murmur64 { + fn from(value: IdString64) -> Self { + value.to_murmur64() + } +} + +impl PartialEq for IdString64 { + fn eq(&self, other: &Self) -> bool { + self.to_murmur64() == other.to_murmur64() + } +} + +pub struct IdString64Display(String); + +impl std::fmt::Display for IdString64Display { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl std::fmt::UpperHex for IdString64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + std::fmt::UpperHex::fmt(&self.to_murmur64(), f) + } +} + +impl std::fmt::LowerHex for IdString64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + std::fmt::LowerHex::fmt(&self.to_murmur64(), f) + } +}