feat(lib): Implement IdString type

This type is similar to an `Either` between a `Murmur64` hash and a
`String`. This is necessary to be able to retain hash information where
the hash is not in the dictionary, but at the same time allow string
names where they are available.

Up until now, when reading a bundle, all hashes would be converted to
strings, which made sense for displaying those names. But when writing
the same bundle back, those strings ended up being re-hashed, resulting
in incorrect hashes.
This commit is contained in:
Lucas Schwiderski 2023-02-17 11:13:47 +01:00
parent 1d08498131
commit 036c20bd8c
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8
8 changed files with 137 additions and 26 deletions

View file

@ -58,14 +58,14 @@ pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
Bundle::from_binary(&ctx, name, binary).wrap_err("Failed to open bundle file")?
};
if let Some(_name) = matches.get_one::<String>("replace") {
if let Some(name) = matches.get_one::<String>("replace") {
let mut file = File::open(&file_path)
.await
.wrap_err_with(|| format!("failed to open '{}'", file_path.display()))?;
if let Some(variant) = bundle
.files_mut()
.filter(|file| file.matches_name(_name))
.filter(|file| file.matches_name(name.clone()))
// TODO: Handle file variants
.find_map(|file| file.variants_mut().next())
{
@ -75,7 +75,7 @@ pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
.wrap_err("failed to read input file")?;
variant.set_data(data);
} else {
let err = eyre::eyre!("No file '{}' in this bundle.", _name)
let err = eyre::eyre!("No file '{}' in this bundle.", name)
.with_suggestion(|| {
format!(
"Run '{} bundle list {}' to list the files in this bundle.",
@ -87,7 +87,7 @@ pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
format!(
"Use '{} bundle inject --add {} {} {}' to add it as a new file",
clap::crate_name!(),
_name,
name,
bundle_path.display(),
file_path.display()
)

View file

@ -64,7 +64,7 @@ where
let v = &f.variants()[0];
println!(
"\t{}.{}: {} bytes",
f.base_name(),
f.base_name().display(),
f.file_type().ext_name(),
v.size()
);

View file

@ -104,17 +104,23 @@ pub(crate) fn command_definition() -> Command {
pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
match matches.subcommand() {
Some(("lookup", sub_matches)) => {
let hash = sub_matches
.get_one::<u64>("hash")
let hash = {
let s = sub_matches
.get_one::<String>("hash")
.expect("required argument not found");
u64::from_str_radix(s, 16)
.wrap_err("failed to parse argument as hexadecimal string")?
};
let groups = sub_matches
.get_many::<HashGroup>("group")
.unwrap_or_default();
for group in groups {
let value = ctx.lookup_hash(*hash, (*group).into());
println!("{value}");
if let IdString64::String(value) = ctx.lookup_hash(hash, (*group).into()) {
println!("{group}: {value}");
}
}
Ok(())

View file

@ -9,7 +9,7 @@ use serde::Serialize;
use crate::binary::sync::*;
use crate::filetype::*;
use crate::murmur::{HashGroup, Murmur64};
use crate::murmur::{HashGroup, IdString64, Murmur64};
use super::EntryHeader;
@ -499,7 +499,7 @@ bitflags! {
pub struct BundleFile {
file_type: BundleFileType,
name: String,
name: IdString64,
variants: Vec<BundleFileVariant>,
props: Properties,
}
@ -508,7 +508,7 @@ impl BundleFile {
pub fn new(name: String, file_type: BundleFileType) -> Self {
Self {
file_type,
name,
name: name.into(),
variants: Vec::new(),
props: Properties::empty(),
}
@ -576,7 +576,7 @@ impl BundleFile {
let mut w = Cursor::new(Vec::new());
w.write_u64(self.file_type.hash().into())?;
w.write_u64(Murmur64::hash(self.name.as_bytes()).into())?;
w.write_u64(self.name.to_murmur64().into())?;
w.write_u32(self.variants.len() as u32)?;
// TODO: Figure out what this is
@ -628,12 +628,12 @@ impl BundleFile {
self.props
}
pub fn base_name(&self) -> &String {
pub fn base_name(&self) -> &IdString64 {
&self.name
}
pub fn name(&self, decompiled: bool, variant: Option<u32>) -> String {
let mut s = self.name.clone();
let mut s = self.name.display().to_string();
s.push('.');
if let Some(variant) = variant {
@ -652,10 +652,18 @@ impl BundleFile {
pub fn matches_name<S>(&self, name: S) -> bool
where
S: AsRef<str>,
S: Into<IdString64>,
{
let name = name.as_ref();
self.name == name || self.name(false, None) == name || self.name(true, None) == name
let name = name.into();
if self.name == name {
return true;
}
if let IdString64::String(name) = name {
self.name(false, None) == name || self.name(true, None) == name
} else {
false
}
}
pub fn file_type(&self) -> BundleFileType {

View file

@ -67,7 +67,11 @@ impl Bundle {
path.file_name()
.and_then(|name| name.to_str())
.and_then(|name| Murmur64::try_from(name).ok())
.map(|hash| ctx.lookup_hash(hash, HashGroup::Filename))
.map(|hash| {
ctx.lookup_hash(hash, HashGroup::Filename)
.display()
.to_string()
})
.unwrap_or_else(|| path.display().to_string())
}
@ -220,7 +224,7 @@ impl Bundle {
for file in self.files.iter() {
w.write_u64(file.file_type().into())?;
w.write_u64(Murmur64::hash(file.base_name().as_bytes()).into())?;
w.write_u64(file.base_name().to_murmur64().into())?;
w.write_u32(file.props().bits())?;
}

View file

@ -1,6 +1,6 @@
use std::path::PathBuf;
use crate::murmur::{Dictionary, HashGroup, Murmur32, Murmur64};
use crate::murmur::{Dictionary, HashGroup, IdString64, Murmur32, Murmur64};
pub struct Context {
pub lookup: Dictionary,
@ -21,17 +21,17 @@ impl Context {
}
}
pub fn lookup_hash<M>(&self, hash: M, group: HashGroup) -> String
pub fn lookup_hash<M>(&self, hash: M, group: HashGroup) -> IdString64
where
M: Into<Murmur64>,
{
let hash = hash.into();
if let Some(s) = self.lookup.lookup(hash, group) {
tracing::debug!(%hash, string = s, "Murmur64 lookup successful");
s.to_owned()
s.to_string().into()
} else {
tracing::debug!(%hash, "Murmur64 lookup failed");
format!("{hash:016X}")
hash.into()
}
}

View file

@ -201,7 +201,10 @@ impl Package {
let t = BundleFileType::from(r.read_u64()?);
let hash = Murmur64::from(r.read_u64()?);
let path = ctx.lookup_hash(hash, HashGroup::Filename);
inner.entry(t).or_default().insert(PathBuf::from(path));
inner
.entry(t)
.or_default()
.insert(PathBuf::from(path.display().to_string()));
}
let pkg = Self {

View file

@ -236,3 +236,93 @@ impl<'de> Deserialize<'de> for Murmur32 {
deserializer.deserialize_any(Self(0))
}
}
/// An identifier that is either a raw `Murmur64` hash or the string it was derived from.
///
/// This type encodes the fact that when reading in a bundle, we don't always have a dictionary
/// entry for every hash in there. So we do want to have the real string available when needed,
/// but at the same time retain the original hash information for when we don't.
/// This is especially important when wanting to write back the read bundle, as the hashes need to
/// stay the same.
/// The previous system of always turning hashes into strings worked well for the purpose of
/// displaying hashes, but would have made it very hard to turn a stringified hash back into
/// an actual hash.
#[derive(Clone, Debug, Eq)]
pub enum IdString64 {
    /// A hash for which no string is known; it is kept verbatim.
    Hash(Murmur64),
    /// The human-readable string; its hash is computed on demand.
    String(String),
}
impl IdString64 {
    /// Returns the `Murmur64` hash this identifier stands for.
    ///
    /// A `Hash` value is returned unchanged, so a hash that was read in is never re-hashed.
    /// A `String` value is hashed from its bytes on every call.
    pub fn to_murmur64(&self) -> Murmur64 {
        match self {
            Self::Hash(hash) => *hash,
            Self::String(s) => Murmur64::hash(s.as_bytes()),
        }
    }

    /// Returns a value implementing `Display` for this identifier.
    ///
    /// The text is built eagerly: a `Hash` is rendered through its own `to_string()`,
    /// a `String` is cloned.
    pub fn display(&self) -> IdString64Display {
        let s = match self {
            Self::Hash(hash) => hash.to_string(),
            Self::String(s) => s.clone(),
        };

        IdString64Display(s)
    }

    /// Returns `true` if this identifier holds a string.
    pub fn is_string(&self) -> bool {
        matches!(self, Self::String(_))
    }

    /// Returns `true` if this identifier holds only a hash.
    pub fn is_hash(&self) -> bool {
        matches!(self, Self::Hash(_))
    }
}
impl From<String> for IdString64 {
fn from(value: String) -> Self {
Self::String(value)
}
}
impl From<Murmur64> for IdString64 {
fn from(value: Murmur64) -> Self {
Self::Hash(value)
}
}
impl From<IdString64> for Murmur64 {
fn from(value: IdString64) -> Self {
value.to_murmur64()
}
}
/// Equality is decided on the hash value only, so a `Hash` and a `String` compare equal
/// whenever the string hashes to that same value.
impl PartialEq for IdString64 {
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.to_murmur64();
        let rhs = other.to_murmur64();
        lhs == rhs
    }
}
/// Owned, ready-to-print text for an identifier.
///
/// Holds the already rendered string; printing it writes that string out.
pub struct IdString64Display(String);

impl std::fmt::Display for IdString64Display {
    /// Writes the wrapped text, honoring the caller's format spec.
    ///
    /// Delegating to `String`'s own `Display` (instead of a nested `write!("{}")`, which
    /// starts from a fresh default spec) keeps width, fill, alignment and precision working,
    /// e.g. `{:>20}` for column output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
/// Upper-case hex formatting of the identifier's hash.
///
/// The hash comes from `to_murmur64`, so a `String` value is hashed first; the actual
/// rendering is left to the hash type's own `UpperHex` implementation.
impl std::fmt::UpperHex for IdString64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hash = self.to_murmur64();
        std::fmt::UpperHex::fmt(&hash, f)
    }
}
/// Lower-case hex formatting of the identifier's hash.
///
/// The hash comes from `to_murmur64`, so a `String` value is hashed first; the actual
/// rendering is left to the hash type's own `LowerHex` implementation.
impl std::fmt::LowerHex for IdString64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hash = self.to_murmur64();
        std::fmt::LowerHex::fmt(&hash, f)
    }
}