Implement bundle database resource hashes #184

Merged
lucas merged 3 commits from feat/bundle-database into master 2024-07-29 15:09:26 +02:00
4 changed files with 226 additions and 14 deletions

View file

@ -20,6 +20,8 @@
- dtmm: fetch file version for Nexus mods - dtmm: fetch file version for Nexus mods
- dtmm: handle `nxm://` URIs via IPC and import the corresponding mod - dtmm: handle `nxm://` URIs via IPC and import the corresponding mod
- dtmm: Add button to open mod on nexusmods.com - dtmm: Add button to open mod on nexusmods.com
- dtmt: Implement commands to list bundles and contents
- dtmt: Implement command to search for files
=== Fixed === Fixed

View file

@ -0,0 +1,174 @@
use std::{io::Cursor, path::PathBuf};
use clap::{value_parser, Arg, ArgMatches, Command};
use color_eyre::{eyre::Context as _, Result};
use sdk::murmur::{HashGroup, IdString64, Murmur64};
use sdk::{BundleDatabase, FromBinary as _};
use tokio::fs;
pub(crate) fn command_definition() -> Command {
Command::new("db")
.about("Various operations regarding `bundle_database.data`.")
.subcommand_required(true)
.subcommand(
Command::new("list-files")
.about("List bundle contents")
.arg(
Arg::new("database")
.required(true)
.help("Path to the bundle database")
.value_parser(value_parser!(PathBuf)),
)
.arg(
Arg::new("bundle")
.help("The bundle name. If omitted, all bundles will be listed.")
.required(false),
),
)
.subcommand(
Command::new("list-bundles").about("List bundles").arg(
Arg::new("database")
.required(true)
.help("Path to the bundle database")
.value_parser(value_parser!(PathBuf)),
),
)
.subcommand(
Command::new("find-file")
.about("Find the bundle a file belongs to")
.arg(
Arg::new("database")
.required(true)
.help("Path to the bundle database")
.value_parser(value_parser!(PathBuf)),
)
.arg(
Arg::new("file-name")
.required(true)
.help("Name of the file. May be a hash in hex representation or a string"),
),
)
}
#[tracing::instrument(skip_all)]
pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
let Some((op, sub_matches)) = matches.subcommand() else {
unreachable!("clap is configured to require a subcommand");
};
let database = {
let path = sub_matches
.get_one::<PathBuf>("database")
.expect("argument is required");
let binary = fs::read(&path)
.await
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
let mut r = Cursor::new(binary);
BundleDatabase::from_binary(&mut r).wrap_err("Failed to parse bundle database")?
};
match op {
"list-files" => {
let index = database.files();
if let Some(bundle) = sub_matches.get_one::<String>("bundle") {
let hash = u64::from_str_radix(bundle, 16)
.map(Murmur64::from)
.wrap_err("Invalid hex sequence")?;
if let Some(files) = index.get(&hash) {
for file in files {
let name = ctx.lookup_hash(file.name, HashGroup::Filename);
let extension = file.extension.ext_name();
println!("{}.{}", name.display(), extension);
}
} else {
tracing::info!("Bundle {} not found in the database", bundle);
}
} else {
for (bundle_hash, files) in index.iter() {
let bundle_name = ctx.lookup_hash(*bundle_hash, HashGroup::Filename);
match bundle_name {
IdString64::String(name) => {
println!("{:016x} {}", bundle_hash, name);
}
IdString64::Hash(hash) => {
println!("{:016x}", hash);
}
}
for file in files {
let name = ctx.lookup_hash(file.name, HashGroup::Filename);
let extension = file.extension.ext_name();
match name {
IdString64::String(name) => {
println!("\t{:016x}.{:<12} {}", file.name, extension, name);
}
IdString64::Hash(hash) => {
println!("\t{:016x}.{}", hash, extension);
}
}
}
println!();
}
}
Ok(())
}
"list-bundles" => {
for bundle_hash in database.bundles().keys() {
let bundle_name = ctx.lookup_hash(*bundle_hash, HashGroup::Filename);
match bundle_name {
IdString64::String(name) => {
println!("{:016x} {}", bundle_hash, name);
}
IdString64::Hash(hash) => {
println!("{:016x}", hash);
}
}
}
Ok(())
}
"find-file" => {
let name = sub_matches
.get_one::<String>("file-name")
.expect("required argument");
let name = match u64::from_str_radix(name, 16).map(Murmur64::from) {
Ok(hash) => hash,
Err(_) => Murmur64::hash(name),
};
let bundles = database.files().iter().filter_map(|(bundle_hash, files)| {
if files.iter().any(|file| file.name == name) {
Some(bundle_hash)
} else {
None
}
});
let mut found = false;
for bundle in bundles {
found = true;
println!("{:016x}", bundle);
}
if !found {
std::process::exit(1);
}
Ok(())
}
_ => unreachable!(
"clap is configured to require a subcommand, and they're all handled above"
),
}
}

View file

@ -1,6 +1,7 @@
use clap::{ArgMatches, Command}; use clap::{ArgMatches, Command};
use color_eyre::eyre::Result; use color_eyre::eyre::Result;
mod db;
mod decompress; mod decompress;
mod extract; mod extract;
mod inject; mod inject;
@ -14,6 +15,7 @@ pub(crate) fn command_definition() -> Command {
.subcommand(extract::command_definition()) .subcommand(extract::command_definition())
.subcommand(inject::command_definition()) .subcommand(inject::command_definition())
.subcommand(list::command_definition()) .subcommand(list::command_definition())
.subcommand(db::command_definition())
} }
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
@ -23,6 +25,7 @@ pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
Some(("extract", sub_matches)) => extract::run(ctx, sub_matches).await, Some(("extract", sub_matches)) => extract::run(ctx, sub_matches).await,
Some(("inject", sub_matches)) => inject::run(ctx, sub_matches).await, Some(("inject", sub_matches)) => inject::run(ctx, sub_matches).await,
Some(("list", sub_matches)) => list::run(ctx, sub_matches).await, Some(("list", sub_matches)) => list::run(ctx, sub_matches).await,
Some(("db", sub_matches)) => db::run(ctx, sub_matches).await,
_ => unreachable!( _ => unreachable!(
"clap is configured to require a subcommand, and they're all handled above" "clap is configured to require a subcommand, and they're all handled above"
), ),

View file

@ -19,15 +19,15 @@ const DATABASE_VERSION: u32 = 0x6;
const FILE_VERSION: u32 = 0x4; const FILE_VERSION: u32 = 0x4;
pub struct BundleFile { pub struct BundleFile {
name: String, pub name: String,
stream: String, pub stream: String,
platform_specific: bool, pub platform_specific: bool,
file_time: u64, pub file_time: u64,
} }
pub struct FileName { pub struct FileName {
extension: BundleFileType, pub extension: BundleFileType,
name: Murmur64, pub name: Murmur64,
} }
pub struct BundleDatabase { pub struct BundleDatabase {
@ -36,7 +36,34 @@ pub struct BundleDatabase {
bundle_contents: HashMap<Murmur64, Vec<FileName>>, bundle_contents: HashMap<Murmur64, Vec<FileName>>,
} }
// Implements the partial Murmur that's used by the engine to compute bundle resource hashes,
// but in a way that the loop can be done outside the function.
#[inline(always)]
fn add_to_resource_hash(mut k: u64, name: impl Into<u64>) -> u64 {
const M: u64 = 0xc6a4a7935bd1e995;
const R: u64 = 47;
let mut h: u64 = name.into();
k = k.wrapping_mul(M);
k ^= k >> R;
k = k.wrapping_mul(M);
h ^= k;
k = M.wrapping_mul(h);
k
}
impl BundleDatabase { impl BundleDatabase {
pub fn bundles(&self) -> &HashMap<Murmur64, Vec<BundleFile>> {
&self.stored_files
}
pub fn files(&self) -> &HashMap<Murmur64, Vec<FileName>> {
&self.bundle_contents
}
pub fn add_bundle(&mut self, bundle: &Bundle) { pub fn add_bundle(&mut self, bundle: &Bundle) {
let hash = bundle.name().to_murmur64(); let hash = bundle.name().to_murmur64();
let name = hash.to_string(); let name = hash.to_string();
@ -69,20 +96,26 @@ impl BundleDatabase {
} }
} }
let mut resource_hash = 0;
for f in bundle.files() { for f in bundle.files() {
let name = f.base_name().to_murmur64();
let file_name = FileName { let file_name = FileName {
extension: f.file_type(), extension: f.file_type(),
name: f.base_name().to_murmur64(), name,
}; };
// TODO: Compute actual resource hash resource_hash = add_to_resource_hash(resource_hash, name);
self.resource_hashes.insert(hash, 0);
// TODO: Make sure each file name only exists once. Probably best to turn
// the `Vec` into a sorted `HashSet`.
self.bundle_contents self.bundle_contents
.entry(hash) .entry(hash)
.or_default() .or_default()
.push(file_name); .push(file_name);
} }
self.resource_hashes.insert(hash, resource_hash);
} }
} }
@ -103,7 +136,7 @@ impl FromBinary for BundleDatabase {
let mut stored_files = HashMap::with_capacity(num_entries); let mut stored_files = HashMap::with_capacity(num_entries);
for _ in 0..num_entries { for _ in 0..num_entries {
let hash = Murmur64::from(r.read_u64()?); let hash = r.read_u64().map(Murmur64::from)?;
let num_files = r.read_u32()? as usize; let num_files = r.read_u32()? as usize;
let mut files = Vec::with_capacity(num_files); let mut files = Vec::with_capacity(num_files);
@ -161,7 +194,7 @@ impl FromBinary for BundleDatabase {
let mut resource_hashes = HashMap::with_capacity(num_hashes); let mut resource_hashes = HashMap::with_capacity(num_hashes);
for _ in 0..num_hashes { for _ in 0..num_hashes {
let name = Murmur64::from(r.read_u64()?); let name = r.read_u64().map(Murmur64::from)?;
let hash = r.read_u64()?; let hash = r.read_u64()?;
resource_hashes.insert(name, hash); resource_hashes.insert(name, hash);
@ -171,14 +204,14 @@ impl FromBinary for BundleDatabase {
let mut bundle_contents = HashMap::with_capacity(num_contents); let mut bundle_contents = HashMap::with_capacity(num_contents);
for _ in 0..num_contents { for _ in 0..num_contents {
let hash = Murmur64::from(r.read_u64()?); let hash = r.read_u64().map(Murmur64::from)?;
let num_files = r.read_u32()? as usize; let num_files = r.read_u32()? as usize;
let mut files = Vec::with_capacity(num_files); let mut files = Vec::with_capacity(num_files);
for _ in 0..num_files { for _ in 0..num_files {
let extension = BundleFileType::from(r.read_u64()?); let extension = r.read_u64().map(BundleFileType::from)?;
let name = Murmur64::from(r.read_u64()?); let name = r.read_u64().map(Murmur64::from)?;
files.push(FileName { extension, name }); files.push(FileName { extension, name });
} }