From 93db78d58ff2972e3cf1bd92ad6fca748ba9c622 Mon Sep 17 00:00:00 2001 From: Lucas Schwiderski Date: Mon, 30 Oct 2023 09:18:56 +0100 Subject: [PATCH] sdk: Implement decompiling Lua Closes #48. --- CHANGELOG.adoc | 1 + Cargo.lock | 1 + crates/dtmt/Cargo.toml | 1 + crates/dtmt/src/cmd/bundle/extract.rs | 114 +++++++++++++++++++------- lib/sdk/src/binary.rs | 17 ++++ lib/sdk/src/context.rs | 53 +++++++++++- lib/sdk/src/filetype/lua.rs | 91 +++++++++++++++++++- lib/sdk/src/lib.rs | 2 +- 8 files changed, 243 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 2409280..2e2b7a4 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -15,6 +15,7 @@ - dtmm: match mods to Nexus and check for updates - dtmt: add utility to migrate mod projects - dtmm: reset dtkit-patch installations +- sdk: implement decompiling Lua files === Fixed diff --git a/Cargo.lock b/Cargo.lock index 2655f9c..78409bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -964,6 +964,7 @@ dependencies = [ "sdk", "serde", "serde_sjson", + "shlex", "string_template", "tempfile", "tokio", diff --git a/crates/dtmt/Cargo.toml b/crates/dtmt/Cargo.toml index 524ffaa..69bbc31 100644 --- a/crates/dtmt/Cargo.toml +++ b/crates/dtmt/Cargo.toml @@ -33,6 +33,7 @@ path-slash = "0.2.1" async-recursion = "1.0.2" notify = "5.1.0" luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" } +shlex = "1.2.0" [dev-dependencies] tempfile = "3.3.0" diff --git a/crates/dtmt/src/cmd/bundle/extract.rs b/crates/dtmt/src/cmd/bundle/extract.rs index 35dee15..57b7fe9 100644 --- a/crates/dtmt/src/cmd/bundle/extract.rs +++ b/crates/dtmt/src/cmd/bundle/extract.rs @@ -1,3 +1,4 @@ +use std::ffi::OsStr; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -7,7 +8,7 @@ use color_eyre::{Help, Report}; use futures::future::try_join_all; use futures::StreamExt; use glob::Pattern; -use sdk::{Bundle, BundleFile}; +use sdk::{Bundle, BundleFile, CmdLine}; use tokio::fs; use crate::cmd::util::resolve_bundle_paths; @@ -89,30 
+90,78 @@ pub(crate) fn command_definition() -> Command { Arg::new("ljd") .long("ljd") .help( - "Path to a custom ljd executable. If not set, \ - `ljd` will be called from PATH.", + "A custom command line to execute ljd with. It is treated as follows:\n\ + * if the argument is a valid path to an existing file:\n\ + ** if the file is called 'main.py', it is assumed that 'python.exe' \ + exists in PATH to execute this with.\n\ + ** otherwise it is treated as an executable\n\ + * if it's a single word, it's treated as an executable in PATH\n\ + * otherwise it is treated as a command line template.\n\ + In any case, the application being run must accept ljd's flags '-c' and '-f'.", ) .default_value("ljd"), ) - .arg( - Arg::new("revorb") - .long("revorb") - .help( - "Path to a custom revorb executable. If not set, \ - `revorb` will be called from PATH.", - ) - .default_value("revorb"), - ) - .arg( - Arg::new("ww2ogg") - .long("ww2ogg") - .help( - "Path to a custom ww2ogg executable. If not set, \ - `ww2ogg` will be called from PATH.\nSee the documentation for how \ - to set up the script for this.", - ) - .default_value("ww2ogg"), - ) + // .arg( + // Arg::new("revorb") + // .long("revorb") + // .help( + // "Path to a custom revorb executable. If not set, \ + // `revorb` will be called from PATH.", + // ) + // .default_value("revorb"), + // ) + // .arg( + // Arg::new("ww2ogg") + // .long("ww2ogg") + // .help( + // "Path to a custom ww2ogg executable. 
If not set, \ + // `ww2ogg` will be called from PATH.\nSee the documentation for how \ + // to set up the script for this.", + // ) + // .default_value("ww2ogg"), + // ) +} + +#[tracing::instrument] +async fn parse_command_line_template(tmpl: &String) -> Result { + if tmpl.trim().is_empty() { + eyre::bail!("Command line template must not be empty"); + } + + let mut cmd = if matches!(fs::try_exists(tmpl).await, Ok(true)) { + let path = PathBuf::from(tmpl); + if path.file_name() == Some(OsStr::new("main.py")) { + let arg = path.display().to_string(); + let mut cmd = CmdLine::new("python"); + cmd.arg("-c").arg(shlex::quote(&arg).to_string()); + cmd + } else { + CmdLine::new(path) + } + } else { + let Some(args) = shlex::split(tmpl) else { + eyre::bail!("Invalid shell syntax"); + }; + + // We already checked that the template is not empty + let mut cmd = CmdLine::new(args[0].clone()); + let mut it = args.iter(); + // Skip the first one, that's the command name + it.next(); + + for arg in it { + cmd.arg(arg); + } + + cmd + }; + + // Add ljd flags + cmd.arg("-c"); + + tracing::debug!("Parsed command line template: {:?}", cmd); + + Ok(cmd) } #[tracing::instrument(skip_all)] @@ -121,16 +170,19 @@ pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<( let ljd_bin = matches .get_one::("ljd") .expect("no default value for 'ljd' parameter"); - let revorb_bin = matches - .get_one::("revorb") - .expect("no default value for 'revorb' parameter"); - let ww2ogg_bin = matches - .get_one::("ww2ogg") - .expect("no default value for 'ww2ogg' parameter"); + // let revorb_bin = matches + // .get_one::("revorb") + // .expect("no default value for 'revorb' parameter"); + // let ww2ogg_bin = matches + // .get_one::("ww2ogg") + // .expect("no default value for 'ww2ogg' parameter"); - ctx.ljd = Some(ljd_bin.clone()); - ctx.revorb = Some(revorb_bin.clone()); - ctx.ww2ogg = Some(ww2ogg_bin.clone()); + ctx.ljd = parse_command_line_template(ljd_bin) + .await + 
.map(Option::Some) + .wrap_err("Failed to parse command line template for flag 'ljd'")?; + // ctx.revorb = Some(revorb_bin.clone()); + // ctx.ww2ogg = Some(ww2ogg_bin.clone()); } let includes = match matches.get_many::("include") { diff --git a/lib/sdk/src/binary.rs b/lib/sdk/src/binary.rs index 9ce3f11..1fcc90e 100644 --- a/lib/sdk/src/binary.rs +++ b/lib/sdk/src/binary.rs @@ -133,6 +133,23 @@ pub mod sync { make_skip!(skip_u8, read_u8, u8); make_skip!(skip_u32, read_u32, u32); + // Implementation based on https://en.wikipedia.org/wiki/LEB128 + fn read_uleb128(&mut self) -> io::Result { + let mut result: u64 = 0; + let mut shift: u64 = 0; + + loop { + let byte = ReadExt::read_u8(self)? as u64; + result |= (byte & 0x7f) << shift; + + if byte < 0x80 { + return Ok(result); + } + + shift += 7; + } + } + fn skip_padding(&mut self) -> io::Result<()> { let pos = self.stream_position()?; let padding_size = 16 - (pos % 16); diff --git a/lib/sdk/src/context.rs b/lib/sdk/src/context.rs index b0de6dc..1500290 100644 --- a/lib/sdk/src/context.rs +++ b/lib/sdk/src/context.rs @@ -1,10 +1,59 @@ -use std::path::PathBuf; +use std::process::Command; +use std::{ffi::OsString, path::PathBuf}; use crate::murmur::{Dictionary, HashGroup, IdString64, Murmur32, Murmur64}; +pub struct CmdLine { + cmd: OsString, + args: Vec, +} + +impl CmdLine { + pub fn new(cmd: impl Into) -> Self { + Self { + cmd: cmd.into(), + args: vec![], + } + } + + pub fn arg(&mut self, arg: impl Into) -> &mut Self { + self.args.push(arg.into()); + self + } +} + +impl std::fmt::Debug for CmdLine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CmdLine") + .field("cmd", &self.cmd) + .field("args", &self.args) + .finish() + } +} + +impl std::fmt::Display for CmdLine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "\"{}\"", self.cmd.to_string_lossy())?; + + for arg in &self.args { + write!(f, " \"{}\"", arg.to_string_lossy())?; + } + + Ok(())
+ } +} + +impl From<&CmdLine> for Command { + fn from(value: &CmdLine) -> Self { + let mut cmd = Command::new(&value.cmd); + cmd.args(&value.args); + cmd + } +} + pub struct Context { pub lookup: Dictionary, - pub ljd: Option, + pub ljd: Option, pub revorb: Option, pub ww2ogg: Option, pub game_dir: Option, diff --git a/lib/sdk/src/filetype/lua.rs b/lib/sdk/src/filetype/lua.rs index bb1908c..67d4eb3 100644 --- a/lib/sdk/src/filetype/lua.rs +++ b/lib/sdk/src/filetype/lua.rs @@ -1,24 +1,109 @@ +use std::env; use std::ffi::CStr; use std::ffi::CString; use std::io::Cursor; +use std::io::Read; use std::io::Write; +use std::process::Command; use color_eyre::eyre; use color_eyre::eyre::Context; use color_eyre::Result; use luajit2_sys as lua; +use tokio::fs; +use crate::binary::sync::ReadExt; use crate::binary::sync::WriteExt; use crate::bundle::file::{BundleFileVariant, UserFile}; use crate::{BundleFile, BundleFileType}; +const BITSQUID_LUAJIT_HEADER: u32 = 0x8253461B; + #[tracing::instrument(skip_all, fields(buf_len = data.as_ref().len()))] -pub(crate) async fn decompile(_ctx: &crate::Context, data: T) -> Result> +pub(crate) async fn decompile(ctx: &crate::Context, data: T) -> Result> where T: AsRef<[u8]>, { - let mut _r = Cursor::new(data.as_ref()); - todo!(); + let data = data.as_ref(); + let length = { + let mut r = Cursor::new(data); + r.read_u32()? as usize + }; + + // This skips the unknown bytes 5..12 + let content = &data[12..]; + eyre::ensure!( + content.len() == length, + "Content length doesn't match. Expected {}, got {}", + length, + content.len() + ); + + let name = { + let mut r = Cursor::new(content); + + eyre::ensure!( + r.read_u32()? == BITSQUID_LUAJIT_HEADER, + "Invalid magic bytes" + ); + + // Skip additional header bytes + let _ = r.read_uleb128()?; + let length = r.read_uleb128()? 
as usize; + + let mut buf = vec![0u8; length]; + r.read_exact(&mut buf)?; + let mut s = String::from_utf8(buf) + .wrap_err_with(|| format!("Invalid byte sequence for LuaJIT bytecode name"))?; + // Remove the leading `@` + s.remove(0); + s + }; + + let mut temp = env::temp_dir(); + // Using the actual file name and keeping it in case of an error makes debugging easier. + // But to avoid creating a bunch of folders, we flatten the name. + temp.push(name.replace('/', "_")); + temp.set_extension("luao"); + + tracing::debug!( + "Writing temporary LuaJIT bytecode file to '{}'", + temp.display() + ); + + fs::write(&temp, content) + .await + .wrap_err_with(|| format!("Failed to write LuaJIT bytecode to '{}'", temp.display()))?; + + let mut cmd = ctx + .ljd + .as_ref() + .map(|c| c.into()) + .unwrap_or_else(|| Command::new("ljd")); + + cmd.arg("-f").arg(&temp); + + tracing::debug!("Executing command: '{:?}'", cmd); + + let output = cmd.output().wrap_err("Failed to run ljd")?; + + if !output.stderr.is_empty() { + eyre::bail!( + "Decompilation failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + + let content = output.stdout; + + if let Err(err) = fs::remove_file(&temp) + .await + .wrap_err("Failed to remove temporary file") + { + tracing::warn!("{:?}", err); + } + + Ok(vec![UserFile::with_name(content, name)]) } #[tracing::instrument(skip_all)] diff --git a/lib/sdk/src/lib.rs b/lib/sdk/src/lib.rs index e229e28..37a4d67 100644 --- a/lib/sdk/src/lib.rs +++ b/lib/sdk/src/lib.rs @@ -8,4 +8,4 @@ pub use binary::{FromBinary, ToBinary}; pub use bundle::database::BundleDatabase; pub use bundle::decompress; pub use bundle::{Bundle, BundleFile, BundleFileType, BundleFileVariant}; -pub use context::Context; +pub use context::{CmdLine, Context};