sdk: Implement decompiling Lua

Closes #48.
This commit is contained in:
Lucas Schwiderski 2023-10-30 09:18:56 +01:00
parent 2f5939d44d
commit 93db78d58f
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8
8 changed files with 243 additions and 37 deletions

View file

@ -15,6 +15,7 @@
- dtmm: match mods to Nexus and check for updates
- dtmt: add utility to migrate mod projects
- dtmm: reset dtkit-patch installations
- sdk: implement decompiling Lua files
=== Fixed

1
Cargo.lock generated
View file

@ -964,6 +964,7 @@ dependencies = [
"sdk",
"serde",
"serde_sjson",
"shlex",
"string_template",
"tempfile",
"tokio",

View file

@ -33,6 +33,7 @@ path-slash = "0.2.1"
async-recursion = "1.0.2"
notify = "5.1.0"
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
shlex = "1.2.0"
[dev-dependencies]
tempfile = "3.3.0"

View file

@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::sync::Arc;
@ -7,7 +8,7 @@ use color_eyre::{Help, Report};
use futures::future::try_join_all;
use futures::StreamExt;
use glob::Pattern;
use sdk::{Bundle, BundleFile};
use sdk::{Bundle, BundleFile, CmdLine};
use tokio::fs;
use crate::cmd::util::resolve_bundle_paths;
@ -89,30 +90,78 @@ pub(crate) fn command_definition() -> Command {
Arg::new("ljd")
.long("ljd")
.help(
"Path to a custom ljd executable. If not set, \
`ljd` will be called from PATH.",
"A custom command line to execute ljd with. It is treated as follows:\n\
* if the argument is a valid path to an existing file:\n\
** if the file is called 'main.py', it is assumed that 'python.exe' \
exists in PATH to execute this with.\n\
** otherwise it is treated as an executable\n\
* if it's a single word, it's treated as an executable in PATH\n\
* otherwise it is treated as a command line template.\n\
In any case, the application being run must accept ljd's flags '-c' and '-f'.",
)
.default_value("ljd"),
)
.arg(
Arg::new("revorb")
.long("revorb")
.help(
"Path to a custom revorb executable. If not set, \
`revorb` will be called from PATH.",
)
.default_value("revorb"),
)
.arg(
Arg::new("ww2ogg")
.long("ww2ogg")
.help(
"Path to a custom ww2ogg executable. If not set, \
`ww2ogg` will be called from PATH.\nSee the documentation for how \
to set up the script for this.",
)
.default_value("ww2ogg"),
)
// .arg(
// Arg::new("revorb")
// .long("revorb")
// .help(
// "Path to a custom revorb executable. If not set, \
// `revorb` will be called from PATH.",
// )
// .default_value("revorb"),
// )
// .arg(
// Arg::new("ww2ogg")
// .long("ww2ogg")
// .help(
// "Path to a custom ww2ogg executable. If not set, \
// `ww2ogg` will be called from PATH.\nSee the documentation for how \
// to set up the script for this.",
// )
// .default_value("ww2ogg"),
// )
}
#[tracing::instrument]
async fn parse_command_line_template(tmpl: &String) -> Result<CmdLine> {
if tmpl.trim().is_empty() {
eyre::bail!("Command line template must not be empty");
}
let mut cmd = if matches!(fs::try_exists(tmpl).await, Ok(true)) {
let path = PathBuf::from(tmpl);
if path.file_name() == Some(OsStr::new("main.py")) {
let arg = path.display().to_string();
let mut cmd = CmdLine::new("python");
cmd.arg("-c").arg(shlex::quote(&arg).to_string());
cmd
} else {
CmdLine::new(path)
}
} else {
let Some(args) = shlex::split(tmpl) else {
eyre::bail!("Invalid shell syntax");
};
// We already checked that the template is not empty
let mut cmd = CmdLine::new(args[0].clone());
let mut it = args.iter();
// Skip the first one, that's the command name
it.next();
for arg in it {
cmd.arg(arg);
}
cmd
};
// Add ljd flags
cmd.arg("-c");
tracing::debug!("Parsed command line template: {:?}", cmd);
Ok(cmd)
}
#[tracing::instrument(skip_all)]
@ -121,16 +170,19 @@ pub(crate) async fn run(mut ctx: sdk::Context, matches: &ArgMatches) -> Result<(
let ljd_bin = matches
.get_one::<String>("ljd")
.expect("no default value for 'ljd' parameter");
let revorb_bin = matches
.get_one::<String>("revorb")
.expect("no default value for 'revorb' parameter");
let ww2ogg_bin = matches
.get_one::<String>("ww2ogg")
.expect("no default value for 'ww2ogg' parameter");
// let revorb_bin = matches
// .get_one::<String>("revorb")
// .expect("no default value for 'revorb' parameter");
// let ww2ogg_bin = matches
// .get_one::<String>("ww2ogg")
// .expect("no default value for 'ww2ogg' parameter");
ctx.ljd = Some(ljd_bin.clone());
ctx.revorb = Some(revorb_bin.clone());
ctx.ww2ogg = Some(ww2ogg_bin.clone());
ctx.ljd = parse_command_line_template(ljd_bin)
.await
.map(Option::Some)
.wrap_err("Failed to parse command line template for flag 'ljd'")?;
// ctx.revorb = Some(revorb_bin.clone());
// ctx.ww2ogg = Some(ww2ogg_bin.clone());
}
let includes = match matches.get_many::<Pattern>("include") {

View file

@ -133,6 +133,23 @@ pub mod sync {
make_skip!(skip_u8, read_u8, u8);
make_skip!(skip_u32, read_u32, u32);
/// Reads an unsigned LEB128 encoded integer from the stream.
///
/// Each byte contributes its lower seven bits, least significant group
/// first; a byte below `0x80` ends the value.
///
/// Implementation based on https://en.wikipedia.org/wiki/LEB128
///
/// # Errors
///
/// Returns any error of the underlying read, and
/// [`io::ErrorKind::InvalidData`] when the encoded value does not fit into
/// 64 bits.
fn read_uleb128(&mut self) -> io::Result<u64> {
    let mut result: u64 = 0;
    let mut shift: u32 = 0;

    loop {
        let byte = ReadExt::read_u8(self)? as u64;
        let payload = byte & 0x7f;

        // Reject input that would shift past the width of the result or
        // drop payload bits, instead of overflowing or silently truncating.
        if shift >= u64::BITS || (payload << shift) >> shift != payload {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "ULEB128 value does not fit into 64 bits",
            ));
        }

        result |= payload << shift;

        if byte < 0x80 {
            return Ok(result);
        }

        shift += 7;
    }
}
fn skip_padding(&mut self) -> io::Result<()> {
let pos = self.stream_position()?;
let padding_size = 16 - (pos % 16);

View file

@ -1,10 +1,59 @@
use std::path::PathBuf;
use std::process::Command;
use std::{ffi::OsString, path::PathBuf};
use crate::murmur::{Dictionary, HashGroup, IdString64, Murmur32, Murmur64};
/// A program together with the arguments it should be started with.
///
/// Convert a reference into a [`Command`] with `Command::from(&cmd_line)`.
#[derive(Debug)]
pub struct CmdLine {
    cmd: OsString,
    args: Vec<OsString>,
}

impl CmdLine {
    /// Creates a command line for the program `cmd` without any arguments.
    pub fn new(cmd: impl Into<OsString>) -> Self {
        let cmd = cmd.into();
        let args = Vec::new();
        Self { cmd, args }
    }

    /// Appends a single argument and returns `self` to allow chaining.
    pub fn arg(&mut self, arg: impl Into<OsString>) -> &mut Self {
        let arg = arg.into();
        self.args.push(arg);
        self
    }
}

/// Prints the program and every argument wrapped in double quotes,
/// separated by single spaces. Non-Unicode data is converted lossily.
impl std::fmt::Display for CmdLine {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let program = self.cmd.to_string_lossy();
        write!(f, "\"{}\"", program)?;

        self.args
            .iter()
            .map(|arg| arg.to_string_lossy())
            .try_for_each(|arg| write!(f, " \"{}\"", arg))
    }
}

/// Builds a [`Command`] with the same program and arguments.
impl From<&CmdLine> for Command {
    fn from(value: &CmdLine) -> Self {
        let CmdLine { cmd: program, args } = value;

        let mut command = Command::new(program);
        command.args(args);
        command
    }
}
pub struct Context {
pub lookup: Dictionary,
pub ljd: Option<String>,
pub ljd: Option<CmdLine>,
pub revorb: Option<String>,
pub ww2ogg: Option<String>,
pub game_dir: Option<PathBuf>,

View file

@ -1,24 +1,109 @@
use std::env;
use std::ffi::CStr;
use std::ffi::CString;
use std::io::Cursor;
use std::io::Read;
use std::io::Write;
use std::process::Command;
use color_eyre::eyre;
use color_eyre::eyre::Context;
use color_eyre::Result;
use luajit2_sys as lua;
use tokio::fs;
use crate::binary::sync::ReadExt;
use crate::binary::sync::WriteExt;
use crate::bundle::file::{BundleFileVariant, UserFile};
use crate::{BundleFile, BundleFileType};
const BITSQUID_LUAJIT_HEADER: u32 = 0x8253461B;
#[tracing::instrument(skip_all, fields(buf_len = data.as_ref().len()))]
pub(crate) async fn decompile<T>(_ctx: &crate::Context, data: T) -> Result<Vec<UserFile>>
pub(crate) async fn decompile<T>(ctx: &crate::Context, data: T) -> Result<Vec<UserFile>>
where
T: AsRef<[u8]>,
{
let mut _r = Cursor::new(data.as_ref());
todo!();
let data = data.as_ref();
let length = {
let mut r = Cursor::new(data);
r.read_u32()? as usize
};
// This skips the unknown bytes 5..12
let content = &data[12..];
eyre::ensure!(
content.len() == length,
"Content length doesn't match. Expected {}, got {}",
length,
content.len()
);
let name = {
let mut r = Cursor::new(content);
eyre::ensure!(
r.read_u32()? == BITSQUID_LUAJIT_HEADER,
"Invalid magic bytes"
);
// Skip additional header bytes
let _ = r.read_uleb128()?;
let length = r.read_uleb128()? as usize;
let mut buf = vec![0u8; length];
r.read_exact(&mut buf)?;
let mut s = String::from_utf8(buf)
.wrap_err_with(|| format!("Invalid byte sequence for LuaJIT bytecode name"))?;
// Remove the leading `@`
s.remove(0);
s
};
let mut temp = env::temp_dir();
// Using the actual file name and keeping it in case of an error makes debugging easier.
// But to avoid creating a bunch of folders, we flatten the name.
temp.push(name.replace('/', "_"));
temp.set_extension("luao");
tracing::debug!(
"Writing temporary LuaJIT bytecode file to '{}'",
temp.display()
);
fs::write(&temp, content)
.await
.wrap_err_with(|| format!("Failed to write LuaJIT bytecode to '{}'", temp.display()))?;
let mut cmd = ctx
.ljd
.as_ref()
.map(|c| c.into())
.unwrap_or_else(|| Command::new("ljd"));
cmd.arg("-f").arg(&temp);
tracing::debug!("Executing command: '{:?}'", cmd);
let output = cmd.output().wrap_err("Failed to run ljd")?;
if !output.stderr.is_empty() {
eyre::bail!(
"Decompilation failed: {}",
String::from_utf8_lossy(&output.stderr)
);
}
let content = output.stdout;
if let Err(err) = fs::remove_file(&temp)
.await
.wrap_err("Failed to remove temporary file")
{
tracing::warn!("{:?}", err);
}
Ok(vec![UserFile::with_name(content, name)])
}
#[tracing::instrument(skip_all)]

View file

@ -8,4 +8,4 @@ pub use binary::{FromBinary, ToBinary};
pub use bundle::database::BundleDatabase;
pub use bundle::decompress;
pub use bundle::{Bundle, BundleFile, BundleFileType, BundleFileVariant};
pub use context::Context;
pub use context::{CmdLine, Context};