Perform various optimizations #173

Merged
lucas merged 19 commits from feat/optimization into master 2024-07-10 19:45:52 +02:00
24 changed files with 1138 additions and 829 deletions

View file

@ -37,6 +37,7 @@ RUN set -eux; \
apt-get update; \
apt-get install --no-install-recommends -y \
build-essential \
cmake \
curl \
git \
gpg \

722
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -8,11 +8,16 @@ members = [
"lib/sdk",
"lib/serde_sjson",
"lib/luajit2-sys",
"lib/color-eyre",
]
exclude = ["lib/color-eyre"]
[workspace.dependencies]
zip = { version = "2.1.3", default-features = false, features = ["deflate", "bzip2", "zstd", "time"] }
[patch.crates-io]
color-eyre = { path = "lib/color-eyre" }
ansi-parser = { git = "https://gitlab.com/lschwiderski/ansi-parser.git", branch = "issue/outdated-heapless", version = "0.9.1" }
[profile.dev.package.backtrace]
opt-level = 3
@ -26,3 +31,9 @@ strip = "debuginfo"
[profile.release-lto]
inherits = "release"
lto = true
[profile.perf]
inherits = "release"
strip = false
lto = true
debug = "line-tables-only"

View file

@ -1,5 +1,13 @@
set positional-arguments
fly_target := "main"
build-perf-dtmt:
cargo build --profile perf --bin dtmt
perf-dtmt *args='': build-perf-dtmt
perf record --call-graph dwarf ./target/perf/dtmt "$@"
ci-build: ci-build-msvc ci-build-linux
ci-build-msvc:

View file

@ -9,19 +9,19 @@ edition = "2021"
ansi-parser = "0.9.0"
async-recursion = "1.0.5"
bincode = "1.3.3"
bitflags = "1.3.2"
bitflags = "2.5.0"
clap = { version = "4.0.15", features = ["color", "derive", "std", "cargo", "string", "unicode"] }
color-eyre = "0.6.2"
colors-transform = "0.2.11"
confy = "0.5.1"
confy = "0.6.1"
druid = { version = "0.8", features = ["im", "serde", "image", "png", "jpeg", "bmp", "webp", "svg"] }
druid-widget-nursery = "0.1"
dtmt-shared = { path = "../../lib/dtmt-shared", version = "*" }
futures = "0.3.25"
interprocess = { version = "1.2.1", default-features = false }
interprocess = "2.1.0"
lazy_static = "1.4.0"
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
minijinja = "1.0.10"
minijinja = { version = "2.0.1", default-features = false }
nexusmods = { path = "../../lib/nexusmods", version = "*" }
oodle = { path = "../../lib/oodle", version = "*" }
open = "5.0.1"
@ -29,8 +29,7 @@ path-slash = "0.2.1"
sdk = { path = "../../lib/sdk", version = "*" }
serde = { version = "1.0.152", features = ["derive", "rc"] }
serde_sjson = { path = "../../lib/serde_sjson", version = "*" }
string_template = "0.2.1"
strip-ansi-escapes = "0.1.1"
strip-ansi-escapes = "0.2.0"
time = { version = "0.3.20", features = ["serde", "serde-well-known", "local-offset"] }
tokio = { version = "1.23.0", features = ["rt", "fs", "tracing", "sync"] }
tokio-stream = { version = "0.1.12", features = ["fs"] }
@ -38,4 +37,4 @@ tracing = "0.1.37"
tracing-error = "0.2.0"
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
usvg = "0.25.0"
zip = "0.6.4"
zip = { workspace = true }

View file

@ -17,7 +17,7 @@ local require_store = {}
-- This token is treated as a string template and filled by DTMM during deployment.
-- This allows hiding unsafe I/O functions behind a setting.
-- It's also a valid table definition, thereby degrading gracefully when not replaced.
-- When not replaced, it's also a valid table definition, thereby degrading gracefully.
local is_io_enabled = {{ is_io_enabled }} -- luacheck: ignore 113
local lua_libs = {
debug = debug,
@ -207,3 +207,5 @@ function init()
Main:init()
end
-- vim: ft=lua

View file

@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::io::{Cursor, ErrorKind};
use std::path::{Path, PathBuf};
use std::str::FromStr;
@ -16,7 +15,6 @@ use sdk::{
Bundle, BundleDatabase, BundleFile, BundleFileType, BundleFileVariant, FromBinary, ToBinary,
};
use serde::{Deserialize, Serialize};
use string_template::Template;
use time::OffsetDateTime;
use tokio::fs::{self, DirEntry};
use tokio::io::AsyncWriteExt;
@ -572,12 +570,17 @@ async fn patch_boot_bundle(state: Arc<ActionState>) -> Result<Vec<Bundle>> {
let span = tracing::debug_span!("Importing mod main script");
let _enter = span.enter();
let is_io_enabled = format!("{}", state.is_io_enabled);
let mut data = HashMap::new();
data.insert("is_io_enabled", is_io_enabled.as_str());
let mut env = Environment::new();
env.add_template("mod_main.lua", include_str!("../../assets/mod_main.lua.j2"))
.wrap_err("Failed to compile template for `mod_main.lua`")?;
let tmpl = env
.get_template("mod_main.lua")
.wrap_err("Failed to get template `mod_main.lua`")?;
let lua = tmpl
.render(minijinja::context!(is_io_enabled => if state.is_io_enabled { "true" } else {"false"}))
.wrap_err("Failed to render template `mod_main.lua`")?;
let tmpl = include_str!("../../assets/mod_main.lua");
let lua = Template::new(tmpl).render(&data);
tracing::trace!("Main script rendered:\n===========\n{}\n=============", lua);
let file =
lua::compile(MOD_BOOT_SCRIPT, lua).wrap_err("Failed to compile mod main Lua file")?;
@ -707,7 +710,7 @@ pub(crate) async fn deploy_mods(state: ActionState) -> Result<()> {
},
async {
let path = state.game_dir.join(DEPLOYMENT_DATA_PATH);
match read_sjson_file::<_, DeploymentData>(path).await {
match read_sjson_file::<_, DeploymentData>(&path).await {
Ok(data) => Ok(Some(data)),
Err(err) => {
if let Some(err) = err.downcast_ref::<std::io::Error>()
@ -715,7 +718,10 @@ pub(crate) async fn deploy_mods(state: ActionState) -> Result<()> {
{
Ok(None)
} else {
Err(err).wrap_err("Failed to read deployment data")
Err(err).wrap_err(format!(
"Failed to read deployment data from: {}",
path.display()
))
}
}
}

View file

@ -11,7 +11,7 @@ use clap::{command, value_parser, Arg};
use color_eyre::eyre::{self, Context};
use color_eyre::{Report, Result, Section};
use druid::AppLauncher;
use interprocess::local_socket::{LocalSocketListener, LocalSocketStream};
use interprocess::local_socket::{prelude::*, GenericNamespaced, ListenerOptions};
use tokio::sync::RwLock;
use crate::controller::worker::work_thread;
@ -29,9 +29,9 @@ mod util {
}
mod ui;
// As explained in https://docs.rs/interprocess/latest/interprocess/local_socket/enum.NameTypeSupport.html
// As explained in https://docs.rs/interprocess/2.1.0/interprocess/local_socket/struct.Name.html
// namespaces are supported on both platforms we care about: Windows and Linux.
const IPC_ADDRESS: &str = "@dtmm.sock";
const IPC_ADDRESS: &str = "dtmm.sock";
#[tracing::instrument]
fn notify_nxm_download(
@ -42,9 +42,13 @@ fn notify_nxm_download(
tracing::debug!("Received Uri '{}', sending to main process.", uri.as_ref());
let mut stream = LocalSocketStream::connect(IPC_ADDRESS)
.wrap_err_with(|| format!("Failed to connect to '{}'", IPC_ADDRESS))
.suggestion("Make sure the main window is open.")?;
let mut stream = LocalSocketStream::connect(
IPC_ADDRESS
.to_ns_name::<GenericNamespaced>()
.expect("Invalid socket name"),
)
.wrap_err_with(|| format!("Failed to connect to '{}'", IPC_ADDRESS))
.suggestion("Make sure the main window is open.")?;
tracing::debug!("Connected to main process at '{}'", IPC_ADDRESS);
@ -130,8 +134,14 @@ fn main() -> Result<()> {
let _guard = span.enter();
let event_sink = event_sink.clone();
let server =
LocalSocketListener::bind(IPC_ADDRESS).wrap_err("Failed to create IPC listener")?;
let server = ListenerOptions::new()
.name(
IPC_ADDRESS
.to_ns_name::<GenericNamespaced>()
.expect("Invalid socket name"),
)
.create_sync()
.wrap_err("Failed to create IPC listener")?;
tracing::debug!("IPC server listening on '{}'", IPC_ADDRESS);

View file

@ -7,13 +7,12 @@ edition = "2021"
clap = { version = "4.0.15", features = ["color", "derive", "std", "cargo", "unicode"] }
cli-table = { version = "0.4.7", default-features = false, features = ["derive"] }
color-eyre = "0.6.2"
confy = "0.5.1"
confy = "0.6.1"
csv-async = { version = "1.2.4", features = ["tokio", "serde"] }
dtmt-shared = { path = "../../lib/dtmt-shared", version = "*" }
futures = "0.3.25"
futures-util = "0.3.24"
glob = "0.3.0"
libloading = "0.7.4"
nanorand = "0.7.0"
oodle = { path = "../../lib/oodle", version = "*" }
pin-project-lite = "0.2.9"
@ -27,13 +26,16 @@ tokio = { version = "1.21.2", features = ["rt-multi-thread", "fs", "process", "m
tracing-error = "0.2.0"
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
tracing = { version = "0.1.37", features = ["async-await"] }
zip = "0.6.3"
zip = { workspace = true }
path-clean = "1.0.1"
path-slash = "0.2.1"
async-recursion = "1.0.2"
notify = "5.1.0"
notify = "6.1.1"
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
shlex = "1.2.0"
shlex = { version = "1.2.0", optional = true }
[dev-dependencies]
tempfile = "3.3.0"
[features]
shlex-bench = ["dep:shlex"]

View file

@ -3,7 +3,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use color_eyre::eyre::{self, Context, Result};
use color_eyre::eyre::{self, bail, Context, Result};
use color_eyre::{Help, Report};
use futures::future::try_join_all;
use futures::StreamExt;
@ -12,7 +12,9 @@ use sdk::{Bundle, BundleFile, CmdLine};
use tokio::fs;
use crate::cmd::util::resolve_bundle_paths;
use crate::shell_parse::ShellParser;
#[inline]
fn parse_glob_pattern(s: &str) -> Result<Pattern, String> {
match Pattern::new(s) {
Ok(p) => Ok(p),
@ -20,6 +22,7 @@ fn parse_glob_pattern(s: &str) -> Result<Pattern, String> {
}
}
#[inline]
fn flatten_name(s: &str) -> String {
s.replace('/', "_")
}
@ -131,26 +134,29 @@ async fn parse_command_line_template(tmpl: &String) -> Result<CmdLine> {
let mut cmd = if matches!(fs::try_exists(tmpl).await, Ok(true)) {
let path = PathBuf::from(tmpl);
if path.file_name() == Some(OsStr::new("main.py")) {
let arg = path.display().to_string();
let mut cmd = CmdLine::new("python");
cmd.arg(shlex::quote(&arg).to_string());
cmd.arg(path);
cmd
} else {
CmdLine::new(path)
}
} else {
let Some(args) = shlex::split(tmpl) else {
eyre::bail!("Invalid shell syntax");
};
let mut parsed = ShellParser::new(tmpl.as_bytes());
// Safety: The initial `tmpl` was a `&String` (i.e. valid UTF-8), and `ShellParser` only
// returns sub-slices of it that are cut at ASCII bytes (whitespace and quotes), which never
// occur inside a multi-byte UTF-8 sequence. So every resulting slice is still valid UTF-8.
let mut cmd = CmdLine::new(unsafe {
let bytes = parsed.next().expect("Template is not empty");
String::from_utf8_unchecked(bytes.to_vec())
});
// We already checked that the template is not empty
let mut cmd = CmdLine::new(args[0].clone());
let mut it = args.iter();
// Skip the first one, that's the command name
it.next();
while let Some(arg) = parsed.next() {
// Safety: See above.
cmd.arg(unsafe { String::from_utf8_unchecked(arg.to_vec()) });
}
for arg in it {
cmd.arg(arg);
if parsed.errored {
bail!("Invalid command line template");
}
cmd

View file

@ -1,6 +1,5 @@
use std::io::{Cursor, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use clap::{value_parser, Arg, ArgMatches, Command};
use color_eyre::eyre::{Context, Result};
@ -8,9 +7,9 @@ use color_eyre::Help;
use dtmt_shared::ModConfig;
use path_slash::{PathBufExt, PathExt};
use tokio::fs;
use tokio::sync::Mutex;
use tokio_stream::wrappers::ReadDirStream;
use tokio_stream::StreamExt;
use zip::write::SimpleFileOptions;
use zip::ZipWriter;
use crate::cmd::build::read_project_config;
@ -51,11 +50,7 @@ pub(crate) fn command_definition() -> Command {
}
#[async_recursion::async_recursion]
async fn process_directory<P1, P2, W>(
zip: Arc<Mutex<ZipWriter<W>>>,
path: P1,
prefix: P2,
) -> Result<()>
async fn process_directory<P1, P2, W>(zip: &mut ZipWriter<W>, path: P1, prefix: P2) -> Result<()>
where
P1: AsRef<Path> + std::marker::Send,
P2: AsRef<Path> + std::marker::Send,
@ -64,9 +59,7 @@ where
let path = path.as_ref();
let prefix = prefix.as_ref();
zip.lock()
.await
.add_directory(prefix.to_slash_lossy(), Default::default())?;
zip.add_directory(prefix.to_slash_lossy(), SimpleFileOptions::default())?;
let read_dir = fs::read_dir(&path)
.await
@ -87,12 +80,11 @@ where
.await
.wrap_err_with(|| format!("Failed to read '{}'", in_path.display()))?;
{
let mut zip = zip.lock().await;
zip.start_file(out_path.to_slash_lossy(), Default::default())?;
zip.start_file(out_path.to_slash_lossy(), SimpleFileOptions::default())?;
zip.write_all(&data)?;
}
} else if t.is_dir() {
process_directory(zip.clone(), in_path, out_path).await?;
process_directory(zip, in_path, out_path).await?;
}
}
@ -107,16 +99,12 @@ where
let path = path.as_ref();
let dest = dest.as_ref();
let data = Cursor::new(Vec::new());
let zip = ZipWriter::new(data);
let zip = Arc::new(Mutex::new(zip));
let mut zip = ZipWriter::new(Cursor::new(Vec::with_capacity(1024)));
process_directory(zip.clone(), path, PathBuf::from(&cfg.id))
process_directory(&mut zip, path, PathBuf::from(&cfg.id))
.await
.wrap_err("Failed to add directory to archive")?;
let mut zip = zip.lock().await;
{
let name = PathBuf::from(&cfg.id).join("dtmt.cfg");
let path = cfg.dir.join("dtmt.cfg");
@ -125,7 +113,7 @@ where
.await
.wrap_err_with(|| format!("Failed to read mod config at {}", path.display()))?;
zip.start_file(name.to_slash_lossy(), Default::default())?;
zip.start_file(name.to_slash_lossy(), SimpleFileOptions::default())?;
zip.write_all(&data)?;
}

View file

@ -1,6 +1,7 @@
#![feature(io_error_more)]
#![feature(let_chains)]
#![feature(result_flattening)]
#![feature(test)]
#![windows_subsystem = "console"]
use std::path::PathBuf;
@ -27,6 +28,7 @@ mod cmd {
mod util;
pub mod watch;
}
mod shell_parse;
#[derive(Default, Deserialize, Serialize)]
struct GlobalConfig {

View file

@ -0,0 +1,189 @@
/// Scanner state while [`ShellParser`] looks for the end of the current word.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum ParserState {
    /// Only leading whitespace (if anything) has been consumed so far.
    Start,
    /// Inside an unquoted word.
    Word,
    /// Inside a section that was opened by `'`.
    SingleQuote,
    /// Inside a section that was opened by `"`.
    DoubleQuote,
}

/// Minimal shell-like word splitter that works on raw bytes without allocating.
///
/// Words are separated by spaces, tabs and newlines. A word that begins with `'` or `"`
/// extends to the matching closing quote, and the quotes themselves are not part of the
/// returned slice. Backslash escapes are not interpreted, and a quote character in the
/// middle of an unquoted word is treated as an ordinary byte.
pub struct ShellParser<'a> {
    bytes: &'a [u8],
    offset: usize,
    /// Set once the input ended inside an unterminated quoted section.
    pub errored: bool,
}

impl<'a> ShellParser<'a> {
    /// Creates a parser positioned at the beginning of `bytes`.
    pub fn new(bytes: &'a [u8]) -> Self {
        Self {
            bytes,
            offset: 0,
            errored: false,
        }
    }

    /// Consumes input up to the end of the next word and returns that word.
    ///
    /// Returns `None` when only whitespace is left, or when the input ends inside a quoted
    /// section; the latter case also sets `errored`.
    fn parse_word(&mut self) -> Option<&'a [u8]> {
        let input = self.bytes;
        // Index of the first byte of the word. It moves forward over leading whitespace
        // and over an opening quote.
        let mut word_start = self.offset;
        let mut state = ParserState::Start;

        while let Some(&byte) = input.get(self.offset) {
            let at = self.offset;
            self.offset += 1;

            let ends_word = match state {
                ParserState::Start => {
                    match byte {
                        // Leading whitespace is skipped.
                        b' ' | b'\t' | b'\n' => word_start += 1,
                        b'\'' => {
                            state = ParserState::SingleQuote;
                            word_start += 1;
                        }
                        b'"' => {
                            state = ParserState::DoubleQuote;
                            word_start += 1;
                        }
                        _ => state = ParserState::Word,
                    }
                    false
                }
                // Unquoted whitespace terminates a plain word.
                ParserState::Word => matches!(byte, b' ' | b'\t' | b'\n'),
                // Only the matching quote terminates a quoted section.
                ParserState::SingleQuote => byte == b'\'',
                ParserState::DoubleQuote => byte == b'"',
            };

            if ends_word {
                // The terminating byte itself is consumed but not part of the word.
                return Some(&input[word_start..at]);
            }
        }

        // The input is exhausted; what that means depends on where we stopped.
        match state {
            ParserState::Start => None,
            ParserState::Word => Some(&input[word_start..self.offset]),
            ParserState::SingleQuote | ParserState::DoubleQuote => {
                self.errored = true;
                None
            }
        }
    }
}

impl<'a> Iterator for ShellParser<'a> {
    type Item = &'a [u8];

    /// Yields the next word of the input, see [`ShellParser::parse_word`].
    fn next(&mut self) -> Option<Self::Item> {
        self.parse_word()
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// A single unquoted word is returned as-is.
    #[test]
    fn test_one_word() {
        let mut parser = ShellParser::new(b"hello");
        assert_eq!(parser.next(), Some(&b"hello"[..]));
        assert_eq!(parser.next(), None);
    }

    /// Single quotes are stripped from the returned word.
    #[test]
    fn test_one_single() {
        let mut parser = ShellParser::new(b"'hello'");
        assert_eq!(parser.next(), Some(&b"hello"[..]));
        assert_eq!(parser.next(), None);
    }

    /// A quote that is never closed yields no word and flags the parser as errored.
    #[test]
    fn test_open_quote() {
        let mut parser = ShellParser::new(b"'hello");
        assert_eq!(parser.next(), None);
        assert!(parser.errored);
    }

    /// A command line mixing plain words and a double-quoted path.
    #[test]
    fn test_ww2ogg() {
        let input = b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"";
        let expected: [&[u8]; 3] = [
            b"ww2ogg.exe",
            b"--pcb",
            b"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin",
        ];

        let mut parser = ShellParser::new(input);
        for word in expected {
            assert_eq!(parser.next(), Some(word));
        }
        assert_eq!(parser.next(), None);
    }
}
// Micro-benchmarks for `ShellParser`, built on the unstable `test` crate.
// Each sub-module benchmarks one input: `custom` measures `ShellParser`, and `shlex`
// measures `shlex::bytes::Shlex` on the same input. The `shlex` variants are only
// compiled when the `shlex-bench` feature is enabled.
#[cfg(test)]
mod bench {
extern crate test;
use super::*;
#[cfg(feature = "shlex-bench")]
use shlex::bytes::Shlex;
use test::Bencher;
// Input: a command name, a flag and a double-quoted path.
mod ww2ogg {
use super::*;
#[bench]
fn custom(b: &mut Bencher) {
// `black_box` hides the constant input from the optimizer.
let val = test::black_box(
b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"",
);
b.iter(|| {
let it = ShellParser::new(val);
// Collecting forces the whole input to be parsed; `black_box` keeps the result alive.
let _: Vec<_> = test::black_box(it.collect());
})
}
#[cfg(feature = "shlex-bench")]
#[bench]
fn shlex(b: &mut Bencher) {
let val = test::black_box(
b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"",
);
b.iter(|| {
let it = Shlex::new(val);
let _: Vec<_> = test::black_box(it.collect());
})
}
}
// Input: a single single-quoted word.
mod one_single {
use super::*;
#[bench]
fn custom(b: &mut Bencher) {
let val = test::black_box(b"'hello'");
b.iter(|| {
let it = ShellParser::new(val);
let _: Vec<_> = test::black_box(it.collect());
})
}
#[cfg(feature = "shlex-bench")]
#[bench]
fn shlex(b: &mut Bencher) {
let val = test::black_box(b"'hello'");
b.iter(|| {
let it = Shlex::new(val);
let _: Vec<_> = test::black_box(it.collect());
})
}
}
}

View file

@ -9,7 +9,7 @@ edition = "2021"
ansi_term = "0.12.1"
color-eyre = "0.6.2"
serde = "1.0.152"
steamlocate = "2.0.0-alpha.0"
steamlocate = "2.0.0-beta.2"
time = { version = "0.3.19", features = ["formatting", "local-offset", "macros"] }
tracing = "0.1.37"
tracing-error = "0.2.0"

@ -1 +1 @@
Subproject commit 19120166f9fc7838b98c71fc348791abc820e323
Subproject commit 5d1a075742395f767c79d9c0d7466c6fb442f106

View file

@ -9,7 +9,7 @@ edition = "2021"
futures = "0.3.26"
lazy_static = "1.4.0"
regex = "1.7.1"
reqwest = { version = "0.11.14" }
reqwest = { version = "0.12.4" }
serde = { version = "1.0.152", features = ["derive"] }
serde_json = "1.0.94"
thiserror = "1.0.39"

View file

@ -10,4 +10,4 @@ color-eyre = "0.6.2"
tracing = "0.1.37"
[build-dependencies]
bindgen = "0.64.0"
bindgen = "0.69.4"

View file

@ -1,5 +1,3 @@
extern crate bindgen;
use std::env;
use std::path::PathBuf;
@ -33,7 +31,7 @@ fn main() {
.blocklist_file("stdlib.h")
// Tell cargo to invalidate the built crate whenever any of the
// included header files changed.
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
.parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
// Finish the builder and generate the bindings.
.generate()
// Unwrap the Result and panic on failure.

View file

@ -4,15 +4,14 @@ version = "0.3.0"
edition = "2021"
[dependencies]
bitflags = "1.3.2"
bitflags = "2.5.0"
byteorder = "1.4.3"
color-eyre = "0.6.2"
csv-async = { version = "1.2.4", features = ["tokio", "serde"] }
fastrand = "1.8.0"
fastrand = "2.1.0"
futures = "0.3.25"
futures-util = "0.3.24"
glob = "0.3.0"
libloading = "0.7.4"
nanorand = "0.7.0"
pin-project-lite = "0.2.9"
serde = { version = "1.0.147", features = ["derive"] }

View file

@ -501,7 +501,7 @@ impl BundleFileVariant {
}
bitflags! {
#[derive(Default)]
#[derive(Default, Clone, Copy, Debug)]
pub struct Properties: u32 {
const DATA = 0b100;
}

View file

@ -1,3 +1,5 @@
#![feature(test)]
mod binary;
mod bundle;
mod context;

View file

@ -1,15 +1,16 @@
use std::fmt;
use color_eyre::eyre::Context;
use color_eyre::Report;
use color_eyre::{Report, Result};
use serde::de::Visitor;
use serde::{Deserialize, Serialize};
use serde::{Deserializer, Serializer};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
mod dictionary;
// Currently unused
// mod murmurhash32;
mod murmurhash64;
mod types;
mod util;
pub const SEED: u32 = 0;
@ -18,372 +19,4 @@ pub use murmurhash64::hash;
pub use murmurhash64::hash32;
pub use murmurhash64::hash_inverse as inverse;
/// Returns `value` with the order of its four bytes reversed.
fn _swap_bytes_u32(value: u32) -> u32 {
    value.swap_bytes()
}
/// Returns `value` with the order of its eight bytes reversed.
fn _swap_bytes_u64(value: u64) -> u64 {
    value.swap_bytes()
}
/// A 64-bit Murmur hash value.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur64(u64);

impl Murmur64 {
    /// Hashes the bytes of `s` with the module-level `hash` function, seeded with `SEED`.
    pub fn hash<B>(s: B) -> Self
    where
        B: AsRef<[u8]>,
    {
        hash(s.as_ref(), SEED as u64).into()
    }
}

impl From<u64> for Murmur64 {
    /// Wraps an already computed hash value.
    fn from(value: u64) -> Self {
        Self(value)
    }
}

impl From<Murmur64> for u64 {
    /// Unwraps the raw hash value.
    fn from(value: Murmur64) -> Self {
        value.0
    }
}

impl TryFrom<&str> for Murmur64 {
    type Error = Report;

    /// Parses a hash from its hexadecimal text form (no `0x` prefix).
    ///
    /// # Errors
    ///
    /// Fails when `value` is not a hexadecimal number that fits into 64 bits.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        u64::from_str_radix(value, 16)
            .map(Self)
            .wrap_err_with(|| format!("Failed to convert value to Murmur64: {value}"))
    }
}

impl fmt::UpperHex for Murmur64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

impl fmt::LowerHex for Murmur64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.0, f)
    }
}

impl fmt::Display for Murmur64 {
    /// Formats the hash as uppercase hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

// `Murmur64` doubles as its own serde visitor; the value held by the visitor instance
// is never read.
impl<'de> Visitor<'de> for Murmur64 {
    type Value = Self;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str(
            "an unsigned 64 bit integer \
             or a string in hexadecimal format encoding such an integer",
        )
    }

    /// Takes the raw bit pattern of the float as the hash; no numeric conversion happens.
    fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        // NOTE(review): presumably some input format hands 64-bit integers over as floats;
        // confirm against the deserializers that are actually used with this type.
        Ok(Self::from(value.to_bits()))
    }

    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(Self::from(value))
    }

    /// Accepts the hexadecimal text form, see the `TryFrom<&str>` implementation.
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Murmur64::try_from(value).map_err(|err| {
            E::custom(format!("failed to convert '{value}' to Murmur64: {err}"))
        })
    }
}

impl<'de> Deserialize<'de> for Murmur64 {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `Self(0)` is only a visitor instance, not a default value.
        deserializer.deserialize_any(Self(0))
    }
}

impl Serialize for Murmur64 {
    /// Serializes the hash as a 16-digit, zero-padded, uppercase hexadecimal string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(&format!("{self:016X}"))
    }
}
/// A 32-bit Murmur hash value.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur32(u32);

impl Murmur32 {
    /// Hashes the bytes of `s` with the module-level `hash32` function, seeded with `SEED`.
    pub fn hash<B>(s: B) -> Self
    where
        B: AsRef<[u8]>,
    {
        hash32(s.as_ref(), SEED).into()
    }
}

impl From<u32> for Murmur32 {
    /// Wraps an already computed hash value.
    fn from(value: u32) -> Self {
        Self(value)
    }
}

impl From<Murmur32> for u32 {
    /// Unwraps the raw hash value.
    fn from(value: Murmur32) -> Self {
        value.0
    }
}

impl TryFrom<&str> for Murmur32 {
    type Error = Report;

    /// Parses a hash from its hexadecimal text form (no `0x` prefix).
    ///
    /// # Errors
    ///
    /// Fails when `value` is not a hexadecimal number that fits into 32 bits.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        u32::from_str_radix(value, 16)
            .map(Self)
            .wrap_err_with(|| format!("Failed to convert value to Murmur32: {value}"))
    }
}

impl fmt::UpperHex for Murmur32 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

impl fmt::Display for Murmur32 {
    /// Formats the hash as uppercase hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

impl Serialize for Murmur32 {
    /// Serializes the hash as an 8-digit, zero-padded, uppercase hexadecimal string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(&format!("{self:08X}"))
    }
}

// `Murmur32` doubles as its own serde visitor; the value held by the visitor instance
// is never read.
impl<'de> Visitor<'de> for Murmur32 {
    type Value = Self;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str(
            "an unsigned 32 bit integer \
             or a string in hexadecimal format encoding such an integer",
        )
    }

    /// Takes the low 32 bits of the float's raw bit pattern as the hash.
    fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        // NOTE(review): this is a bit-level reinterpretation followed by a truncation, not
        // a numeric conversion. Its intent cannot be confirmed from this file, so the
        // behavior is kept exactly as it was.
        self.visit_u32(value.to_bits() as u32)
    }

    /// Accepts integers that fit into 32 bits and rejects everything larger.
    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        // A plain `as u32` cast would silently drop the upper bits and produce the hash of
        // a different value, so out-of-range input is reported as an error instead.
        match u32::try_from(value) {
            Ok(value) => self.visit_u32(value),
            Err(_) => Err(E::invalid_value(
                serde::de::Unexpected::Unsigned(value),
                &self,
            )),
        }
    }

    fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(Self::from(value))
    }

    /// Accepts the hexadecimal text form, see the `TryFrom<&str>` implementation.
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Murmur32::try_from(value).map_err(|err| {
            E::custom(format!("failed to convert '{value}' to Murmur32: {err}"))
        })
    }
}

impl<'de> Deserialize<'de> for Murmur32 {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `Self(0)` is only a visitor instance, not a default value.
        deserializer.deserialize_any(Self(0))
    }
}
/// An identifier that is known either by its string or only by its 64-bit hash.
///
/// When reading in a bundle, a dictionary entry is not available for every hash in there.
/// The real string should be available when it is known, but the original hash has to be
/// retained when it is not. This matters most when a bundle that was read is written back,
/// as the hashes need to stay the same.
///
/// The previous approach of always turning hashes into strings worked well for displaying
/// them, but would have made it very hard to turn a stringified hash back into an actual
/// hash.
#[derive(Clone, Debug, Eq)]
pub enum IdString64 {
    Hash(Murmur64),
    String(String),
}

impl IdString64 {
    /// Returns the stored hash, or computes the hash of the stored string.
    pub fn to_murmur64(&self) -> Murmur64 {
        match self {
            Self::String(name) => Murmur64::hash(name.as_bytes()),
            Self::Hash(value) => *value,
        }
    }

    /// Returns a displayable copy: the string itself, or the hash rendered via `Display`.
    pub fn display(&self) -> IdString64Display {
        IdString64Display(match self {
            Self::String(name) => name.clone(),
            Self::Hash(value) => value.to_string(),
        })
    }

    /// Whether the original string is known.
    pub fn is_string(&self) -> bool {
        matches!(self, Self::String(_))
    }

    /// Whether only the hash is known.
    pub fn is_hash(&self) -> bool {
        matches!(self, Self::Hash(_))
    }
}

impl<S: Into<String>> From<S> for IdString64 {
    fn from(value: S) -> Self {
        let name: String = value.into();
        Self::String(name)
    }
}

impl From<Murmur64> for IdString64 {
    fn from(value: Murmur64) -> Self {
        IdString64::Hash(value)
    }
}

impl From<IdString64> for Murmur64 {
    fn from(value: IdString64) -> Self {
        match value {
            IdString64::Hash(known) => known,
            IdString64::String(name) => Murmur64::hash(name.as_bytes()),
        }
    }
}

// Equality and hashing both go through the Murmur hash, so a string and the hash of that
// same string compare equal and land in the same bucket.
impl PartialEq for IdString64 {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.to_murmur64(), other.to_murmur64());
        lhs == rhs
    }
}

impl std::hash::Hash for IdString64 {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let raw: u64 = self.to_murmur64().into();
        state.write_u64(raw);
    }
}

impl serde::Serialize for IdString64 {
    /// Always serializes the numeric hash, even when the string is known.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let raw: u64 = self.to_murmur64().into();
        serializer.serialize_u64(raw)
    }
}

/// Visitor that builds an [`IdString64`] from either an integer or a string.
struct IdString64Visitor;

impl<'de> serde::de::Visitor<'de> for IdString64Visitor {
    type Value = IdString64;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("an u64 or a string")
    }

    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(IdString64::Hash(Murmur64::from(value)))
    }

    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(IdString64::String(v.to_owned()))
    }

    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        // The owned string is moved in without copying.
        Ok(IdString64::String(v))
    }
}

impl<'de> serde::Deserialize<'de> for IdString64 {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // Asks for a `u64`; the visitor accepts strings as well.
        deserializer.deserialize_u64(IdString64Visitor)
    }
}
/// Owned, printable text form of an `IdString64`.
pub struct IdString64Display(String);

impl std::fmt::Display for IdString64Display {
    /// Writes the stored text verbatim; width and alignment flags are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
// Hex formatting renders the Murmur hash of the identifier, whether or not the
// string is known.
impl std::fmt::UpperHex for IdString64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hashed = self.to_murmur64();
        std::fmt::UpperHex::fmt(&hashed, f)
    }
}

impl std::fmt::LowerHex for IdString64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hashed = self.to_murmur64();
        std::fmt::LowerHex::fmt(&hashed, f)
    }
}
pub use types::*;

365
lib/sdk/src/murmur/types.rs Normal file
View file

@ -0,0 +1,365 @@
use self::util::{parse_hex32, parse_hex64};
use super::*;
/// A 64-bit Murmur hash value.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur64(u64);

impl Murmur64 {
    /// Hashes the bytes of `s` with the module-level `hash` function, seeded with `SEED`.
    pub fn hash<B>(s: B) -> Self
    where
        B: AsRef<[u8]>,
    {
        hash(s.as_ref(), SEED as u64).into()
    }
}

impl From<u64> for Murmur64 {
    /// Wraps an already computed hash value.
    fn from(value: u64) -> Self {
        Self(value)
    }
}

impl From<Murmur64> for u64 {
    /// Unwraps the raw hash value.
    fn from(value: Murmur64) -> Self {
        value.0
    }
}

impl TryFrom<&str> for Murmur64 {
    type Error = Report;

    /// Parses a hash from its text form using `parse_hex64`.
    ///
    /// # Errors
    ///
    /// Fails when `parse_hex64` rejects `value`; the error carries the offending text.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        parse_hex64(value)
            .map(Self)
            .wrap_err_with(|| format!("Failed to convert value to Murmur64: {value}"))
    }
}

impl fmt::UpperHex for Murmur64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

impl fmt::LowerHex for Murmur64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.0, f)
    }
}

impl fmt::Display for Murmur64 {
    /// Formats the hash as uppercase hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

// `Murmur64` doubles as its own serde visitor; the value held by the visitor instance
// is never read.
impl<'de> Visitor<'de> for Murmur64 {
    type Value = Self;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str(
            "an unsigned 64 bit integer \
             or a string in hexadecimal format encoding such an integer",
        )
    }

    /// Takes the raw bit pattern of the float as the hash; no numeric conversion happens.
    fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        // NOTE(review): presumably some input format hands 64-bit integers over as floats;
        // confirm against the deserializers that are actually used with this type.
        Ok(Self::from(value.to_bits()))
    }

    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(Self::from(value))
    }

    /// Accepts the text form, see the `TryFrom<&str>` implementation.
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Murmur64::try_from(value).map_err(|err| {
            E::custom(format!("failed to convert '{value}' to Murmur64: {err}"))
        })
    }
}

impl<'de> Deserialize<'de> for Murmur64 {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `Self(0)` is only a visitor instance, not a default value.
        deserializer.deserialize_any(Self(0))
    }
}

impl Serialize for Murmur64 {
    /// Serializes the hash as a 16-digit, zero-padded, uppercase hexadecimal string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(&format!("{self:016X}"))
    }
}
/// A 32-bit Murmur hash value.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur32(u32);

impl Murmur32 {
    /// Hashes the bytes of `s` with the module-level `hash32` function, seeded with `SEED`.
    pub fn hash<B>(s: B) -> Self
    where
        B: AsRef<[u8]>,
    {
        hash32(s.as_ref(), SEED).into()
    }
}

impl From<u32> for Murmur32 {
    /// Wraps an already computed hash value.
    fn from(value: u32) -> Self {
        Self(value)
    }
}

impl From<Murmur32> for u32 {
    /// Unwraps the raw hash value.
    fn from(value: Murmur32) -> Self {
        value.0
    }
}

impl TryFrom<&str> for Murmur32 {
    type Error = Report;

    /// Parses a hash from its text form using `parse_hex32`.
    ///
    /// # Errors
    ///
    /// Fails when `parse_hex32` rejects `value`; the error carries the offending text.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        parse_hex32(value)
            .map(Self)
            .wrap_err_with(|| format!("Failed to convert value to Murmur32: {value}"))
    }
}

impl fmt::UpperHex for Murmur32 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

impl fmt::Display for Murmur32 {
    /// Formats the hash as uppercase hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::UpperHex::fmt(&self.0, f)
    }
}

impl Serialize for Murmur32 {
    /// Serializes the hash as an 8-digit, zero-padded, uppercase hexadecimal string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(&format!("{self:08X}"))
    }
}

// `Murmur32` doubles as its own serde visitor; the value held by the visitor instance
// is never read.
impl<'de> Visitor<'de> for Murmur32 {
    type Value = Self;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str(
            "an unsigned 32 bit integer \
             or a string in hexadecimal format encoding such an integer",
        )
    }

    /// Takes the low 32 bits of the float's raw bit pattern as the hash.
    fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        // NOTE(review): this is a bit-level reinterpretation followed by a truncation, not
        // a numeric conversion. Its intent cannot be confirmed from this file, so the
        // behavior is kept exactly as it was.
        self.visit_u32(value.to_bits() as u32)
    }

    /// Accepts integers that fit into 32 bits and rejects everything larger.
    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        // A plain `as u32` cast would silently drop the upper bits and produce the hash of
        // a different value, so out-of-range input is reported as an error instead.
        match u32::try_from(value) {
            Ok(value) => self.visit_u32(value),
            Err(_) => Err(E::invalid_value(
                serde::de::Unexpected::Unsigned(value),
                &self,
            )),
        }
    }

    fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(Self::from(value))
    }

    /// Accepts the text form, see the `TryFrom<&str>` implementation.
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Murmur32::try_from(value).map_err(|err| {
            E::custom(format!("failed to convert '{value}' to Murmur32: {err}"))
        })
    }
}

impl<'de> Deserialize<'de> for Murmur32 {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `Self(0)` is only a visitor instance, not a default value.
        deserializer.deserialize_any(Self(0))
    }
}
// This type encodes the fact that when reading in a bundle, we don't always have a dictionary
// entry for every hash in there. So we do want to have the real string available when needed,
// but at the same time retain the original hash information for when we don't.
// This is especially important when wanting to write back the read bundle, as the hashes need to
// stay the same.
// The previous system of always turning hashes into strings worked well for the purpose of
// displaying hashes, but would have made it very hard to turn a stringyfied hash back into
// an actual hash.
#[derive(Clone, Debug, Eq)]
pub enum IdString64 {
Hash(Murmur64),
String(String),
}
impl IdString64 {
    /// Returns the 64-bit hash for this identifier.
    ///
    /// A stored hash is returned as-is; a stored string is hashed on every call.
    pub fn to_murmur64(&self) -> Murmur64 {
        match self {
            IdString64::String(text) => Murmur64::hash(text.as_bytes()),
            IdString64::Hash(known) => *known,
        }
    }

    /// Returns a displayable wrapper holding either a copy of the string or the
    /// hash rendered through its `to_string()`.
    pub fn display(&self) -> IdString64Display {
        IdString64Display(match self {
            Self::String(text) => text.clone(),
            Self::Hash(known) => known.to_string(),
        })
    }

    /// Returns `true` when the string form is available.
    pub fn is_string(&self) -> bool {
        matches!(self, Self::String(_))
    }

    /// Returns `true` when only the hash is available.
    pub fn is_hash(&self) -> bool {
        matches!(self, Self::Hash(_))
    }
}
impl<S: Into<String>> From<S> for IdString64 {
fn from(value: S) -> Self {
Self::String(value.into())
}
}
impl From<Murmur64> for IdString64 {
fn from(value: Murmur64) -> Self {
Self::Hash(value)
}
}
impl From<IdString64> for Murmur64 {
fn from(value: IdString64) -> Self {
value.to_murmur64()
}
}
/// Equality is decided on the hash value, so a `String` variant compares equal to
/// the `Hash` variant carrying that string's hash.
impl PartialEq for IdString64 {
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.to_murmur64();
        let rhs = other.to_murmur64();
        lhs == rhs
    }
}
/// Feeds the hash value into the hasher, the same quantity that `PartialEq`
/// compares, so equal identifiers produce equal hashes.
impl std::hash::Hash for IdString64 {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let raw: u64 = self.to_murmur64().into();
        state.write_u64(raw);
    }
}
/// Always serializes the hash value as a `u64`; a stored string is hashed first and
/// is not written out itself.
impl serde::Serialize for IdString64 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let raw: u64 = self.to_murmur64().into();
        serializer.serialize_u64(raw)
    }
}
struct IdString64Visitor;
impl<'de> serde::de::Visitor<'de> for IdString64Visitor {
type Value = IdString64;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an u64 or a string")
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::Hash(value.into()))
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::String(v.to_string()))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::String(v))
}
}
/// Asks the deserializer for a `u64`, matching what `Serialize` writes.
///
/// The visitor also accepts strings, so a deserializer that answers the `u64`
/// request with a string yields the `String` variant.
impl<'de> serde::Deserialize<'de> for IdString64 {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = IdString64Visitor;
        deserializer.deserialize_u64(visitor)
    }
}
/// Owned, pre-rendered text for an `IdString64`, as returned by `IdString64::display`.
pub struct IdString64Display(String);

impl std::fmt::Display for IdString64Display {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Written verbatim: width/fill/precision flags on `f` are not applied.
        f.write_str(&self.0)
    }
}
/// Formats the identifier's hash value using `Murmur64`'s `UpperHex` implementation.
impl std::fmt::UpperHex for IdString64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hash = self.to_murmur64();
        fmt::UpperHex::fmt(&hash, f)
    }
}
/// Formats the identifier's hash value using `Murmur64`'s `LowerHex` implementation.
impl std::fmt::LowerHex for IdString64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hash = self.to_murmur64();
        fmt::LowerHex::fmt(&hash, f)
    }
}

132
lib/sdk/src/murmur/util.rs Normal file
View file

@ -0,0 +1,132 @@
use color_eyre::eyre::bail;
use color_eyre::Result;
// Builds a 256-entry lookup table from ASCII byte to hex digit value, similar to:
// https://github.com/zbjornson/fast-hex/blob/a3487bca95127634a61bfeae8f8bfc8f0e5baa3f/src/hex.cc#L20-L89
//
// With `upper == true` the digit value is placed in the high nibble (for the first
// character of a byte pair), otherwise in the low nibble. Every byte that is not
// `0-9`, `A-F` or `a-f` maps to `u8::MAX`, which no valid digit can produce.
const fn generate_byte_map(upper: bool) -> [u8; 256] {
    let shift: u32 = if upper { 4 } else { 0 };

    // Start out with everything marked invalid, then fill in the valid digits.
    let mut table = [u8::MAX; 256];

    let mut digit = 0u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit << shift;
        digit += 1;
    }

    let mut letter = 0u8;
    while letter < 6 {
        let value = (10 + letter) << shift;
        table[(b'A' + letter) as usize] = value;
        table[(b'a' + letter) as usize] = value;
        letter += 1;
    }

    table
}
/// Maps an ASCII byte to its hex digit value in the high nibble (`value * 16`),
/// or to `u8::MAX` if the byte is not a hex digit.
const BYTE_MAP_UPPER: [u8; 256] = generate_byte_map(true);
/// Maps an ASCII byte to its hex digit value in the low nibble,
/// or to `u8::MAX` if the byte is not a hex digit.
const BYTE_MAP_LOWER: [u8; 256] = generate_byte_map(false);
// Generates `pub fn $name(s) -> Result<$ty>`, a parser for a fixed-length hexadecimal
// string (upper or lower case, no prefix, no sign).
//
// - `$name`: name of the generated function
// - `$ty`:   integer type to produce
// - `$len`:  exact number of hex characters expected (two per output byte)
//
// The generated function fails when the input length differs from `$len` or when
// any byte is not a hex digit; the error names the offending byte.
macro_rules! make_parse_hex {
    ($name:ident, $ty:ty, $len:expr) => {
        #[inline]
        pub fn $name(s: impl AsRef<str>) -> Result<$ty> {
            // For the string to be valid hex characters, it needs to be ASCII.
            // So we can simply treat it as a byte stream.
            let s = s.as_ref().as_bytes();
            if s.len() != $len {
                bail!(
                    "String length doesn't match. Expected {}, got {}",
                    $len,
                    s.len()
                );
            }

            let n = $len / 2;
            let mut out: $ty = 0;

            // Each pair of characters encodes one byte, most significant byte first.
            for (i, pair) in s.chunks_exact(2).enumerate() {
                let (hi_char, lo_char) = (pair[0], pair[1]);

                // Report the input byte, not the `u8::MAX` marker from the table.
                // For non-ASCII input this is a single UTF-8 code unit, so the
                // numeric value is the reliable part of the message.
                let hi = BYTE_MAP_UPPER[hi_char as usize];
                if hi == u8::MAX {
                    bail!("Invalid character {:?} ({})", char::from(hi_char), hi_char);
                }

                let lo = BYTE_MAP_LOWER[lo_char as usize];
                if lo == u8::MAX {
                    bail!("Invalid character {:?} ({})", char::from(lo_char), lo_char);
                }

                // `hi` only occupies the high nibble and `lo` only the low one.
                out |= ((hi | lo) as $ty) << ((n - i - 1) * 8);
            }

            Ok(out)
        }
    };
}
// `parse_hex64`: parses exactly 16 hex characters into a `u64`.
make_parse_hex!(parse_hex64, u64, 16);
// `parse_hex32`: parses exactly 8 hex characters into a `u32`.
make_parse_hex!(parse_hex32, u32, 8);
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn parse_32() {
        let hash = "A14E8DFA";
        assert_eq!(parse_hex32(hash).unwrap(), 0xA14E8DFA);
    }

    #[test]
    fn parse_64() {
        let hash = "A14E8DFA2CD117E2";
        assert_eq!(parse_hex64(hash).unwrap(), 0xA14E8DFA2CD117E2);
    }

    // The lookup tables have a separate range for `a-f`; make sure it is exercised.
    #[test]
    fn parse_lowercase() {
        assert_eq!(parse_hex32("a14e8dfa").unwrap(), 0xA14E8DFA);
        assert_eq!(parse_hex64("a14e8dfa2cd117e2").unwrap(), 0xA14E8DFA2CD117E2);
    }

    #[test]
    fn parse_rejects_wrong_length() {
        assert!(parse_hex32("").is_err());
        assert!(parse_hex32("A14E8DF").is_err());
        assert!(parse_hex32("A14E8DFA0").is_err());
        assert!(parse_hex64("A14E8DFA").is_err());
    }

    #[test]
    fn parse_rejects_non_hex_characters() {
        // Invalid character in the first and in the second position of a pair.
        assert!(parse_hex32("G14E8DFA").is_err());
        assert!(parse_hex32("AG4E8DFA").is_err());
        // Unlike `from_str_radix`, a sign is not accepted.
        assert!(parse_hex32("+14E8DFA").is_err());
        assert!(parse_hex64("A14E8DFA2CD117EZ").is_err());
    }

    // Sanity checks that the standard library agrees on the expected values.
    #[test]
    fn std_from_radix_32() {
        let hash = "A14E8DFA";
        assert_eq!(u32::from_str_radix(hash, 16).unwrap(), 0xA14E8DFA);
    }

    #[test]
    fn std_from_radix_64() {
        let hash = "A14E8DFA2CD117E2";
        assert_eq!(u64::from_str_radix(hash, 16).unwrap(), 0xA14E8DFA2CD117E2);
    }
}
// Micro-benchmarks comparing the table-driven parsers against `from_str_radix`.
// Inputs and results go through `black_box` so the work cannot be optimized away.
#[cfg(test)]
mod bench {
    extern crate test;

    use self::test::{black_box, Bencher};
    use super::{parse_hex32, parse_hex64};

    const HASH32: &str = "A14E8DFA";
    const HASH64: &str = "A14E8DFA2CD117E2";

    #[bench]
    fn custom_32(b: &mut Bencher) {
        b.iter(|| black_box(parse_hex32(black_box(HASH32))));
    }

    #[bench]
    fn std_32(b: &mut Bencher) {
        b.iter(|| black_box(u32::from_str_radix(black_box(HASH32), 16)));
    }

    #[bench]
    fn custom_64(b: &mut Bencher) {
        b.iter(|| black_box(parse_hex64(black_box(HASH64))));
    }

    #[bench]
    fn std_64(b: &mut Bencher) {
        b.iter(|| black_box(u64::from_str_radix(black_box(HASH64), 16)));
    }
}