diff --git a/Cargo.toml b/Cargo.toml index 39d8f38..8cbe52c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,3 +31,9 @@ strip = "debuginfo" [profile.release-lto] inherits = "release" lto = true + +[profile.perf] +inherits = "release" +strip = false +lto = true +debug = "line-tables-only" diff --git a/Justfile b/Justfile index f9b37bc..dbecc22 100644 --- a/Justfile +++ b/Justfile @@ -1,5 +1,13 @@ +set positional-arguments + fly_target := "main" +build-perf-dtmt: + cargo build --profile perf --bin dtmt + +perf-dtmt *args='': build-perf-dtmt + perf record --call-graph dwarf ./target/perf/dtmt "$@" + ci-build: ci-build-msvc ci-build-linux ci-build-msvc: diff --git a/lib/sdk/src/lib.rs b/lib/sdk/src/lib.rs index 37a4d67..a24b3bd 100644 --- a/lib/sdk/src/lib.rs +++ b/lib/sdk/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(test)] + mod binary; mod bundle; mod context; diff --git a/lib/sdk/src/murmur/mod.rs b/lib/sdk/src/murmur/mod.rs index a2a9ef3..87a8473 100644 --- a/lib/sdk/src/murmur/mod.rs +++ b/lib/sdk/src/murmur/mod.rs @@ -1,15 +1,16 @@ use std::fmt; use color_eyre::eyre::Context; -use color_eyre::Report; +use color_eyre::{Report, Result}; use serde::de::Visitor; -use serde::{Deserialize, Serialize}; -use serde::{Deserializer, Serializer}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; mod dictionary; // Currently unused // mod murmurhash32; mod murmurhash64; +mod types; +mod util; pub const SEED: u32 = 0; @@ -18,372 +19,4 @@ pub use murmurhash64::hash; pub use murmurhash64::hash32; pub use murmurhash64::hash_inverse as inverse; -fn _swap_bytes_u32(value: u32) -> u32 { - u32::from_le_bytes(value.to_be_bytes()) -} - -fn _swap_bytes_u64(value: u64) -> u64 { - u64::from_le_bytes(value.to_be_bytes()) -} - -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub struct Murmur64(u64); - -impl Murmur64 { - pub fn hash(s: B) -> Self - where - B: AsRef<[u8]>, - { - hash(s.as_ref(), SEED as u64).into() - } -} - -impl From for Murmur64 { - fn from(value: u64) -> Self { - Self(value) - } -} - -impl From for u64 { - fn from(value: Murmur64) -> Self { - value.0 - } -} - -impl TryFrom<&str> for Murmur64 { - type Error = Report; - - fn try_from(value: &str) -> Result { - u64::from_str_radix(value, 16) - .map(Self) - .wrap_err_with(|| format!("Failed to convert value to Murmur64: {value}")) - } -} - -impl fmt::UpperHex for Murmur64 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::UpperHex::fmt(&self.0, f) - } -} - -impl fmt::LowerHex for Murmur64 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::LowerHex::fmt(&self.0, f) - } -} - -impl fmt::Display for Murmur64 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::UpperHex::fmt(&self.0, f) - } -} - -impl<'de> Visitor<'de> for Murmur64 { - type Value = Self; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str( - "an usigned 64 bit integer \ - or a string in hexadecimal format encoding such an integer", - ) - } - - fn visit_f64(self, value: f64) -> Result - where - E: serde::de::Error, - { - let bytes = value.to_le_bytes(); - Ok(Self::from(u64::from_le_bytes(bytes))) - } - - fn visit_u64(self, value: u64) -> Result - where - E: serde::de::Error, - { - Ok(Self::from(value)) - } - - fn visit_str(self, value: &str) -> Result - where - E: serde::de::Error, - { - match Murmur64::try_from(value) { - Ok(hash) => Ok(hash), - Err(err) => Err(E::custom(format!( - "failed to convert '{value}' to Murmur64: {err}" - ))), - } - } -} - -impl<'de> Deserialize<'de> for Murmur64 { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - deserializer.deserialize_any(Self(0)) - } -} - -impl Serialize for Murmur64 { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(&format!("{self:016X}")) - } -} - -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub struct Murmur32(u32); - -impl Murmur32 { - pub fn hash(s: B) -> Self - where - B: AsRef<[u8]>, - { - hash32(s.as_ref(), SEED).into() - } -} - -impl From for Murmur32 { - fn from(value: u32) -> Self { - Self(value) - } -} - -impl From for u32 { - fn from(value: Murmur32) -> Self { - value.0 - } -} - -impl TryFrom<&str> for Murmur32 { - type Error = Report; - - fn try_from(value: &str) -> Result { - u32::from_str_radix(value, 16) - .map(Self) - .wrap_err_with(|| format!("Failed to convert value to Murmur32: {value}")) - } -} - -impl fmt::UpperHex for Murmur32 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::UpperHex::fmt(&self.0, f) - } -} - -impl fmt::Display for Murmur32 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::UpperHex::fmt(&self.0, f) - } -} - -impl Serialize for Murmur32 { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(&format!("{self:08X}")) - } -} - -impl<'de> Visitor<'de> for Murmur32 { - type Value = Self; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str( - "an usigned 32 bit integer \ - or a string in hexadecimal format encoding such an integer", - ) - } - - fn visit_f64(self, value: f64) -> Result - where - E: serde::de::Error, - { - let bytes = value.to_le_bytes(); - self.visit_u32(u64::from_le_bytes(bytes) as u32) - } - - fn visit_u64(self, value: u64) -> Result - where - E: serde::de::Error, - { - self.visit_u32(value as u32) - } - - fn visit_u32(self, value: u32) -> Result - where - E: serde::de::Error, - { - Ok(Self::from(value)) - } - - fn visit_str(self, value: &str) -> Result - where - E: serde::de::Error, - { - match Murmur32::try_from(value) { - Ok(hash) => Ok(hash), - Err(err) => Err(E::custom(format!( - "failed to convert '{value}' to Murmur32: {err}" - ))), - } - } -} - -impl<'de> Deserialize<'de> for Murmur32 { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - deserializer.deserialize_any(Self(0)) - } -} - -// This type encodes the fact that when reading in a bundle, we don't always have a dictionary -// entry for every hash in there. So we do want to have the real string available when needed, -// but at the same time retain the original hash information for when we don't. -// This is especially important when wanting to write back the read bundle, as the hashes need to -// stay the same. -// The previous system of always turning hashes into strings worked well for the purpose of -// displaying hashes, but would have made it very hard to turn a stringyfied hash back into -// an actual hash. -#[derive(Clone, Debug, Eq)] -pub enum IdString64 { - Hash(Murmur64), - String(String), -} - -impl IdString64 { - pub fn to_murmur64(&self) -> Murmur64 { - match self { - Self::Hash(hash) => *hash, - Self::String(s) => Murmur64::hash(s.as_bytes()), - } - } - - pub fn display(&self) -> IdString64Display { - let s = match self { - IdString64::Hash(hash) => hash.to_string(), - IdString64::String(s) => s.clone(), - }; - - IdString64Display(s) - } - - pub fn is_string(&self) -> bool { - match self { - IdString64::Hash(_) => false, - IdString64::String(_) => true, - } - } - - pub fn is_hash(&self) -> bool { - match self { - IdString64::Hash(_) => true, - IdString64::String(_) => false, - } - } -} - -impl> From for IdString64 { - fn from(value: S) -> Self { - Self::String(value.into()) - } -} - -impl From for IdString64 { - fn from(value: Murmur64) -> Self { - Self::Hash(value) - } -} - -impl From for Murmur64 { - fn from(value: IdString64) -> Self { - value.to_murmur64() - } -} - -impl PartialEq for IdString64 { - fn eq(&self, other: &Self) -> bool { - self.to_murmur64() == other.to_murmur64() - } -} - -impl std::hash::Hash for IdString64 { - fn hash(&self, state: &mut H) { - state.write_u64(self.to_murmur64().into()); - } -} - -impl serde::Serialize for IdString64 { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_u64(self.to_murmur64().into()) - } -} - -struct IdString64Visitor; - -impl<'de> serde::de::Visitor<'de> for IdString64Visitor { - type Value = IdString64; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("an u64 or a string") - } - - fn visit_u64(self, value: u64) -> Result - where - E: serde::de::Error, - { - Ok(IdString64::Hash(value.into())) - } - - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { - Ok(IdString64::String(v.to_string())) - } - - fn visit_string(self, v: String) -> Result - where - E: serde::de::Error, - { - Ok(IdString64::String(v)) - } -} - -impl<'de> serde::Deserialize<'de> for IdString64 { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - deserializer.deserialize_u64(IdString64Visitor) - } -} - -pub struct IdString64Display(String); - -impl std::fmt::Display for IdString64Display { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl std::fmt::UpperHex for IdString64 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - std::fmt::UpperHex::fmt(&self.to_murmur64(), f) - } -} - -impl std::fmt::LowerHex for IdString64 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - std::fmt::LowerHex::fmt(&self.to_murmur64(), f) - } -} +pub use types::*; diff --git a/lib/sdk/src/murmur/types.rs b/lib/sdk/src/murmur/types.rs new file mode 100644 index 0000000..1146494 --- /dev/null +++ b/lib/sdk/src/murmur/types.rs @@ -0,0 +1,365 @@ +use self::util::{parse_hex32, parse_hex64}; + +use super::*; + +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub struct Murmur64(u64); + +impl Murmur64 { + pub fn hash(s: B) -> Self + where + B: AsRef<[u8]>, + { + hash(s.as_ref(), SEED as u64).into() + } +} + +impl From for Murmur64 { + fn from(value: u64) -> Self { + Self(value) + } +} + +impl From for u64 { + fn from(value: Murmur64) -> Self { + value.0 + } +} + +impl TryFrom<&str> for Murmur64 { + type Error = Report; + + fn try_from(value: &str) -> Result { + parse_hex64(value) + .map(Self) + .wrap_err_with(|| format!("Failed to convert value to Murmur64: {value}")) + } +} + +impl fmt::UpperHex for Murmur64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::UpperHex::fmt(&self.0, f) + } +} + +impl fmt::LowerHex for Murmur64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::LowerHex::fmt(&self.0, f) + } +} + +impl fmt::Display for Murmur64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::UpperHex::fmt(&self.0, f) + } +} + +impl<'de> Visitor<'de> for Murmur64 { + type Value = Self; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str( + "an usigned 64 bit integer \ + or a string in hexadecimal format encoding such an integer", + ) + } + + fn visit_f64(self, value: f64) -> Result + where + E: serde::de::Error, + { + let bytes = value.to_le_bytes(); + Ok(Self::from(u64::from_le_bytes(bytes))) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + Ok(Self::from(value)) + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + match Murmur64::try_from(value) { + Ok(hash) => Ok(hash), + Err(err) => Err(E::custom(format!( + "failed to convert '{value}' to Murmur64: {err}" + ))), + } + } +} + +impl<'de> Deserialize<'de> for Murmur64 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(Self(0)) + } +} + +impl Serialize for Murmur64 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&format!("{self:016X}")) + } +} + +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub struct Murmur32(u32); + +impl Murmur32 { + pub fn hash(s: B) -> Self + where + B: AsRef<[u8]>, + { + hash32(s.as_ref(), SEED).into() + } +} + +impl From for Murmur32 { + fn from(value: u32) -> Self { + Self(value) + } +} + +impl From for u32 { + fn from(value: Murmur32) -> Self { + value.0 + } +} + +impl TryFrom<&str> for Murmur32 { + type Error = Report; + + fn try_from(value: &str) -> Result { + parse_hex32(value) + .map(Self) + .wrap_err_with(|| format!("Failed to convert value to Murmur32: {value}")) + } +} + +impl fmt::UpperHex for Murmur32 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::UpperHex::fmt(&self.0, f) + } +} + +impl fmt::Display for Murmur32 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::UpperHex::fmt(&self.0, f) + } +} + +impl Serialize for Murmur32 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&format!("{self:08X}")) + } +} + +impl<'de> Visitor<'de> for Murmur32 { + type Value = Self; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str( + "an usigned 32 bit integer \ + or a string in hexadecimal format encoding such an integer", + ) + } + + fn visit_f64(self, value: f64) -> Result + where + E: serde::de::Error, + { + let bytes = value.to_le_bytes(); + self.visit_u32(u64::from_le_bytes(bytes) as u32) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + self.visit_u32(value as u32) + } + + fn visit_u32(self, value: u32) -> Result + where + E: serde::de::Error, + { + Ok(Self::from(value)) + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + match Murmur32::try_from(value) { + Ok(hash) => Ok(hash), + Err(err) => Err(E::custom(format!( + "failed to convert '{value}' to Murmur32: {err}" + ))), + } + } +} + +impl<'de> Deserialize<'de> for Murmur32 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(Self(0)) + } +} + +// This type encodes the fact that when reading in a bundle, we don't always have a dictionary +// entry for every hash in there. So we do want to have the real string available when needed, +// but at the same time retain the original hash information for when we don't. +// This is especially important when wanting to write back the read bundle, as the hashes need to +// stay the same. +// The previous system of always turning hashes into strings worked well for the purpose of +// displaying hashes, but would have made it very hard to turn a stringyfied hash back into +// an actual hash. +#[derive(Clone, Debug, Eq)] +pub enum IdString64 { + Hash(Murmur64), + String(String), +} + +impl IdString64 { + pub fn to_murmur64(&self) -> Murmur64 { + match self { + Self::Hash(hash) => *hash, + Self::String(s) => Murmur64::hash(s.as_bytes()), + } + } + + pub fn display(&self) -> IdString64Display { + let s = match self { + IdString64::Hash(hash) => hash.to_string(), + IdString64::String(s) => s.clone(), + }; + + IdString64Display(s) + } + + pub fn is_string(&self) -> bool { + match self { + IdString64::Hash(_) => false, + IdString64::String(_) => true, + } + } + + pub fn is_hash(&self) -> bool { + match self { + IdString64::Hash(_) => true, + IdString64::String(_) => false, + } + } +} + +impl> From for IdString64 { + fn from(value: S) -> Self { + Self::String(value.into()) + } +} + +impl From for IdString64 { + fn from(value: Murmur64) -> Self { + Self::Hash(value) + } +} + +impl From for Murmur64 { + fn from(value: IdString64) -> Self { + value.to_murmur64() + } +} + +impl PartialEq for IdString64 { + fn eq(&self, other: &Self) -> bool { + self.to_murmur64() == other.to_murmur64() + } +} + +impl std::hash::Hash for IdString64 { + fn hash(&self, state: &mut H) { + state.write_u64(self.to_murmur64().into()); + } +} + +impl serde::Serialize for IdString64 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_u64(self.to_murmur64().into()) + } +} + +struct IdString64Visitor; + +impl<'de> serde::de::Visitor<'de> for IdString64Visitor { + type Value = IdString64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("an u64 or a string") + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + Ok(IdString64::Hash(value.into())) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + Ok(IdString64::String(v.to_string())) + } + + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + Ok(IdString64::String(v)) + } +} + +impl<'de> serde::Deserialize<'de> for IdString64 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_u64(IdString64Visitor) + } +} + +pub struct IdString64Display(String); + +impl std::fmt::Display for IdString64Display { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl std::fmt::UpperHex for IdString64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + std::fmt::UpperHex::fmt(&self.to_murmur64(), f) + } +} + +impl std::fmt::LowerHex for IdString64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + std::fmt::LowerHex::fmt(&self.to_murmur64(), f) + } +} diff --git a/lib/sdk/src/murmur/util.rs b/lib/sdk/src/murmur/util.rs new file mode 100644 index 0000000..134c4e7 --- /dev/null +++ b/lib/sdk/src/murmur/util.rs @@ -0,0 +1,132 @@ +use color_eyre::eyre::bail; +use color_eyre::Result; + +// Generates tables similar to these: +// https://github.com/zbjornson/fast-hex/blob/a3487bca95127634a61bfeae8f8bfc8f0e5baa3f/src/hex.cc#L20-L89 +// `upper` determines upper vs. lower bits (first character is `upper`). +const fn generate_byte_map(upper: bool) -> [u8; 256] { + let mut out = [0u8; 256]; + let factor = if upper { 16 } else { 1 }; + + let mut i = 0; + + while i < 256 { + match i { + 0x30..=0x39 => out[i] = factor * (i as u8 - 0x30), + 0x41..=0x46 => out[i] = factor * (9 + i as u8 - 0x40), + 0x61..=0x66 => out[i] = factor * (9 + i as u8 - 0x60), + _ => out[i] = u8::MAX, + } + i += 1; + } + + out +} + +const BYTE_MAP_UPPER: [u8; 256] = generate_byte_map(true); +const BYTE_MAP_LOWER: [u8; 256] = generate_byte_map(false); + +macro_rules! make_parse_hex { + ($name:ident, $ty:ty, $len:expr) => { + #[inline] + pub fn $name(s: impl AsRef) -> Result<$ty> { + // For the string to be valid hex characters, it needs to be ASCII. + // So we can simply treat it as a byte stream. + let s = s.as_ref().as_bytes(); + + if s.len() != $len { + bail!( + "String length doesn't match. Expected {}, got {}", + $len, + s.len() + ); + } + + let n = $len / 2; + let mut out: $ty = 0; + let mut i = 0; + + while i < n { + let j = i * 2; + + let c1 = BYTE_MAP_UPPER[s[j] as usize]; + if c1 == u8::MAX { + bail!("Invalid character '{:?}' ({})", char::from(c1), c1); + } + + let c2 = BYTE_MAP_LOWER[s[j + 1] as usize]; + if c2 == u8::MAX { + bail!("Invalid character '{:?}' ({})", char::from(c2), c2); + } + + out |= ((c1 + c2) as $ty) << (n - i - 1) * 8; + + i += 1; + } + + Ok(out) + } + }; +} + +make_parse_hex!(parse_hex64, u64, 16); +make_parse_hex!(parse_hex32, u32, 8); + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn parse_32() { + let hash = "A14E8DFA"; + assert_eq!(parse_hex32(hash).unwrap(), 0xA14E8DFA); + } + + #[test] + fn parse_64() { + let hash = "A14E8DFA2CD117E2"; + assert_eq!(parse_hex64(hash).unwrap(), 0xA14E8DFA2CD117E2); + } + + #[test] + fn std_from_radix_32() { + let hash = "A14E8DFA"; + assert_eq!(u32::from_str_radix(hash, 16).unwrap(), 0xA14E8DFA); + } + + #[test] + fn std_from_radix_64() { + let hash = "A14E8DFA2CD117E2"; + assert_eq!(u64::from_str_radix(hash, 16).unwrap(), 0xA14E8DFA2CD117E2); + } +} + +#[cfg(test)] +mod bench { + use super::{parse_hex32, parse_hex64}; + + extern crate test; + + const HASH32: &str = "A14E8DFA"; + const HASH64: &str = "A14E8DFA2CD117E2"; + + #[bench] + fn custom_32(b: &mut test::Bencher) { + b.iter(|| test::black_box(parse_hex32(test::black_box(HASH32)))) + } + + #[bench] + fn std_32(b: &mut test::Bencher) { + b.iter(|| test::black_box(u32::from_str_radix(test::black_box(HASH32), 16))) + } + + #[bench] + fn custom_64(b: &mut test::Bencher) { + b.iter(|| test::black_box(parse_hex64(test::black_box(HASH64)))) + } + + #[bench] + fn std_64(b: &mut test::Bencher) { + b.iter(|| test::black_box(u64::from_str_radix(test::black_box(HASH64), 16))) + } +}