Perform various optimizations #173

Merged
lucas merged 19 commits from feat/optimization into master 2024-07-10 19:45:52 +02:00
6 changed files with 518 additions and 372 deletions
Showing only changes of commit 96a7eeb1e0 - Show all commits

View file

@ -31,3 +31,9 @@ strip = "debuginfo"
[profile.release-lto] [profile.release-lto]
inherits = "release" inherits = "release"
lto = true lto = true
[profile.perf]
inherits = "release"
strip = false
lto = true
debug = "line-tables-only"

View file

@ -1,5 +1,13 @@
set positional-arguments
fly_target := "main" fly_target := "main"
build-perf-dtmt:
cargo build --profile perf --bin dtmt
perf-dtmt *args='': build-perf-dtmt
perf record --call-graph dwarf ./target/perf/dtmt "$@"
ci-build: ci-build-msvc ci-build-linux ci-build: ci-build-msvc ci-build-linux
ci-build-msvc: ci-build-msvc:

View file

@ -1,3 +1,5 @@
#![feature(test)]
mod binary; mod binary;
mod bundle; mod bundle;
mod context; mod context;

View file

@ -1,15 +1,16 @@
use std::fmt; use std::fmt;
use color_eyre::eyre::Context; use color_eyre::eyre::Context;
use color_eyre::Report; use color_eyre::{Report, Result};
use serde::de::Visitor; use serde::de::Visitor;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::{Deserializer, Serializer};
mod dictionary; mod dictionary;
// Currently unused // Currently unused
// mod murmurhash32; // mod murmurhash32;
mod murmurhash64; mod murmurhash64;
mod types;
mod util;
pub const SEED: u32 = 0; pub const SEED: u32 = 0;
@ -18,372 +19,4 @@ pub use murmurhash64::hash;
pub use murmurhash64::hash32; pub use murmurhash64::hash32;
pub use murmurhash64::hash_inverse as inverse; pub use murmurhash64::hash_inverse as inverse;
fn _swap_bytes_u32(value: u32) -> u32 { pub use types::*;
u32::from_le_bytes(value.to_be_bytes())
}
fn _swap_bytes_u64(value: u64) -> u64 {
u64::from_le_bytes(value.to_be_bytes())
}
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur64(u64);
impl Murmur64 {
pub fn hash<B>(s: B) -> Self
where
B: AsRef<[u8]>,
{
hash(s.as_ref(), SEED as u64).into()
}
}
impl From<u64> for Murmur64 {
fn from(value: u64) -> Self {
Self(value)
}
}
impl From<Murmur64> for u64 {
fn from(value: Murmur64) -> Self {
value.0
}
}
impl TryFrom<&str> for Murmur64 {
type Error = Report;
fn try_from(value: &str) -> Result<Self, Self::Error> {
u64::from_str_radix(value, 16)
.map(Self)
.wrap_err_with(|| format!("Failed to convert value to Murmur64: {value}"))
}
}
impl fmt::UpperHex for Murmur64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl fmt::LowerHex for Murmur64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.0, f)
}
}
impl fmt::Display for Murmur64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl<'de> Visitor<'de> for Murmur64 {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str(
"an usigned 64 bit integer \
or a string in hexadecimal format encoding such an integer",
)
}
fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let bytes = value.to_le_bytes();
Ok(Self::from(u64::from_le_bytes(bytes)))
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Self::from(value))
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
match Murmur64::try_from(value) {
Ok(hash) => Ok(hash),
Err(err) => Err(E::custom(format!(
"failed to convert '{value}' to Murmur64: {err}"
))),
}
}
}
impl<'de> Deserialize<'de> for Murmur64 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(Self(0))
}
}
impl Serialize for Murmur64 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&format!("{self:016X}"))
}
}
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur32(u32);
impl Murmur32 {
pub fn hash<B>(s: B) -> Self
where
B: AsRef<[u8]>,
{
hash32(s.as_ref(), SEED).into()
}
}
impl From<u32> for Murmur32 {
fn from(value: u32) -> Self {
Self(value)
}
}
impl From<Murmur32> for u32 {
fn from(value: Murmur32) -> Self {
value.0
}
}
impl TryFrom<&str> for Murmur32 {
type Error = Report;
fn try_from(value: &str) -> Result<Self, Self::Error> {
u32::from_str_radix(value, 16)
.map(Self)
.wrap_err_with(|| format!("Failed to convert value to Murmur32: {value}"))
}
}
impl fmt::UpperHex for Murmur32 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl fmt::Display for Murmur32 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl Serialize for Murmur32 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&format!("{self:08X}"))
}
}
impl<'de> Visitor<'de> for Murmur32 {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str(
"an usigned 32 bit integer \
or a string in hexadecimal format encoding such an integer",
)
}
fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let bytes = value.to_le_bytes();
self.visit_u32(u64::from_le_bytes(bytes) as u32)
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
self.visit_u32(value as u32)
}
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Self::from(value))
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
match Murmur32::try_from(value) {
Ok(hash) => Ok(hash),
Err(err) => Err(E::custom(format!(
"failed to convert '{value}' to Murmur32: {err}"
))),
}
}
}
impl<'de> Deserialize<'de> for Murmur32 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(Self(0))
}
}
// This type encodes the fact that when reading in a bundle, we don't always have a dictionary
// entry for every hash in there. So we do want to have the real string available when needed,
// but at the same time retain the original hash information for when we don't.
// This is especially important when wanting to write back the read bundle, as the hashes need to
// stay the same.
// The previous system of always turning hashes into strings worked well for the purpose of
// displaying hashes, but would have made it very hard to turn a stringyfied hash back into
// an actual hash.
#[derive(Clone, Debug, Eq)]
pub enum IdString64 {
Hash(Murmur64),
String(String),
}
impl IdString64 {
pub fn to_murmur64(&self) -> Murmur64 {
match self {
Self::Hash(hash) => *hash,
Self::String(s) => Murmur64::hash(s.as_bytes()),
}
}
pub fn display(&self) -> IdString64Display {
let s = match self {
IdString64::Hash(hash) => hash.to_string(),
IdString64::String(s) => s.clone(),
};
IdString64Display(s)
}
pub fn is_string(&self) -> bool {
match self {
IdString64::Hash(_) => false,
IdString64::String(_) => true,
}
}
pub fn is_hash(&self) -> bool {
match self {
IdString64::Hash(_) => true,
IdString64::String(_) => false,
}
}
}
impl<S: Into<String>> From<S> for IdString64 {
fn from(value: S) -> Self {
Self::String(value.into())
}
}
impl From<Murmur64> for IdString64 {
fn from(value: Murmur64) -> Self {
Self::Hash(value)
}
}
impl From<IdString64> for Murmur64 {
fn from(value: IdString64) -> Self {
value.to_murmur64()
}
}
impl PartialEq for IdString64 {
fn eq(&self, other: &Self) -> bool {
self.to_murmur64() == other.to_murmur64()
}
}
impl std::hash::Hash for IdString64 {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
state.write_u64(self.to_murmur64().into());
}
}
impl serde::Serialize for IdString64 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_u64(self.to_murmur64().into())
}
}
struct IdString64Visitor;
impl<'de> serde::de::Visitor<'de> for IdString64Visitor {
type Value = IdString64;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an u64 or a string")
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::Hash(value.into()))
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::String(v.to_string()))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::String(v))
}
}
impl<'de> serde::Deserialize<'de> for IdString64 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_u64(IdString64Visitor)
}
}
pub struct IdString64Display(String);
impl std::fmt::Display for IdString64Display {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl std::fmt::UpperHex for IdString64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
std::fmt::UpperHex::fmt(&self.to_murmur64(), f)
}
}
impl std::fmt::LowerHex for IdString64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
std::fmt::LowerHex::fmt(&self.to_murmur64(), f)
}
}

365
lib/sdk/src/murmur/types.rs Normal file
View file

@ -0,0 +1,365 @@
use self::util::{parse_hex32, parse_hex64};
use super::*;
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur64(u64);
impl Murmur64 {
pub fn hash<B>(s: B) -> Self
where
B: AsRef<[u8]>,
{
hash(s.as_ref(), SEED as u64).into()
}
}
impl From<u64> for Murmur64 {
fn from(value: u64) -> Self {
Self(value)
}
}
impl From<Murmur64> for u64 {
fn from(value: Murmur64) -> Self {
value.0
}
}
impl TryFrom<&str> for Murmur64 {
type Error = Report;
fn try_from(value: &str) -> Result<Self, Self::Error> {
parse_hex64(value)
.map(Self)
.wrap_err_with(|| format!("Failed to convert value to Murmur64: {value}"))
}
}
impl fmt::UpperHex for Murmur64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl fmt::LowerHex for Murmur64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.0, f)
}
}
impl fmt::Display for Murmur64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl<'de> Visitor<'de> for Murmur64 {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str(
"an usigned 64 bit integer \
or a string in hexadecimal format encoding such an integer",
)
}
fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let bytes = value.to_le_bytes();
Ok(Self::from(u64::from_le_bytes(bytes)))
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Self::from(value))
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
match Murmur64::try_from(value) {
Ok(hash) => Ok(hash),
Err(err) => Err(E::custom(format!(
"failed to convert '{value}' to Murmur64: {err}"
))),
}
}
}
impl<'de> Deserialize<'de> for Murmur64 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(Self(0))
}
}
impl Serialize for Murmur64 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&format!("{self:016X}"))
}
}
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Murmur32(u32);
impl Murmur32 {
pub fn hash<B>(s: B) -> Self
where
B: AsRef<[u8]>,
{
hash32(s.as_ref(), SEED).into()
}
}
impl From<u32> for Murmur32 {
fn from(value: u32) -> Self {
Self(value)
}
}
impl From<Murmur32> for u32 {
fn from(value: Murmur32) -> Self {
value.0
}
}
impl TryFrom<&str> for Murmur32 {
type Error = Report;
fn try_from(value: &str) -> Result<Self, Self::Error> {
parse_hex32(value)
.map(Self)
.wrap_err_with(|| format!("Failed to convert value to Murmur32: {value}"))
}
}
impl fmt::UpperHex for Murmur32 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl fmt::Display for Murmur32 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::UpperHex::fmt(&self.0, f)
}
}
impl Serialize for Murmur32 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&format!("{self:08X}"))
}
}
impl<'de> Visitor<'de> for Murmur32 {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str(
"an usigned 32 bit integer \
or a string in hexadecimal format encoding such an integer",
)
}
fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let bytes = value.to_le_bytes();
self.visit_u32(u64::from_le_bytes(bytes) as u32)
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
self.visit_u32(value as u32)
}
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Self::from(value))
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
match Murmur32::try_from(value) {
Ok(hash) => Ok(hash),
Err(err) => Err(E::custom(format!(
"failed to convert '{value}' to Murmur32: {err}"
))),
}
}
}
impl<'de> Deserialize<'de> for Murmur32 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(Self(0))
}
}
// This type encodes the fact that when reading in a bundle, we don't always have a dictionary
// entry for every hash in there. So we do want to have the real string available when needed,
// but at the same time retain the original hash information for when we don't.
// This is especially important when wanting to write back the read bundle, as the hashes need to
// stay the same.
// The previous system of always turning hashes into strings worked well for the purpose of
// displaying hashes, but would have made it very hard to turn a stringyfied hash back into
// an actual hash.
#[derive(Clone, Debug, Eq)]
pub enum IdString64 {
Hash(Murmur64),
String(String),
}
impl IdString64 {
pub fn to_murmur64(&self) -> Murmur64 {
match self {
Self::Hash(hash) => *hash,
Self::String(s) => Murmur64::hash(s.as_bytes()),
}
}
pub fn display(&self) -> IdString64Display {
let s = match self {
IdString64::Hash(hash) => hash.to_string(),
IdString64::String(s) => s.clone(),
};
IdString64Display(s)
}
pub fn is_string(&self) -> bool {
match self {
IdString64::Hash(_) => false,
IdString64::String(_) => true,
}
}
pub fn is_hash(&self) -> bool {
match self {
IdString64::Hash(_) => true,
IdString64::String(_) => false,
}
}
}
impl<S: Into<String>> From<S> for IdString64 {
fn from(value: S) -> Self {
Self::String(value.into())
}
}
impl From<Murmur64> for IdString64 {
fn from(value: Murmur64) -> Self {
Self::Hash(value)
}
}
impl From<IdString64> for Murmur64 {
fn from(value: IdString64) -> Self {
value.to_murmur64()
}
}
impl PartialEq for IdString64 {
fn eq(&self, other: &Self) -> bool {
self.to_murmur64() == other.to_murmur64()
}
}
impl std::hash::Hash for IdString64 {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
state.write_u64(self.to_murmur64().into());
}
}
impl serde::Serialize for IdString64 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_u64(self.to_murmur64().into())
}
}
struct IdString64Visitor;
impl<'de> serde::de::Visitor<'de> for IdString64Visitor {
type Value = IdString64;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an u64 or a string")
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::Hash(value.into()))
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::String(v.to_string()))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(IdString64::String(v))
}
}
impl<'de> serde::Deserialize<'de> for IdString64 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_u64(IdString64Visitor)
}
}
pub struct IdString64Display(String);
impl std::fmt::Display for IdString64Display {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl std::fmt::UpperHex for IdString64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
std::fmt::UpperHex::fmt(&self.to_murmur64(), f)
}
}
impl std::fmt::LowerHex for IdString64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
std::fmt::LowerHex::fmt(&self.to_murmur64(), f)
}
}

132
lib/sdk/src/murmur/util.rs Normal file
View file

@ -0,0 +1,132 @@
use color_eyre::eyre::bail;
use color_eyre::Result;
// Generates tables similar to these:
// https://github.com/zbjornson/fast-hex/blob/a3487bca95127634a61bfeae8f8bfc8f0e5baa3f/src/hex.cc#L20-L89
// `upper` determines upper vs. lower bits (first character is `upper`).
const fn generate_byte_map(upper: bool) -> [u8; 256] {
let mut out = [0u8; 256];
let factor = if upper { 16 } else { 1 };
let mut i = 0;
while i < 256 {
match i {
0x30..=0x39 => out[i] = factor * (i as u8 - 0x30),
0x41..=0x46 => out[i] = factor * (9 + i as u8 - 0x40),
0x61..=0x66 => out[i] = factor * (9 + i as u8 - 0x60),
_ => out[i] = u8::MAX,
}
i += 1;
}
out
}
const BYTE_MAP_UPPER: [u8; 256] = generate_byte_map(true);
const BYTE_MAP_LOWER: [u8; 256] = generate_byte_map(false);
macro_rules! make_parse_hex {
($name:ident, $ty:ty, $len:expr) => {
#[inline]
pub fn $name(s: impl AsRef<str>) -> Result<$ty> {
// For the string to be valid hex characters, it needs to be ASCII.
// So we can simply treat it as a byte stream.
let s = s.as_ref().as_bytes();
if s.len() != $len {
bail!(
"String length doesn't match. Expected {}, got {}",
$len,
s.len()
);
}
let n = $len / 2;
let mut out: $ty = 0;
let mut i = 0;
while i < n {
let j = i * 2;
let c1 = BYTE_MAP_UPPER[s[j] as usize];
if c1 == u8::MAX {
bail!("Invalid character '{:?}' ({})", char::from(c1), c1);
}
let c2 = BYTE_MAP_LOWER[s[j + 1] as usize];
if c2 == u8::MAX {
bail!("Invalid character '{:?}' ({})", char::from(c2), c2);
}
out |= ((c1 + c2) as $ty) << (n - i - 1) * 8;
i += 1;
}
Ok(out)
}
};
}
make_parse_hex!(parse_hex64, u64, 16);
make_parse_hex!(parse_hex32, u32, 8);
#[cfg(test)]
mod test {
use super::*;
#[test]
fn parse_32() {
let hash = "A14E8DFA";
assert_eq!(parse_hex32(hash).unwrap(), 0xA14E8DFA);
}
#[test]
fn parse_64() {
let hash = "A14E8DFA2CD117E2";
assert_eq!(parse_hex64(hash).unwrap(), 0xA14E8DFA2CD117E2);
}
#[test]
fn std_from_radix_32() {
let hash = "A14E8DFA";
assert_eq!(u32::from_str_radix(hash, 16).unwrap(), 0xA14E8DFA);
}
#[test]
fn std_from_radix_64() {
let hash = "A14E8DFA2CD117E2";
assert_eq!(u64::from_str_radix(hash, 16).unwrap(), 0xA14E8DFA2CD117E2);
}
}
#[cfg(test)]
mod bench {
use super::{parse_hex32, parse_hex64};
extern crate test;
const HASH32: &str = "A14E8DFA";
const HASH64: &str = "A14E8DFA2CD117E2";
#[bench]
fn custom_32(b: &mut test::Bencher) {
b.iter(|| test::black_box(parse_hex32(test::black_box(HASH32))))
}
#[bench]
fn std_32(b: &mut test::Bencher) {
b.iter(|| test::black_box(u32::from_str_radix(test::black_box(HASH32), 16)))
}
#[bench]
fn custom_64(b: &mut test::Bencher) {
b.iter(|| test::black_box(parse_hex64(test::black_box(HASH64))))
}
#[bench]
fn std_64(b: &mut test::Bencher) {
b.iter(|| test::black_box(u64::from_str_radix(test::black_box(HASH64), 16)))
}
}