sdk: Implement guessing a list of hashes

While generating and storing a list of strings does allow that list to
be re-used in the future, the I/O involved turned out to be quite
costly.

While the generation can run at up to 500 MiB/s, even with on-the-fly
compression an HDD cannot sustain the write speeds needed to keep up.
And compression is also necessary to store this amount of data
(generation reached two TB of raw data with a word length of just three,
which is still 600 GB compressed).
But compression also makes working with that data a lot harder.

So this instead combines both the generation and search into a single
step. The intermediate result of the generation is therefore lost,
but the overall pipeline is much faster.
This commit is contained in:
Lucas Schwiderski 2023-09-18 13:29:42 +02:00
parent 0d1193a126
commit 4480144d92
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8

View file

@ -1,11 +1,14 @@
use std::collections::HashSet;
use std::path::PathBuf;
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use color_eyre::eyre::{self, Context};
use color_eyre::Result;
use itertools::Itertools;
use sdk::murmur::Murmur64;
use tokio::fs;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::io::AsyncWriteExt;
use tokio::time::Instant;
pub(crate) fn command_definition() -> Command {
Command::new("brute-force-words")
@ -60,6 +63,15 @@ pub(crate) fn command_definition() -> Command {
.required(true)
.value_parser(value_parser!(PathBuf)),
)
.arg(
Arg::new("hashes")
.help(
"Path to a file containing the hashes to attempt to brute force. \
Hashes are expected in hexadecimal notiation. \
Only 64-bit hashes are supported."
)
.value_parser(value_parser!(PathBuf)),
)
}
#[tracing::instrument(skip_all)]
@ -86,6 +98,25 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
eyre::bail!("Word list must not be empty");
}
let hashes = if let Some(path) = matches.get_one::<PathBuf>("hashes") {
let content = fs::read_to_string(&path)
.await
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
let hashes: Result<HashSet<_>, _> = content
.lines()
.map(|s| u64::from_str_radix(s, 16).map(Murmur64::from))
.collect();
let hashes = hashes?;
tracing::trace!("{:?}", hashes);
Some(hashes)
} else {
None
};
let mut delimiters: Vec<String> = matches
.get_many::<String>("delimiter")
.unwrap_or_default()
@ -163,8 +194,6 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
tracing::debug!("{:?}", delimiter_lists);
let mut count = 0u64;
let mut indices = if let Some(cont) = matches.get_one::<String>("continue").cloned() {
let mut splits = vec![cont.clone()];
@ -204,7 +233,12 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
indices.reserve(max_length);
sequence.reserve(max_length);
let mut writer = BufWriter::new(tokio::io::stdout());
let mut count: usize = 0;
let mut found: usize = 0;
let mut start = Instant::now();
// let mut writer = BufWriter::new(tokio::io::stdout());
let mut writer = tokio::io::stdout();
let mut buf = Vec::with_capacity(1024);
const LINE_FEED: u8 = 0x0A;
@ -216,13 +250,6 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
// one.
let delimiter_count = sequence.len() as u32 - 1;
tracing::trace!(
"{} | {:?} -> {:?}",
delimiters_len.pow(delimiter_count),
indices,
sequence
);
for delims in delimiter_lists
.iter()
.take(delimiters_len.pow(delimiter_count))
@ -233,16 +260,25 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
.take(delimiter_count as usize);
let s = sequence.iter().copied().interleave(delims.clone());
count = count.wrapping_add(1);
buf.clear();
for prefix in prefixes.iter() {
buf.extend_from_slice(prefix.as_bytes());
s.clone()
.for_each(|word| buf.extend_from_slice(word.as_bytes()));
// buf.extend_from_slice(s.as_bytes());
if let Some(hashes) = &hashes {
let hash = Murmur64::hash(&buf);
if hashes.contains(&hash) {
found += 1;
buf.push(LINE_FEED);
writer.write_all(&buf).await?;
}
buf.clear();
} else {
buf.push(LINE_FEED);
}
for i in 0..=9 {
buf.extend_from_slice(prefix.as_bytes());
@ -250,7 +286,19 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
.for_each(|word| buf.extend_from_slice(word.as_bytes()));
buf.push(UNDERSCORE);
buf.push(ZERO + i);
if let Some(hashes) = &hashes {
let hash = Murmur64::hash(&buf);
if hashes.contains(&hash) {
found += 1;
buf.push(LINE_FEED);
writer.write_all(&buf).await?;
}
buf.clear();
} else {
buf.push(LINE_FEED);
}
buf.extend_from_slice(prefix.as_bytes());
s.clone()
@ -258,12 +306,48 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
buf.push(UNDERSCORE);
buf.push(ZERO);
buf.push(ZERO + i);
if let Some(hashes) = &hashes {
let hash = Murmur64::hash(&buf);
if hashes.contains(&hash) {
found += 1;
buf.push(LINE_FEED);
writer.write_all(&buf).await?;
}
buf.clear();
} else {
buf.push(LINE_FEED);
}
}
}
if let Some(hashes) = &hashes {
count += prefixes.len() * 20;
let dur = Instant::now() - start;
if dur.as_secs() >= 1 {
let hashes_len = hashes.len();
// Don't care when it finishes, don't care if it fails.
tokio::spawn(async move {
let _ = tokio::io::stderr()
.write_all(
format!(
"\r{} hashes per second, {}/{} found",
count, found, hashes_len
)
.as_bytes(),
)
.await;
});
start = Instant::now();
count = 0;
}
} else {
writer.write_all(&buf).await?;
}
}
for i in 0..indices_len {
let index = indices.get_mut(i).unwrap();