sdk: Improve word generation

Lucas Schwiderski 2023-09-19 15:28:40 +02:00
parent 4480144d92
commit 951a7f82c0
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8


@@ -70,6 +70,7 @@ pub(crate) fn command_definition() -> Command {
                     Hashes are expected in hexadecimal notiation. \
                     Only 64-bit hashes are supported."
                 )
+                .required(true)
                 .value_parser(value_parser!(PathBuf)),
         )
 }
@@ -98,7 +99,10 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
         eyre::bail!("Word list must not be empty");
     }

-    let hashes = if let Some(path) = matches.get_one::<PathBuf>("hashes") {
+    let hashes = {
+        let path = matches
+            .get_one::<PathBuf>("hashes")
+            .expect("missing required argument");
         let content = fs::read_to_string(&path)
             .await
             .wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
@@ -112,9 +116,7 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
         tracing::trace!("{:?}", hashes);

-        Some(hashes)
-    } else {
-        None
+        hashes
     };

     let mut delimiters: Vec<String> = matches
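
The two hunks above are halves of one refactor: marking the `hashes` argument as required lets clap reject invocations that omit it, so the handler can unwrap with `expect` instead of branching on `Option`. A minimal sketch of that pattern, assuming clap 4.x (the command name and the `hashes_path` helper are invented for illustration):

    use std::path::PathBuf;

    use clap::{value_parser, Arg, ArgMatches, Command};

    fn command_definition() -> Command {
        Command::new("brute-force-words")
            .arg(
                Arg::new("hashes")
                    .help("Path to a file containing known hashes, one per line.")
                    .required(true)
                    .value_parser(value_parser!(PathBuf)),
            )
    }

    fn hashes_path(matches: &ArgMatches) -> &PathBuf {
        // Safe to unwrap: clap has already rejected invocations
        // that omit a required argument.
        matches
            .get_one::<PathBuf>("hashes")
            .expect("missing required argument")
    }

    fn main() {
        let matches = command_definition().get_matches();
        println!("hashes file: {}", hashes_path(&matches).display());
    }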
@@ -250,91 +252,87 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
         // one.
         let delimiter_count = sequence.len() as u32 - 1;

+        for prefix in prefixes.iter().map(|p| p.as_bytes()) {
+            buf.clear();
+
+            // We can keep the prefix at the front of the buffer and only
+            // replace the parts after that.
+            let prefix_len = prefix.len();
+            buf.extend_from_slice(prefix);
+
             for delims in delimiter_lists
                 .iter()
                 .take(delimiters_len.pow(delimiter_count))
             {
+                buf.truncate(prefix_len);
+
                 let delims = delims
                     .iter()
                     .map(|s| s.as_str())
                     .take(delimiter_count as usize);

-                let s = sequence.iter().copied().interleave(delims.clone());
-
-                buf.clear();
-
-                for prefix in prefixes.iter() {
-                    buf.extend_from_slice(prefix.as_bytes());
-                    s.clone()
-                        .for_each(|word| buf.extend_from_slice(word.as_bytes()));
+                sequence
+                    .iter()
+                    .copied()
+                    .interleave(delims.clone())
+                    .for_each(|word| buf.extend_from_slice(word.as_bytes()));

-                    if let Some(hashes) = &hashes {
-                        let hash = Murmur64::hash(&buf);
-                        if hashes.contains(&hash) {
-                            found += 1;
-                            buf.push(LINE_FEED);
-                            writer.write_all(&buf).await?;
-                        }
-                        buf.clear();
-                    } else {
-                        buf.push(LINE_FEED);
-                    }
+                count += 1;
+                let hash = Murmur64::hash(&buf);
+                if hashes.contains(&hash) {
+                    found += 1;
+                    buf.push(LINE_FEED);
+                    writer.write_all(&buf).await?;
+                } else {
+                    let word_len = buf.len();

-                    for i in 0..=9 {
-                        buf.extend_from_slice(prefix.as_bytes());
-                        s.clone()
-                            .for_each(|word| buf.extend_from_slice(word.as_bytes()));
-                        buf.push(UNDERSCORE);
-                        buf.push(ZERO + i);
-
-                        if let Some(hashes) = &hashes {
-                            let hash = Murmur64::hash(&buf);
-                            if hashes.contains(&hash) {
-                                found += 1;
-                                buf.push(LINE_FEED);
-                                writer.write_all(&buf).await?;
-                            }
-                            buf.clear();
-                        } else {
-                            buf.push(LINE_FEED);
-                        }
-
-                        buf.extend_from_slice(prefix.as_bytes());
-                        s.clone()
-                            .for_each(|word| buf.extend_from_slice(word.as_bytes()));
+                    // If the regular word itself didn't match, we check
+                    // for numbered suffixes.
+                    // For now, we only check up to `09` to avoid more complex logic
+                    // writing into the buffer.
+                    // Packages that contain files with higher numbers than this
+                    // should hopefully become easier to spot once a good number of
+                    // hashes is found.
+                    for i in 1..=9 {
+                        buf.truncate(word_len);
                         buf.push(UNDERSCORE);
                         buf.push(ZERO);
                         buf.push(ZERO + i);

-                        if let Some(hashes) = &hashes {
-                            let hash = Murmur64::hash(&buf);
-                            if hashes.contains(&hash) {
-                                found += 1;
-                                buf.push(LINE_FEED);
-                                writer.write_all(&buf).await?;
-                            }
-                            buf.clear();
-                        } else {
-                            buf.push(LINE_FEED);
+                        count += 1;
+                        let hash = Murmur64::hash(&buf);
+                        if hashes.contains(&hash) {
+                            found += 1;
+                            buf.push(LINE_FEED);
+                            writer.write_all(&buf).await?;
+                        } else {
+                            break;
                         }
                     }
                 }
             }
+        }

-            if let Some(hashes) = &hashes {
-                count += prefixes.len() * 20;
-
-                let dur = Instant::now() - start;
-                if dur.as_secs() >= 1 {
-                    let hashes_len = hashes.len();
-                    // Don't care when it finishes, don't care if it fails.
-                    tokio::spawn(async move {
-                        let _ = tokio::io::stderr()
-                            .write_all(
-                                format!(
-                                    "\r{} hashes per second, {}/{} found",
-                                    count, found, hashes_len
-                                )
-                                .as_bytes(),
-                            )
+        let dur = Instant::now() - start;
+        if dur.as_secs() >= 1 {
+            let hashes_len = hashes.len();
+            let s = String::from_utf8_lossy(&buf);
+            // The last prefix in the set is the one that will stay in the buffer
+            // when we're about to print here.
+            // So we strip that, to show just the generated part.
+            // We also restrict the length to stay on a single line.
+            let prefix_len = prefixes[28].len();
+            let s = s[prefix_len..std::cmp::min(s.len(), prefix_len + 60)]
+                .trim_end()
+                .to_string();
+            // Don't care when it finishes, don't care if it fails.
+            tokio::spawn(async move {
+                let _ = tokio::io::stderr()
+                    .write_all(
+                        format!(
+                            "\r{:8} hashes per second | {:6}/{} found | {:<60}",
+                            count, found, hashes_len, s
+                        )
+                        .as_bytes(),
+                    )
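
The main trick in this hunk is reusing one output buffer instead of rebuilding it per candidate: the prefix is written once, `truncate` rewinds to a saved length, and only the tail is rewritten before hashing. A self-contained sketch of that scheme (here `fake_hash` merely stands in for `Murmur64::hash`, and the prefix/word sets are invented for illustration):

    use std::collections::HashSet;

    const UNDERSCORE: u8 = b'_';
    const ZERO: u8 = b'0';

    // Stand-in for Murmur64::hash (FNV-1a here); any 64-bit hash fits the sketch.
    fn fake_hash(data: &[u8]) -> u64 {
        data.iter().fold(0xcbf29ce484222325u64, |h, b| {
            (h ^ *b as u64).wrapping_mul(0x100000001b3)
        })
    }

    fn main() {
        // Pretend these two paths are the ones we are looking for.
        let hashes: HashSet<u64> =
            HashSet::from([fake_hash(b"content/fx"), fake_hash(b"wwise/ui_01")]);
        let prefixes = ["content/", "wwise/"];
        let words = ["fx", "ui"];

        let mut buf: Vec<u8> = Vec::new();
        for prefix in prefixes.iter().map(|p| p.as_bytes()) {
            buf.clear();
            let prefix_len = prefix.len();
            buf.extend_from_slice(prefix);

            for word in &words {
                // Rewind to just the prefix instead of rebuilding the buffer.
                buf.truncate(prefix_len);
                buf.extend_from_slice(word.as_bytes());

                if hashes.contains(&fake_hash(&buf)) {
                    println!("match: {}", String::from_utf8_lossy(&buf));
                    continue;
                }

                // The base word missed; probe numbered suffixes `_01`..`_09`,
                // rewinding to the base each time and stopping at the first
                // gap, as the commit does.
                let word_len = buf.len();
                for i in 1..=9u8 {
                    buf.truncate(word_len);
                    buf.push(UNDERSCORE);
                    buf.push(ZERO);
                    buf.push(ZERO + i);
                    if hashes.contains(&fake_hash(&buf)) {
                        println!("match: {}", String::from_utf8_lossy(&buf));
                    } else {
                        break;
                    }
                }
            }
        }
    }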
@@ -344,10 +342,6 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
             start = Instant::now();
             count = 0;
         }
-            } else {
-                writer.write_all(&buf).await?;
-            }
-        }

         for i in 0..indices_len {
             let index = indices.get_mut(i).unwrap();
@@ -358,15 +352,15 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
                 *word = words[*index].as_str();

                 if indices.get(i + 1).is_none() {
-                    indices.push(0);
-                    sequence.push(words[0].as_str());
-
                     indices_len += 1;
                     if indices_len > max_length {
                         break 'outer;
                     }

+                    indices.push(0);
+                    sequence.push(words[0].as_str());
+
                     break;
                 }
             } else {
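
For context, the index loop this last hunk touches advances like an odometer: each slot steps through the word list and carries into the next slot when it wraps, and the sequence grows by one slot on rollover, now only after the length cap has been checked. A rough sketch of that counter (the word list and cap are illustrative):

    fn main() {
        let words = ["a", "b"];
        let max_length = 3;

        let mut indices = vec![0usize];
        let mut indices_len = 1;

        'outer: loop {
            // Emit the current sequence of words.
            let sequence: Vec<&str> = indices.iter().map(|&i| words[i]).collect();
            println!("{}", sequence.concat());

            // Increment like an odometer: advance slot 0, carrying into the
            // next slot whenever a slot wraps around the word list.
            for i in 0..indices_len {
                indices[i] += 1;
                if indices[i] < words.len() {
                    break;
                }
                indices[i] = 0;

                if indices.get(i + 1).is_none() {
                    // Check the cap before growing the sequence, matching
                    // the reordering this hunk makes.
                    indices_len += 1;
                    if indices_len > max_length {
                        break 'outer;
                    }
                    indices.push(0);
                    break;
                }
            }
        }
    }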