diff --git a/crates/dtmt/src/cmd/experiment/brute_force_words.rs b/crates/dtmt/src/cmd/experiment/brute_force_words.rs index bb3aa9e..7e93dcc 100644 --- a/crates/dtmt/src/cmd/experiment/brute_force_words.rs +++ b/crates/dtmt/src/cmd/experiment/brute_force_words.rs @@ -70,6 +70,7 @@ pub(crate) fn command_definition() -> Command { Hashes are expected in hexadecimal notiation. \ Only 64-bit hashes are supported." ) + .required(true) .value_parser(value_parser!(PathBuf)), ) } @@ -98,7 +99,10 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> eyre::bail!("Word list must not be empty"); } - let hashes = if let Some(path) = matches.get_one::("hashes") { + let hashes = { + let path = matches + .get_one::("hashes") + .expect("missing required argument"); let content = fs::read_to_string(&path) .await .wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?; @@ -112,9 +116,7 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> tracing::trace!("{:?}", hashes); - Some(hashes) - } else { - None + hashes }; let mut delimiters: Vec = matches @@ -250,103 +252,95 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> // one. let delimiter_count = sequence.len() as u32 - 1; - for delims in delimiter_lists - .iter() - .take(delimiters_len.pow(delimiter_count)) - { - let delims = delims - .iter() - .map(|s| s.as_str()) - .take(delimiter_count as usize); - let s = sequence.iter().copied().interleave(delims.clone()); - + for prefix in prefixes.iter().map(|p| p.as_bytes()) { buf.clear(); - for prefix in prefixes.iter() { - buf.extend_from_slice(prefix.as_bytes()); - s.clone() + // We can keep the prefix at the front of the buffer and only + // replace the parts after that. + let prefix_len = prefix.len(); + buf.extend_from_slice(prefix); + + for delims in delimiter_lists + .iter() + .take(delimiters_len.pow(delimiter_count)) + { + buf.truncate(prefix_len); + + let delims = delims + .iter() + .map(|s| s.as_str()) + .take(delimiter_count as usize); + sequence + .iter() + .copied() + .interleave(delims.clone()) .for_each(|word| buf.extend_from_slice(word.as_bytes())); - if let Some(hashes) = &hashes { - let hash = Murmur64::hash(&buf); - if hashes.contains(&hash) { - found += 1; - buf.push(LINE_FEED); - writer.write_all(&buf).await?; - } + count += 1; - buf.clear(); - } else { + let hash = Murmur64::hash(&buf); + if hashes.contains(&hash) { + found += 1; buf.push(LINE_FEED); - } + writer.write_all(&buf).await?; + } else { + let word_len = buf.len(); - for i in 0..=9 { - buf.extend_from_slice(prefix.as_bytes()); - s.clone() - .for_each(|word| buf.extend_from_slice(word.as_bytes())); - buf.push(UNDERSCORE); - buf.push(ZERO + i); + // If the regular word itself didn't match, we check + // for numbered suffixes. + // For now, we only check up to `09` to avoid more complex logic + // writing into the buffer. + // Packages that contain files with higher numbers than this + // should hopefully become easier to spot once a good number of + // hashes is found. + for i in 1..=9 { + buf.truncate(word_len); + buf.push(UNDERSCORE); + buf.push(ZERO); + buf.push(ZERO + i); + + count += 1; - if let Some(hashes) = &hashes { let hash = Murmur64::hash(&buf); if hashes.contains(&hash) { found += 1; buf.push(LINE_FEED); writer.write_all(&buf).await?; + } else { + break; } - - buf.clear(); - } else { - buf.push(LINE_FEED); - } - - buf.extend_from_slice(prefix.as_bytes()); - s.clone() - .for_each(|word| buf.extend_from_slice(word.as_bytes())); - buf.push(UNDERSCORE); - buf.push(ZERO); - buf.push(ZERO + i); - - if let Some(hashes) = &hashes { - let hash = Murmur64::hash(&buf); - if hashes.contains(&hash) { - found += 1; - buf.push(LINE_FEED); - writer.write_all(&buf).await?; - } - - buf.clear(); - } else { - buf.push(LINE_FEED); } } } + } - if let Some(hashes) = &hashes { - count += prefixes.len() * 20; + let dur = Instant::now() - start; + if dur.as_secs() >= 1 { + let hashes_len = hashes.len(); + let s = String::from_utf8_lossy(&buf); + // The last prefix in the set is the one that will stay in the buffer + // when we're about to print here. + // So we strip that, to show just the generated part. + // We also restrict the length to stay on a single line. + let prefix_len = prefixes[28].len(); + let s = s[prefix_len..std::cmp::min(s.len(), prefix_len + 60)] + .trim_end() + .to_string(); + // Don't care when it finishes, don't care if it fails. + tokio::spawn(async move { + let _ = tokio::io::stderr() + .write_all( + format!( + "\r{:8} hashes per second | {:6}/{} found | {:<60}", + count, found, hashes_len, s + ) + .as_bytes(), + ) + .await; + }); - let dur = Instant::now() - start; - if dur.as_secs() >= 1 { - let hashes_len = hashes.len(); - // Don't care when it finishes, don't care if it fails. - tokio::spawn(async move { - let _ = tokio::io::stderr() - .write_all( - format!( - "\r{} hashes per second, {}/{} found", - count, found, hashes_len - ) - .as_bytes(), - ) - .await; - }); - - start = Instant::now(); - count = 0; - } - } else { - writer.write_all(&buf).await?; - } + start = Instant::now(); + count = 0; } for i in 0..indices_len { @@ -358,15 +352,15 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> *word = words[*index].as_str(); if indices.get(i + 1).is_none() { - indices.push(0); - sequence.push(words[0].as_str()); - indices_len += 1; if indices_len > max_length { break 'outer; } + indices.push(0); + sequence.push(words[0].as_str()); + break; } } else {