sdk: Improve word generation

Lucas Schwiderski 2023-09-19 15:28:40 +02:00
parent 4480144d92
commit 951a7f82c0
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8


@@ -70,6 +70,7 @@ pub(crate) fn command_definition() -> Command {
                     Hashes are expected in hexadecimal notiation. \
                     Only 64-bit hashes are supported."
                 )
+                .required(true)
                 .value_parser(value_parser!(PathBuf)),
         )
 }
@@ -98,7 +99,10 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
         eyre::bail!("Word list must not be empty");
     }

-    let hashes = if let Some(path) = matches.get_one::<PathBuf>("hashes") {
+    let hashes = {
+        let path = matches
+            .get_one::<PathBuf>("hashes")
+            .expect("missing required argument");
         let content = fs::read_to_string(&path)
             .await
             .wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
@@ -112,9 +116,7 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
         tracing::trace!("{:?}", hashes);

-        Some(hashes)
-    } else {
-        None
+        hashes
     };

     let mut delimiters: Vec<String> = matches
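
The two hunks above are halves of one refactor: marking the `hashes` argument as required lets clap reject invocations that omit it, so the handler can unwrap with `expect` instead of branching on `Option`. A minimal sketch of that pattern, assuming clap 4.x (the command name and the `hashes_path` helper are invented for illustration):

    use std::path::PathBuf;

    use clap::{value_parser, Arg, ArgMatches, Command};

    fn command_definition() -> Command {
        Command::new("brute-force-words")
            .arg(
                Arg::new("hashes")
                    .help("Path to a file containing known hashes, one per line.")
                    .required(true)
                    .value_parser(value_parser!(PathBuf)),
            )
    }

    fn hashes_path(matches: &ArgMatches) -> &PathBuf {
        // Safe to unwrap: clap has already rejected invocations
        // that omit a required argument.
        matches
            .get_one::<PathBuf>("hashes")
            .expect("missing required argument")
    }

    fn main() {
        let matches = command_definition().get_matches();
        println!("hashes file: {}", hashes_path(&matches).display());
    }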
@@ -250,91 +252,87 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
         // one.
         let delimiter_count = sequence.len() as u32 - 1;

+        for prefix in prefixes.iter().map(|p| p.as_bytes()) {
+            buf.clear();
+
+            // We can keep the prefix at the front of the buffer and only
+            // replace the parts after that.
+            let prefix_len = prefix.len();
+            buf.extend_from_slice(prefix);
+
             for delims in delimiter_lists
                 .iter()
                 .take(delimiters_len.pow(delimiter_count))
             {
+                buf.truncate(prefix_len);
+
                 let delims = delims
                     .iter()
                     .map(|s| s.as_str())
                     .take(delimiter_count as usize);

-                let s = sequence.iter().copied().interleave(delims.clone());
-
-                buf.clear();
-
-                for prefix in prefixes.iter() {
-                    buf.extend_from_slice(prefix.as_bytes());
-                    s.clone()
-                        .for_each(|word| buf.extend_from_slice(word.as_bytes()));
+                sequence
+                    .iter()
+                    .copied()
+                    .interleave(delims.clone())
+                    .for_each(|word| buf.extend_from_slice(word.as_bytes()));

-                    if let Some(hashes) = &hashes {
-                        let hash = Murmur64::hash(&buf);
-                        if hashes.contains(&hash) {
-                            found += 1;
-                            buf.push(LINE_FEED);
-                            writer.write_all(&buf).await?;
-                        }
-                        buf.clear();
-                    } else {
-                        buf.push(LINE_FEED);
-                    }
+                count += 1;
+                let hash = Murmur64::hash(&buf);
+                if hashes.contains(&hash) {
+                    found += 1;
+                    buf.push(LINE_FEED);
+                    writer.write_all(&buf).await?;
+                } else {
+                    let word_len = buf.len();

-                    for i in 0..=9 {
-                        buf.extend_from_slice(prefix.as_bytes());
-                        s.clone()
-                            .for_each(|word| buf.extend_from_slice(word.as_bytes()));
-                        buf.push(UNDERSCORE);
-                        buf.push(ZERO + i);
-
-                        if let Some(hashes) = &hashes {
-                            let hash = Murmur64::hash(&buf);
-                            if hashes.contains(&hash) {
-                                found += 1;
-                                buf.push(LINE_FEED);
-                                writer.write_all(&buf).await?;
-                            }
-                            buf.clear();
-                        } else {
-                            buf.push(LINE_FEED);
-                        }
-
-                        buf.extend_from_slice(prefix.as_bytes());
-                        s.clone()
-                            .for_each(|word| buf.extend_from_slice(word.as_bytes()));
+                    // If the regular word itself didn't match, we check
+                    // for numbered suffixes.
+                    // For now, we only check up to `09` to avoid more complex logic
+                    // writing into the buffer.
+                    // Packages that contain files with higher numbers than this
+                    // should hopefully become easier to spot once a good number of
+                    // hashes is found.
+                    for i in 1..=9 {
+                        buf.truncate(word_len);
                         buf.push(UNDERSCORE);
                         buf.push(ZERO);
                         buf.push(ZERO + i);

-                        if let Some(hashes) = &hashes {
-                            let hash = Murmur64::hash(&buf);
-                            if hashes.contains(&hash) {
-                                found += 1;
-                                buf.push(LINE_FEED);
-                                writer.write_all(&buf).await?;
-                            }
-                            buf.clear();
-                        } else {
-                            buf.push(LINE_FEED);
+                        count += 1;
+                        let hash = Murmur64::hash(&buf);
+                        if hashes.contains(&hash) {
+                            found += 1;
+                            buf.push(LINE_FEED);
+                            writer.write_all(&buf).await?;
+                        } else {
+                            break;
                         }
                     }
                 }
             }
+        }

-            if let Some(hashes) = &hashes {
-                count += prefixes.len() * 20;
-
-                let dur = Instant::now() - start;
-                if dur.as_secs() >= 1 {
-                    let hashes_len = hashes.len();
-                    // Don't care when it finishes, don't care if it fails.
-                    tokio::spawn(async move {
-                        let _ = tokio::io::stderr()
-                            .write_all(
-                                format!(
-                                    "\r{} hashes per second, {}/{} found",
-                                    count, found, hashes_len
-                                )
-                                .as_bytes(),
-                            )
+        let dur = Instant::now() - start;
+        if dur.as_secs() >= 1 {
+            let hashes_len = hashes.len();
+            let s = String::from_utf8_lossy(&buf);
+            // The last prefix in the set is the one that will stay in the buffer
+            // when we're about to print here.
+            // So we strip that, to show just the generated part.
+            // We also restrict the length to stay on a single line.
+            let prefix_len = prefixes[28].len();
+            let s = s[prefix_len..std::cmp::min(s.len(), prefix_len + 60)]
+                .trim_end()
+                .to_string();
+            // Don't care when it finishes, don't care if it fails.
+            tokio::spawn(async move {
+                let _ = tokio::io::stderr()
+                    .write_all(
+                        format!(
+                            "\r{:8} hashes per second | {:6}/{} found | {:<60}",
+                            count, found, hashes_len, s
+                        )
+                        .as_bytes(),
+                    )
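
The main trick in this hunk is reusing one output buffer instead of rebuilding it per candidate: the prefix is written once, `truncate` rewinds to a saved length, and only the tail is rewritten before hashing. A self-contained sketch of that scheme (here `fake_hash` merely stands in for `Murmur64::hash`, and the prefix/word sets are invented for illustration):

    use std::collections::HashSet;

    const UNDERSCORE: u8 = b'_';
    const ZERO: u8 = b'0';

    // Stand-in for Murmur64::hash (FNV-1a here); any 64-bit hash fits the sketch.
    fn fake_hash(data: &[u8]) -> u64 {
        data.iter().fold(0xcbf29ce484222325u64, |h, b| {
            (h ^ *b as u64).wrapping_mul(0x100000001b3)
        })
    }

    fn main() {
        // Pretend these two paths are the ones we are looking for.
        let hashes: HashSet<u64> =
            HashSet::from([fake_hash(b"content/fx"), fake_hash(b"wwise/ui_01")]);
        let prefixes = ["content/", "wwise/"];
        let words = ["fx", "ui"];

        let mut buf: Vec<u8> = Vec::new();
        for prefix in prefixes.iter().map(|p| p.as_bytes()) {
            buf.clear();
            let prefix_len = prefix.len();
            buf.extend_from_slice(prefix);

            for word in &words {
                // Rewind to just the prefix instead of rebuilding the buffer.
                buf.truncate(prefix_len);
                buf.extend_from_slice(word.as_bytes());

                if hashes.contains(&fake_hash(&buf)) {
                    println!("match: {}", String::from_utf8_lossy(&buf));
                    continue;
                }

                // The base word missed; probe numbered suffixes `_01`..`_09`,
                // rewinding to the base each time and stopping at the first
                // gap, as the commit does.
                let word_len = buf.len();
                for i in 1..=9u8 {
                    buf.truncate(word_len);
                    buf.push(UNDERSCORE);
                    buf.push(ZERO);
                    buf.push(ZERO + i);
                    if hashes.contains(&fake_hash(&buf)) {
                        println!("match: {}", String::from_utf8_lossy(&buf));
                    } else {
                        break;
                    }
                }
            }
        }
    }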
@@ -344,10 +342,6 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
             start = Instant::now();
             count = 0;
         }
-            } else {
-                writer.write_all(&buf).await?;
-            }
-        }

         for i in 0..indices_len {
             let index = indices.get_mut(i).unwrap();
@@ -358,15 +352,15 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
                 *word = words[*index].as_str();

                 if indices.get(i + 1).is_none() {
-                    indices.push(0);
-                    sequence.push(words[0].as_str());
-
                     indices_len += 1;
                     if indices_len > max_length {
                         break 'outer;
                     }

+                    indices.push(0);
+                    sequence.push(words[0].as_str());
+
                     break;
                 }
             } else {
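
For context, the index loop this last hunk touches advances like an odometer: each slot steps through the word list and carries into the next slot when it wraps, and the sequence grows by one slot on rollover, now only after the length cap has been checked. A rough sketch of that counter (the word list and cap are illustrative):

    fn main() {
        let words = ["a", "b"];
        let max_length = 3;

        let mut indices = vec![0usize];
        let mut indices_len = 1;

        'outer: loop {
            // Emit the current sequence of words.
            let sequence: Vec<&str> = indices.iter().map(|&i| words[i]).collect();
            println!("{}", sequence.concat());

            // Increment like an odometer: advance slot 0, carrying into the
            // next slot whenever a slot wraps around the word list.
            for i in 0..indices_len {
                indices[i] += 1;
                if indices[i] < words.len() {
                    break;
                }
                indices[i] = 0;

                if indices.get(i + 1).is_none() {
                    // Check the cap before growing the sequence, matching
                    // the reordering this hunk makes.
                    indices_len += 1;
                    if indices_len > max_length {
                        break 'outer;
                    }
                    indices.push(0);
                    break;
                }
            }
        }
    }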