Compare commits
No commits in common. "ae1e7e5aa6e8c0ca54da5a485cad9a2e0e64b869" and "e56176e341bf104afe9923e1792394e5eec9b425" have entirely different histories.
ae1e7e5aa6
...
e56176e341
6 changed files with 2 additions and 1097 deletions
84
Cargo.lock
generated
84
Cargo.lock
generated
|
@ -161,17 +161,6 @@ dependencies = [
|
||||||
"system-deps",
|
"system-deps",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "atty"
|
|
||||||
version = "0.2.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
|
||||||
dependencies = [
|
|
||||||
"hermit-abi 0.1.19",
|
|
||||||
"libc",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.3.0"
|
version = "1.3.0"
|
||||||
|
@ -223,7 +212,7 @@ dependencies = [
|
||||||
"bitflags 2.5.0",
|
"bitflags 2.5.0",
|
||||||
"cexpr",
|
"cexpr",
|
||||||
"clang-sys",
|
"clang-sys",
|
||||||
"itertools 0.12.1",
|
"itertools",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"lazycell",
|
"lazycell",
|
||||||
"log",
|
"log",
|
||||||
|
@ -658,20 +647,6 @@ dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam"
|
|
||||||
version = "0.8.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-channel",
|
|
||||||
"crossbeam-deque",
|
|
||||||
"crossbeam-epoch",
|
|
||||||
"crossbeam-queue",
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-channel"
|
name = "crossbeam-channel"
|
||||||
version = "0.5.12"
|
version = "0.5.12"
|
||||||
|
@ -681,40 +656,6 @@ dependencies = [
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-deque"
|
|
||||||
version = "0.8.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-epoch",
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-epoch"
|
|
||||||
version = "0.9.15"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-utils",
|
|
||||||
"memoffset 0.9.1",
|
|
||||||
"scopeguard",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-queue"
|
|
||||||
version = "0.3.8"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-utils"
|
name = "crossbeam-utils"
|
||||||
version = "0.8.20"
|
version = "0.8.20"
|
||||||
|
@ -986,18 +927,15 @@ name = "dtmt"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-recursion",
|
"async-recursion",
|
||||||
"atty",
|
|
||||||
"clap",
|
"clap",
|
||||||
"cli-table",
|
"cli-table",
|
||||||
"color-eyre",
|
"color-eyre",
|
||||||
"confy",
|
"confy",
|
||||||
"crossbeam",
|
|
||||||
"csv-async",
|
"csv-async",
|
||||||
"dtmt-shared",
|
"dtmt-shared",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"glob",
|
"glob",
|
||||||
"itertools 0.11.0",
|
|
||||||
"luajit2-sys",
|
"luajit2-sys",
|
||||||
"nanorand",
|
"nanorand",
|
||||||
"notify",
|
"notify",
|
||||||
|
@ -1660,15 +1598,6 @@ version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hermit-abi"
|
|
||||||
version = "0.1.19"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
@ -1929,15 +1858,6 @@ version = "1.70.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
|
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "itertools"
|
|
||||||
version = "0.11.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
|
||||||
dependencies = [
|
|
||||||
"either",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itertools"
|
name = "itertools"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
|
@ -2347,7 +2267,7 @@ version = "1.16.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hermit-abi 0.3.9",
|
"hermit-abi",
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -33,9 +33,6 @@ async-recursion = "1.0.2"
|
||||||
notify = "6.1.1"
|
notify = "6.1.1"
|
||||||
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
||||||
shlex = { version = "1.2.0", optional = true }
|
shlex = { version = "1.2.0", optional = true }
|
||||||
atty = "0.2.14"
|
|
||||||
itertools = "0.11.0"
|
|
||||||
crossbeam = { version = "0.8.2", features = ["crossbeam-deque"] }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.3.0"
|
tempfile = "3.3.0"
|
||||||
|
|
|
@ -1,520 +0,0 @@
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::fs;
|
|
||||||
use std::io::{BufWriter, Write};
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::thread::JoinHandle;
|
|
||||||
|
|
||||||
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
|
||||||
use color_eyre::eyre::{self, Context};
|
|
||||||
use color_eyre::Result;
|
|
||||||
use crossbeam::channel::{bounded, unbounded, Receiver, Sender};
|
|
||||||
use itertools::Itertools;
|
|
||||||
use sdk::murmur::Murmur64;
|
|
||||||
use tokio::time::Instant;
|
|
||||||
|
|
||||||
pub(crate) fn command_definition() -> Command {
|
|
||||||
Command::new("brute-force-words")
|
|
||||||
.about(
|
|
||||||
"Given a list of words and a set of delimiters, iteratevily creates permutations \
|
|
||||||
of growing length.\n\
|
|
||||||
Delimiters are placed between every word in the result.\n\n\
|
|
||||||
Example: \
|
|
||||||
Given the words ['packages', 'boot'], the delimiters ['/', '_'] and a length of 2, the resulting \
|
|
||||||
words will be\n\
|
|
||||||
- packages\n\
|
|
||||||
- boot\n\
|
|
||||||
- packages/packages\n\
|
|
||||||
- packages_packages\n\
|
|
||||||
- packages/boot\n\
|
|
||||||
- packages_boot\n\
|
|
||||||
- boot/packages\n\
|
|
||||||
- boot_packages\n\
|
|
||||||
- boot/boot\n\
|
|
||||||
- boot_boot",
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("delimiter")
|
|
||||||
.help(
|
|
||||||
"The delimiters to put between the words. \
|
|
||||||
All permutations of this list will be tried for every string of words.\n\
|
|
||||||
Specify multiple times to set multiple values.\n\
|
|
||||||
Defaults to ['/', '_'].",
|
|
||||||
)
|
|
||||||
.short('d')
|
|
||||||
.long("delimiter")
|
|
||||||
.action(ArgAction::Append),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("max-length")
|
|
||||||
.help("The maximum number of words up to which to build strings.")
|
|
||||||
.long("max")
|
|
||||||
.long("max-length")
|
|
||||||
.short('m')
|
|
||||||
.default_value("5")
|
|
||||||
.value_parser(value_parser!(usize)),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("continue")
|
|
||||||
.help("Can be used to continue a previous operation where it stopped. Word list and delimiters must match.")
|
|
||||||
.short('c')
|
|
||||||
.long("continue")
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("threads")
|
|
||||||
.help("The number of workers to run in parallel.")
|
|
||||||
.long("threads")
|
|
||||||
.short('n')
|
|
||||||
.default_value("6")
|
|
||||||
.value_parser(value_parser!(usize))
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("words")
|
|
||||||
.help("Path to a file containing words line by line.")
|
|
||||||
.required(true)
|
|
||||||
.value_parser(value_parser!(PathBuf)),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("hashes")
|
|
||||||
.help(
|
|
||||||
"Path to a file containing the hashes to attempt to brute force. \
|
|
||||||
Hashes are expected in hexadecimal notiation. \
|
|
||||||
Only 64-bit hashes are supported."
|
|
||||||
)
|
|
||||||
.required(true)
|
|
||||||
.value_parser(value_parser!(PathBuf)),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const LINE_FEED: u8 = 0x0A;
|
|
||||||
const UNDERSCORE: u8 = 0x5F;
|
|
||||||
const ZERO: u8 = 0x30;
|
|
||||||
|
|
||||||
const PREFIXES: [&str; 36] = [
|
|
||||||
"",
|
|
||||||
"content/characters/",
|
|
||||||
"content/debug/",
|
|
||||||
"content/decals/",
|
|
||||||
"content/environment/",
|
|
||||||
"content/fx/",
|
|
||||||
"content/fx/particles/",
|
|
||||||
"content/gizmos/",
|
|
||||||
"content/items/",
|
|
||||||
"content/levels/",
|
|
||||||
"content/liquid_area/",
|
|
||||||
"content/localization/",
|
|
||||||
"content/materials/",
|
|
||||||
"content/minion_impact_assets/",
|
|
||||||
"content/pickups/",
|
|
||||||
"content/shading_environments/",
|
|
||||||
"content/textures/",
|
|
||||||
"content/ui/",
|
|
||||||
"content/videos/",
|
|
||||||
"content/vo/",
|
|
||||||
"content/volume_types/",
|
|
||||||
"content/weapons/",
|
|
||||||
"content/",
|
|
||||||
"core/",
|
|
||||||
"core/units/",
|
|
||||||
"packages/boot_assets/",
|
|
||||||
"packages/content/",
|
|
||||||
"packages/game_scripts/",
|
|
||||||
"packages/strings/",
|
|
||||||
"packages/ui/",
|
|
||||||
"packages/",
|
|
||||||
"wwise/events/",
|
|
||||||
"wwise/packages/",
|
|
||||||
"wwise/world_sound_fx/",
|
|
||||||
"wwise/events/weapons/",
|
|
||||||
"wwise/events/minions/",
|
|
||||||
];
|
|
||||||
|
|
||||||
fn make_info_printer(rx: Receiver<(usize, usize, String)>, hash_count: usize) -> JoinHandle<()> {
|
|
||||||
std::thread::spawn(move || {
|
|
||||||
let mut writer = std::io::stderr();
|
|
||||||
let mut total_count = 0;
|
|
||||||
let mut total_found = 0;
|
|
||||||
|
|
||||||
let mut start = Instant::now();
|
|
||||||
|
|
||||||
while let Ok((count, found, last)) = rx.recv() {
|
|
||||||
total_count += count;
|
|
||||||
total_found += found;
|
|
||||||
|
|
||||||
let now = Instant::now();
|
|
||||||
if (now - start).as_millis() > 250 {
|
|
||||||
let s = &last[0..std::cmp::min(last.len(), 60)];
|
|
||||||
let s = format!(
|
|
||||||
"\r{:12} per second | {total_found:6}/{hash_count} found | {s:<60}",
|
|
||||||
total_count * 4
|
|
||||||
);
|
|
||||||
|
|
||||||
writer.write_all(s.as_bytes()).unwrap();
|
|
||||||
|
|
||||||
total_count = 0;
|
|
||||||
start = now;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn make_stdout_printer(rx: Receiver<Vec<u8>>) -> JoinHandle<()> {
|
|
||||||
std::thread::spawn(move || {
|
|
||||||
let mut writer = std::io::stdout();
|
|
||||||
|
|
||||||
while let Ok(buf) = rx.recv() {
|
|
||||||
writer.write_all(&buf).unwrap();
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
struct State {
|
|
||||||
delimiter_lists: Arc<Vec<Vec<String>>>,
|
|
||||||
hashes: Arc<HashSet<Murmur64>>,
|
|
||||||
words: Arc<Vec<String>>,
|
|
||||||
delimiters_len: usize,
|
|
||||||
stdout_tx: Sender<Vec<u8>>,
|
|
||||||
info_tx: Sender<(usize, usize, String)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn make_worker(rx: Receiver<Vec<usize>>, state: State) -> JoinHandle<()> {
|
|
||||||
std::thread::spawn(move || {
|
|
||||||
let delimiter_lists = &state.delimiter_lists;
|
|
||||||
let hashes = &state.hashes;
|
|
||||||
let words = &state.words;
|
|
||||||
let delimiters_len = state.delimiters_len;
|
|
||||||
|
|
||||||
let mut count = 0;
|
|
||||||
let mut found = 0;
|
|
||||||
let mut buf = Vec::with_capacity(1024);
|
|
||||||
|
|
||||||
while let Ok(indices) = rx.recv() {
|
|
||||||
let sequence = indices.iter().map(|i| words[*i].as_str());
|
|
||||||
|
|
||||||
// We only want delimiters between words, so we keep that iterator shorter by
|
|
||||||
// one.
|
|
||||||
let delimiter_count = sequence.len() as u32 - 1;
|
|
||||||
|
|
||||||
for prefix in PREFIXES.iter().map(|p| p.as_bytes()) {
|
|
||||||
buf.clear();
|
|
||||||
|
|
||||||
// We can keep the prefix at the front of the buffer and only
|
|
||||||
// replace the parts after that.
|
|
||||||
let prefix_len = prefix.len();
|
|
||||||
buf.extend_from_slice(prefix);
|
|
||||||
|
|
||||||
for delims in delimiter_lists
|
|
||||||
.iter()
|
|
||||||
.take(delimiters_len.pow(delimiter_count))
|
|
||||||
{
|
|
||||||
buf.truncate(prefix_len);
|
|
||||||
|
|
||||||
let delims = delims
|
|
||||||
.iter()
|
|
||||||
.map(|s| s.as_str())
|
|
||||||
.take(delimiter_count as usize);
|
|
||||||
sequence
|
|
||||||
.clone()
|
|
||||||
.interleave(delims.clone())
|
|
||||||
.for_each(|word| buf.extend_from_slice(word.as_bytes()));
|
|
||||||
|
|
||||||
count += 1;
|
|
||||||
|
|
||||||
let hash = Murmur64::hash(&buf);
|
|
||||||
if hashes.contains(&hash) {
|
|
||||||
found += 1;
|
|
||||||
|
|
||||||
buf.push(LINE_FEED);
|
|
||||||
if state.stdout_tx.send(buf.clone()).is_err() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let word_len = buf.len();
|
|
||||||
|
|
||||||
// If the regular word itself didn't match, we check
|
|
||||||
// for numbered suffixes.
|
|
||||||
// For now, we only check up to `09` to avoid more complex logic
|
|
||||||
// writing into the buffer.
|
|
||||||
// Packages that contain files with higher numbers than this
|
|
||||||
// should hopefully become easier to spot once a good number of
|
|
||||||
// hashes is found.
|
|
||||||
for i in 1..=9 {
|
|
||||||
buf.truncate(word_len);
|
|
||||||
buf.push(UNDERSCORE);
|
|
||||||
buf.push(ZERO);
|
|
||||||
buf.push(ZERO + i);
|
|
||||||
|
|
||||||
count += 1;
|
|
||||||
|
|
||||||
let hash = Murmur64::hash(&buf);
|
|
||||||
if hashes.contains(&hash) {
|
|
||||||
found += 1;
|
|
||||||
|
|
||||||
buf.push(LINE_FEED);
|
|
||||||
if state.stdout_tx.send(buf.clone()).is_err() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if count >= 2 * 1024 * 1024 {
|
|
||||||
// The last prefix in the set is the one that will stay in the buffer
|
|
||||||
// when we're about to print here.
|
|
||||||
// So we strip that, to show just the generated part.
|
|
||||||
// We also restrict the length to stay on a single line.
|
|
||||||
let prefix_len = PREFIXES[35].len();
|
|
||||||
// No need to wait for this
|
|
||||||
let _ = state.info_tx.try_send((
|
|
||||||
count,
|
|
||||||
found,
|
|
||||||
String::from_utf8_lossy(&buf[prefix_len..]).to_string(),
|
|
||||||
));
|
|
||||||
|
|
||||||
count = 0;
|
|
||||||
found = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_delimiter_lists(delimiters: impl AsRef<[String]>, max_length: usize) -> Vec<Vec<String>> {
|
|
||||||
let delimiters = delimiters.as_ref();
|
|
||||||
let mut indices = vec![0; max_length];
|
|
||||||
let mut list = Vec::new();
|
|
||||||
|
|
||||||
for _ in 0..delimiters.len().pow(max_length as u32) {
|
|
||||||
list.push(
|
|
||||||
indices
|
|
||||||
.iter()
|
|
||||||
.map(|i| delimiters[*i].clone())
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
);
|
|
||||||
|
|
||||||
for v in indices.iter_mut() {
|
|
||||||
if *v >= delimiters.len() - 1 {
|
|
||||||
*v = 0;
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
*v += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
list
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_initial_indices(
|
|
||||||
cont: Option<&String>,
|
|
||||||
delimiters: impl AsRef<[String]>,
|
|
||||||
words: impl AsRef<[String]>,
|
|
||||||
) -> Result<Vec<usize>> {
|
|
||||||
if let Some(cont) = cont {
|
|
||||||
let mut splits = vec![cont.clone()];
|
|
||||||
|
|
||||||
for delim in delimiters.as_ref().iter() {
|
|
||||||
splits = splits
|
|
||||||
.iter()
|
|
||||||
.flat_map(|s| s.split(delim))
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.collect();
|
|
||||||
}
|
|
||||||
|
|
||||||
let indices = splits
|
|
||||||
.into_iter()
|
|
||||||
.map(|s| {
|
|
||||||
words
|
|
||||||
.as_ref()
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.find(|(_, v)| s == **v)
|
|
||||||
.map(|(i, _)| i)
|
|
||||||
.ok_or_else(|| eyre::eyre!("'{}' is not in the word list", s))
|
|
||||||
})
|
|
||||||
.collect::<Result<_>>()?;
|
|
||||||
|
|
||||||
tracing::info!("Continuing from '{}' -> '{:?}'", cont, &indices);
|
|
||||||
|
|
||||||
Ok(indices)
|
|
||||||
} else {
|
|
||||||
Ok(vec![0])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
|
||||||
#[allow(clippy::mut_range_bound)]
|
|
||||||
pub(crate) fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
|
||||||
let max_length: usize = matches
|
|
||||||
.get_one::<usize>("max-length")
|
|
||||||
.copied()
|
|
||||||
.expect("parameter has default");
|
|
||||||
|
|
||||||
let num_threads: usize = matches
|
|
||||||
.get_one::<usize>("threads")
|
|
||||||
.copied()
|
|
||||||
.expect("parameter has default");
|
|
||||||
|
|
||||||
let words = {
|
|
||||||
let path = matches
|
|
||||||
.get_one::<PathBuf>("words")
|
|
||||||
.expect("missing required parameter");
|
|
||||||
|
|
||||||
let file = fs::read_to_string(path)
|
|
||||||
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
|
|
||||||
|
|
||||||
let words: Vec<_> = file.lines().map(str::to_string).collect();
|
|
||||||
|
|
||||||
if words.is_empty() {
|
|
||||||
eyre::bail!("Word list must not be empty");
|
|
||||||
}
|
|
||||||
|
|
||||||
Arc::new(words)
|
|
||||||
};
|
|
||||||
|
|
||||||
let hashes = {
|
|
||||||
let path = matches
|
|
||||||
.get_one::<PathBuf>("hashes")
|
|
||||||
.expect("missing required argument");
|
|
||||||
let content = fs::read_to_string(path)
|
|
||||||
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
|
|
||||||
|
|
||||||
let hashes: Result<HashSet<_>, _> = content
|
|
||||||
.lines()
|
|
||||||
.map(|s| u64::from_str_radix(s, 16).map(Murmur64::from))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let hashes = hashes?;
|
|
||||||
|
|
||||||
tracing::trace!("{:?}", hashes);
|
|
||||||
|
|
||||||
Arc::new(hashes)
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut delimiters: Vec<String> = matches
|
|
||||||
.get_many::<String>("delimiter")
|
|
||||||
.unwrap_or_default()
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if delimiters.is_empty() {
|
|
||||||
delimiters.push(String::from("/"));
|
|
||||||
delimiters.push(String::from("_"));
|
|
||||||
}
|
|
||||||
|
|
||||||
let delimiters_len = delimiters.len();
|
|
||||||
|
|
||||||
let word_count = words.len();
|
|
||||||
tracing::info!("{} words to try", word_count);
|
|
||||||
|
|
||||||
// To be able to easily combine the permutations of words and delimiters,
|
|
||||||
// we turn the latter into a pre-defined list of all permutations of delimiters
|
|
||||||
// that are possible at the given amount of words.
|
|
||||||
// Combining `Iterator::cycle` with `Itertools::permutations` works, but
|
|
||||||
// with a high `max_length`, it runs OOM.
|
|
||||||
// So we basically have to implement a smaller version of the iterative algorithm we use later on
|
|
||||||
// to build permutations of the actual words.
|
|
||||||
let delimiter_lists = {
|
|
||||||
let lists = build_delimiter_lists(&delimiters, max_length - 1);
|
|
||||||
Arc::new(lists)
|
|
||||||
};
|
|
||||||
tracing::debug!("{:?}", delimiter_lists);
|
|
||||||
|
|
||||||
let (info_tx, info_rx) = bounded(100);
|
|
||||||
let (stdout_tx, stdout_rx) = unbounded::<Vec<u8>>();
|
|
||||||
let (task_tx, task_rx) = bounded::<Vec<usize>>(num_threads * 4);
|
|
||||||
let mut handles = Vec::new();
|
|
||||||
|
|
||||||
for _ in 0..num_threads {
|
|
||||||
let handle = make_worker(
|
|
||||||
task_rx.clone(),
|
|
||||||
State {
|
|
||||||
delimiter_lists: Arc::clone(&delimiter_lists),
|
|
||||||
hashes: Arc::clone(&hashes),
|
|
||||||
words: Arc::clone(&words),
|
|
||||||
delimiters_len,
|
|
||||||
stdout_tx: stdout_tx.clone(),
|
|
||||||
info_tx: info_tx.clone(),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
handles.push(handle);
|
|
||||||
}
|
|
||||||
// These are only used inside the worker threads, but due to the loops above, we had to
|
|
||||||
// clone them one too many times.
|
|
||||||
// So we drop that extra reference immediately, to ensure that the channels can
|
|
||||||
// disconnect properly when the threads finish.
|
|
||||||
drop(stdout_tx);
|
|
||||||
drop(info_tx);
|
|
||||||
|
|
||||||
handles.push(make_info_printer(info_rx, hashes.len()));
|
|
||||||
handles.push(make_stdout_printer(stdout_rx));
|
|
||||||
|
|
||||||
let mut indices =
|
|
||||||
build_initial_indices(matches.get_one::<String>("continue"), &delimiters, &*words)
|
|
||||||
.wrap_err("Failed to build initial indices")?;
|
|
||||||
let mut indices_len = indices.len();
|
|
||||||
let mut sequence = indices
|
|
||||||
.iter()
|
|
||||||
.map(|index| words[*index].as_str())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
// Prevent re-allocation by reserving as much as we need upfront
|
|
||||||
indices.reserve(max_length);
|
|
||||||
sequence.reserve(max_length);
|
|
||||||
|
|
||||||
'outer: loop {
|
|
||||||
task_tx.send(indices.clone())?;
|
|
||||||
|
|
||||||
for i in 0..indices_len {
|
|
||||||
let index = indices.get_mut(i).unwrap();
|
|
||||||
let word = sequence.get_mut(i).unwrap();
|
|
||||||
|
|
||||||
if *index >= word_count - 1 {
|
|
||||||
*index = 0;
|
|
||||||
*word = words[*index].as_str();
|
|
||||||
|
|
||||||
if indices.get(i + 1).is_none() {
|
|
||||||
indices_len += 1;
|
|
||||||
|
|
||||||
if indices_len > max_length {
|
|
||||||
break 'outer;
|
|
||||||
}
|
|
||||||
|
|
||||||
indices.push(0);
|
|
||||||
sequence.push(words[0].as_str());
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*index += 1;
|
|
||||||
*word = words[*index].as_str();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dropping the senders will disconnect the channel,
|
|
||||||
// so that the threads holding the other end will eventually
|
|
||||||
// complete as well.
|
|
||||||
drop(task_tx);
|
|
||||||
|
|
||||||
for handle in handles {
|
|
||||||
match handle.join() {
|
|
||||||
Ok(_) => {}
|
|
||||||
Err(value) => {
|
|
||||||
if let Some(err) = value.downcast_ref::<String>() {
|
|
||||||
eyre::bail!("Thread failed: {}", err);
|
|
||||||
} else {
|
|
||||||
eyre::bail!("Thread failed with unknown error: {:?}", value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let _ = std::io::stdout().write_all("\r".as_bytes());
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -1,463 +0,0 @@
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
use clap::{value_parser, Arg, ArgMatches, Command, ValueEnum};
|
|
||||||
use color_eyre::eyre::Context;
|
|
||||||
use color_eyre::Result;
|
|
||||||
use tokio::fs;
|
|
||||||
|
|
||||||
pub(crate) fn command_definition() -> Command {
|
|
||||||
Command::new("extract-words")
|
|
||||||
.about(
|
|
||||||
"Extract unique alphanumeric sequences that match common identifier rules from the given file. \
|
|
||||||
Only ASCII is supported.",
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("file")
|
|
||||||
.required(true)
|
|
||||||
.value_parser(value_parser!(PathBuf))
|
|
||||||
.help("Path to the file to extract words from."),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("min-length")
|
|
||||||
.help("Minimum length to consider a word.")
|
|
||||||
.long("min-length")
|
|
||||||
.short('m')
|
|
||||||
.default_value("3")
|
|
||||||
.value_parser(value_parser!(usize))
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("algorithm")
|
|
||||||
.help("The algorithm to determine matching words")
|
|
||||||
.long("algorithm")
|
|
||||||
.short('a')
|
|
||||||
.default_value("identifier")
|
|
||||||
.value_parser(value_parser!(Algorithm))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
|
|
||||||
#[value(rename_all = "snake_case")]
|
|
||||||
enum Algorithm {
|
|
||||||
Alphabetic,
|
|
||||||
Alphanumeric,
|
|
||||||
Identifier,
|
|
||||||
Number,
|
|
||||||
Hash32,
|
|
||||||
Hash64,
|
|
||||||
Paths,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Algorithm {
|
|
||||||
fn is_start(&self, c: char) -> bool {
|
|
||||||
match self {
|
|
||||||
Self::Alphabetic => c.is_ascii_alphabetic(),
|
|
||||||
Self::Alphanumeric => c.is_ascii_alphanumeric(),
|
|
||||||
Self::Identifier => c.is_ascii_alphabetic(),
|
|
||||||
Self::Number => c.is_numeric(),
|
|
||||||
Self::Hash32 | Self::Hash64 => matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F'),
|
|
||||||
// Supposed to be handled separately
|
|
||||||
Self::Paths => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_body(&self, c: char) -> bool {
|
|
||||||
match self {
|
|
||||||
Self::Alphabetic => c.is_ascii_alphabetic(),
|
|
||||||
Self::Alphanumeric => c.is_ascii_alphanumeric(),
|
|
||||||
Self::Identifier => c.is_ascii_alphanumeric(),
|
|
||||||
Self::Number => c.is_numeric(),
|
|
||||||
Self::Hash32 | Self::Hash64 => matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F'),
|
|
||||||
// Supposed to be handled separately
|
|
||||||
Self::Paths => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_length(&self, len: usize) -> bool {
|
|
||||||
match self {
|
|
||||||
Self::Alphabetic => true,
|
|
||||||
Self::Alphanumeric => true,
|
|
||||||
Self::Identifier => true,
|
|
||||||
Self::Number => true,
|
|
||||||
Self::Hash32 => len == 8,
|
|
||||||
Self::Hash64 => len == 16,
|
|
||||||
// Supposed to be handled separately
|
|
||||||
Self::Paths => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for Algorithm {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"{}",
|
|
||||||
match self {
|
|
||||||
Algorithm::Alphabetic => "alphabetic",
|
|
||||||
Algorithm::Alphanumeric => "alphanumeric",
|
|
||||||
Algorithm::Identifier => "identifier",
|
|
||||||
Algorithm::Number => "number",
|
|
||||||
Algorithm::Hash32 => "hash32",
|
|
||||||
Algorithm::Hash64 => "hash64",
|
|
||||||
Algorithm::Paths => "paths",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
|
||||||
enum PathState {
|
|
||||||
Begin,
|
|
||||||
PathComponent,
|
|
||||||
PathSeparator,
|
|
||||||
Boundary,
|
|
||||||
NonWord,
|
|
||||||
End,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip(chars))]
|
|
||||||
fn extract_paths(chars: impl Iterator<Item = char>) -> Vec<Vec<String>> {
|
|
||||||
let mut chars = chars.peekable();
|
|
||||||
|
|
||||||
let mut state = PathState::Begin;
|
|
||||||
let mut list = Vec::new();
|
|
||||||
let mut path = Vec::new();
|
|
||||||
let mut word = String::new();
|
|
||||||
|
|
||||||
let is_boundary = |c: char| c == '\n' || c == ' ' || c == ',' || c == '\t' || c == '|';
|
|
||||||
|
|
||||||
'machine: loop {
|
|
||||||
state = match state {
|
|
||||||
PathState::Begin => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some('/') => PathState::PathSeparator,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::PathComponent => match chars.next() {
|
|
||||||
None => {
|
|
||||||
path.push(word.clone());
|
|
||||||
list.push(path.clone());
|
|
||||||
|
|
||||||
PathState::End
|
|
||||||
}
|
|
||||||
Some(c) if c.is_ascii_alphanumeric() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some('/') => {
|
|
||||||
path.push(word.clone());
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
PathState::PathSeparator
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => {
|
|
||||||
path.push(word.clone());
|
|
||||||
list.push(path.clone());
|
|
||||||
|
|
||||||
path.clear();
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
PathState::Boundary
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
list.push(path.clone());
|
|
||||||
|
|
||||||
path.clear();
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
},
|
|
||||||
PathState::PathSeparator => match chars.next() {
|
|
||||||
None => {
|
|
||||||
list.push(path.clone());
|
|
||||||
PathState::End
|
|
||||||
}
|
|
||||||
Some('/') => PathState::PathSeparator,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => {
|
|
||||||
list.push(path.clone());
|
|
||||||
path.clear();
|
|
||||||
PathState::Boundary
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
list.push(path.clone());
|
|
||||||
path.clear();
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
},
|
|
||||||
PathState::Boundary => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::NonWord => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::End => {
|
|
||||||
break 'machine;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
list
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip(chars))]
|
|
||||||
fn algorithm_path_components(chars: impl Iterator<Item = char>, min_length: usize) {
|
|
||||||
let mut chars = chars.peekable();
|
|
||||||
|
|
||||||
let mut state = PathState::Begin;
|
|
||||||
let mut word = String::new();
|
|
||||||
let mut lists = vec![HashMap::<String, usize>::new()];
|
|
||||||
let mut index = 0;
|
|
||||||
|
|
||||||
let is_boundary = |c: char| c == '\n' || c == ' ' || c == ',' || c == '\t';
|
|
||||||
|
|
||||||
'machine: loop {
|
|
||||||
state = match state {
|
|
||||||
PathState::Begin => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
// Ignore leading path separators to not trigger the logic of advancing
|
|
||||||
// the component count
|
|
||||||
Some('/') => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::PathComponent => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphanumeric() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some('/') => PathState::PathSeparator,
|
|
||||||
Some(c) => {
|
|
||||||
if index > 0 && word.len() >= min_length {
|
|
||||||
let list = &mut lists[index];
|
|
||||||
list.entry(word.clone())
|
|
||||||
.and_modify(|count| *count += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
index = 0;
|
|
||||||
|
|
||||||
if is_boundary(c) {
|
|
||||||
PathState::Boundary
|
|
||||||
} else {
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
PathState::PathSeparator => {
|
|
||||||
if word.len() >= min_length {
|
|
||||||
let list = &mut lists[index];
|
|
||||||
list.entry(word.clone())
|
|
||||||
.and_modify(|count| *count += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
index += 1;
|
|
||||||
if lists.get(index).is_none() {
|
|
||||||
lists.push(HashMap::new());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ignore multiple separators
|
|
||||||
while chars.next_if(|c| *c == '/').is_some() {}
|
|
||||||
|
|
||||||
match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => {
|
|
||||||
index = 0;
|
|
||||||
PathState::Boundary
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
index = 0;
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PathState::Boundary => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::NonWord => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::End => {
|
|
||||||
if word.len() >= min_length {
|
|
||||||
let list = &mut lists[index];
|
|
||||||
list.entry(word.clone())
|
|
||||||
.and_modify(|count| *count += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
break 'machine;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for i in 0..lists.len() {
|
|
||||||
print!("Word {i}, Count {i},");
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
|
|
||||||
let mut lines: Vec<Vec<Option<(String, usize)>>> = Vec::new();
|
|
||||||
|
|
||||||
for (i, list) in lists.into_iter().enumerate() {
|
|
||||||
let mut entries = list.into_iter().collect::<Vec<_>>();
|
|
||||||
entries.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
|
||||||
|
|
||||||
for (j, (word, count)) in entries.into_iter().enumerate() {
|
|
||||||
if let Some(line) = lines.get_mut(j) {
|
|
||||||
while line.len() < i {
|
|
||||||
line.push(None);
|
|
||||||
}
|
|
||||||
line.push(Some((word, count)));
|
|
||||||
} else {
|
|
||||||
let mut line = Vec::new();
|
|
||||||
while line.len() < i {
|
|
||||||
line.push(None);
|
|
||||||
}
|
|
||||||
line.push(Some((word, count)));
|
|
||||||
lines.push(line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for line in lines.iter() {
|
|
||||||
for cell in line.iter() {
|
|
||||||
if let Some((word, count)) = cell {
|
|
||||||
print!("{},{},", word, count);
|
|
||||||
} else {
|
|
||||||
print!(",,");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// States of the character-class scanner driven by [`run`] for the
/// non-path word-extraction algorithms.
#[derive(Copy, Clone, Debug)]
enum State {
    // Initial state, before any character has been inspected.
    Begin,
    // Inside a run of non-word characters.
    NonWord,
    // Accumulating a word into the current buffer.
    Word,
    // Input exhausted; terminates the state machine.
    End,
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
|
||||||
pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
|
||||||
let path = matches
|
|
||||||
.get_one::<PathBuf>("file")
|
|
||||||
.expect("missing required parameter");
|
|
||||||
|
|
||||||
let algorithm = matches
|
|
||||||
.get_one::<Algorithm>("algorithm")
|
|
||||||
.expect("parameter has default");
|
|
||||||
|
|
||||||
let min_length = matches
|
|
||||||
.get_one::<usize>("min-length")
|
|
||||||
.copied()
|
|
||||||
.expect("paramter has default");
|
|
||||||
|
|
||||||
let content = fs::read_to_string(&path)
|
|
||||||
.await
|
|
||||||
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
|
|
||||||
let mut chars = content.chars();
|
|
||||||
|
|
||||||
if *algorithm == Algorithm::Paths {
|
|
||||||
algorithm_path_components(chars, min_length);
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut state = State::Begin;
|
|
||||||
let mut word = String::new();
|
|
||||||
let mut visited = HashMap::new();
|
|
||||||
|
|
||||||
'machine: loop {
|
|
||||||
state = match state {
|
|
||||||
State::Begin => match chars.next() {
|
|
||||||
None => State::End,
|
|
||||||
Some(c) if algorithm.is_start(c) => {
|
|
||||||
word.push(c);
|
|
||||||
State::Word
|
|
||||||
}
|
|
||||||
Some(_) => State::NonWord,
|
|
||||||
},
|
|
||||||
State::End => break 'machine,
|
|
||||||
State::NonWord => match chars.next() {
|
|
||||||
None => State::End,
|
|
||||||
Some(c) if algorithm.is_body(c) => {
|
|
||||||
word.push(c);
|
|
||||||
State::Word
|
|
||||||
}
|
|
||||||
Some(_) => State::NonWord,
|
|
||||||
},
|
|
||||||
State::Word => match chars.next() {
|
|
||||||
None => {
|
|
||||||
if word.len() >= min_length && algorithm.is_length(word.len()) {
|
|
||||||
visited
|
|
||||||
.entry(word.clone())
|
|
||||||
.and_modify(|v| *v += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
State::End
|
|
||||||
}
|
|
||||||
Some(c) if algorithm.is_body(c) => {
|
|
||||||
word.push(c);
|
|
||||||
State::Word
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
if word.len() >= min_length && algorithm.is_length(word.len()) {
|
|
||||||
visited
|
|
||||||
.entry(word.clone())
|
|
||||||
.and_modify(|v| *v += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
word.clear();
|
|
||||||
State::NonWord
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut entries: Vec<(String, usize)> = visited.into_iter().collect();
|
|
||||||
// Reverse sides during comparison to get "highest to lowest"
|
|
||||||
entries.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
|
||||||
|
|
||||||
entries
|
|
||||||
.iter()
|
|
||||||
.for_each(|(word, count)| println!("{:016} {}", word, count));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -1,26 +0,0 @@
|
||||||
use clap::{ArgMatches, Command};
|
|
||||||
use color_eyre::Result;
|
|
||||||
|
|
||||||
mod brute_force_words;
|
|
||||||
mod extract_words;
|
|
||||||
|
|
||||||
pub(crate) fn command_definition() -> Command {
|
|
||||||
Command::new("experiment")
|
|
||||||
.subcommand_required(true)
|
|
||||||
.about("A collection of utilities and experiments.")
|
|
||||||
.subcommand(brute_force_words::command_definition())
|
|
||||||
.subcommand(extract_words::command_definition())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
|
||||||
pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
|
||||||
match matches.subcommand() {
|
|
||||||
// It's fine to block here, as this is the only thing that's executing on the runtime.
|
|
||||||
// The other option with `spawn_blocking` would require setting up values to be Send+Sync.
|
|
||||||
Some(("brute-force-words", sub_matches)) => brute_force_words::run(ctx, sub_matches),
|
|
||||||
Some(("extract-words", sub_matches)) => extract_words::run(ctx, sub_matches).await,
|
|
||||||
_ => unreachable!(
|
|
||||||
"clap is configured to require a subcommand, and they're all handled above"
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -21,7 +21,6 @@ mod cmd {
|
||||||
pub mod build;
|
pub mod build;
|
||||||
pub mod bundle;
|
pub mod bundle;
|
||||||
pub mod dictionary;
|
pub mod dictionary;
|
||||||
pub mod experiment;
|
|
||||||
pub mod migrate;
|
pub mod migrate;
|
||||||
pub mod murmur;
|
pub mod murmur;
|
||||||
pub mod new;
|
pub mod new;
|
||||||
|
@ -57,7 +56,6 @@ async fn main() -> Result<()> {
|
||||||
.subcommand(cmd::build::command_definition())
|
.subcommand(cmd::build::command_definition())
|
||||||
.subcommand(cmd::bundle::command_definition())
|
.subcommand(cmd::bundle::command_definition())
|
||||||
.subcommand(cmd::dictionary::command_definition())
|
.subcommand(cmd::dictionary::command_definition())
|
||||||
.subcommand(cmd::experiment::command_definition())
|
|
||||||
.subcommand(cmd::migrate::command_definition())
|
.subcommand(cmd::migrate::command_definition())
|
||||||
.subcommand(cmd::murmur::command_definition())
|
.subcommand(cmd::murmur::command_definition())
|
||||||
.subcommand(cmd::new::command_definition())
|
.subcommand(cmd::new::command_definition())
|
||||||
|
@ -135,7 +133,6 @@ async fn main() -> Result<()> {
|
||||||
Some(("build", sub_matches)) => cmd::build::run(ctx, sub_matches).await?,
|
Some(("build", sub_matches)) => cmd::build::run(ctx, sub_matches).await?,
|
||||||
Some(("bundle", sub_matches)) => cmd::bundle::run(ctx, sub_matches).await?,
|
Some(("bundle", sub_matches)) => cmd::bundle::run(ctx, sub_matches).await?,
|
||||||
Some(("dictionary", sub_matches)) => cmd::dictionary::run(ctx, sub_matches).await?,
|
Some(("dictionary", sub_matches)) => cmd::dictionary::run(ctx, sub_matches).await?,
|
||||||
Some(("experiment", sub_matches)) => cmd::experiment::run(ctx, sub_matches).await?,
|
|
||||||
Some(("migrate", sub_matches)) => cmd::migrate::run(ctx, sub_matches).await?,
|
Some(("migrate", sub_matches)) => cmd::migrate::run(ctx, sub_matches).await?,
|
||||||
Some(("murmur", sub_matches)) => cmd::murmur::run(ctx, sub_matches).await?,
|
Some(("murmur", sub_matches)) => cmd::murmur::run(ctx, sub_matches).await?,
|
||||||
Some(("new", sub_matches)) => cmd::new::run(ctx, sub_matches).await?,
|
Some(("new", sub_matches)) => cmd::new::run(ctx, sub_matches).await?,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue