Compare commits
No commits in common. "ae1e7e5aa6e8c0ca54da5a485cad9a2e0e64b869" and "e56176e341bf104afe9923e1792394e5eec9b425" have entirely different histories.
ae1e7e5aa6
...
e56176e341
6 changed files with 2 additions and 1097 deletions
84
Cargo.lock
generated
84
Cargo.lock
generated
|
@ -161,17 +161,6 @@ dependencies = [
|
||||||
"system-deps",
|
"system-deps",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "atty"
|
|
||||||
version = "0.2.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
|
||||||
dependencies = [
|
|
||||||
"hermit-abi 0.1.19",
|
|
||||||
"libc",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.3.0"
|
version = "1.3.0"
|
||||||
|
@ -223,7 +212,7 @@ dependencies = [
|
||||||
"bitflags 2.5.0",
|
"bitflags 2.5.0",
|
||||||
"cexpr",
|
"cexpr",
|
||||||
"clang-sys",
|
"clang-sys",
|
||||||
"itertools 0.12.1",
|
"itertools",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"lazycell",
|
"lazycell",
|
||||||
"log",
|
"log",
|
||||||
|
@ -658,20 +647,6 @@ dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam"
|
|
||||||
version = "0.8.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-channel",
|
|
||||||
"crossbeam-deque",
|
|
||||||
"crossbeam-epoch",
|
|
||||||
"crossbeam-queue",
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-channel"
|
name = "crossbeam-channel"
|
||||||
version = "0.5.12"
|
version = "0.5.12"
|
||||||
|
@ -681,40 +656,6 @@ dependencies = [
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-deque"
|
|
||||||
version = "0.8.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-epoch",
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-epoch"
|
|
||||||
version = "0.9.15"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-utils",
|
|
||||||
"memoffset 0.9.1",
|
|
||||||
"scopeguard",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-queue"
|
|
||||||
version = "0.3.8"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-utils"
|
name = "crossbeam-utils"
|
||||||
version = "0.8.20"
|
version = "0.8.20"
|
||||||
|
@ -986,18 +927,15 @@ name = "dtmt"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-recursion",
|
"async-recursion",
|
||||||
"atty",
|
|
||||||
"clap",
|
"clap",
|
||||||
"cli-table",
|
"cli-table",
|
||||||
"color-eyre",
|
"color-eyre",
|
||||||
"confy",
|
"confy",
|
||||||
"crossbeam",
|
|
||||||
"csv-async",
|
"csv-async",
|
||||||
"dtmt-shared",
|
"dtmt-shared",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"glob",
|
"glob",
|
||||||
"itertools 0.11.0",
|
|
||||||
"luajit2-sys",
|
"luajit2-sys",
|
||||||
"nanorand",
|
"nanorand",
|
||||||
"notify",
|
"notify",
|
||||||
|
@ -1660,15 +1598,6 @@ version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hermit-abi"
|
|
||||||
version = "0.1.19"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
@ -1929,15 +1858,6 @@ version = "1.70.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
|
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "itertools"
|
|
||||||
version = "0.11.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
|
||||||
dependencies = [
|
|
||||||
"either",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itertools"
|
name = "itertools"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
|
@ -2347,7 +2267,7 @@ version = "1.16.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hermit-abi 0.3.9",
|
"hermit-abi",
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -33,9 +33,6 @@ async-recursion = "1.0.2"
|
||||||
notify = "6.1.1"
|
notify = "6.1.1"
|
||||||
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
||||||
shlex = { version = "1.2.0", optional = true }
|
shlex = { version = "1.2.0", optional = true }
|
||||||
atty = "0.2.14"
|
|
||||||
itertools = "0.11.0"
|
|
||||||
crossbeam = { version = "0.8.2", features = ["crossbeam-deque"] }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.3.0"
|
tempfile = "3.3.0"
|
||||||
|
|
|
@ -1,520 +0,0 @@
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::fs;
|
|
||||||
use std::io::{BufWriter, Write};
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::thread::JoinHandle;
|
|
||||||
|
|
||||||
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
|
||||||
use color_eyre::eyre::{self, Context};
|
|
||||||
use color_eyre::Result;
|
|
||||||
use crossbeam::channel::{bounded, unbounded, Receiver, Sender};
|
|
||||||
use itertools::Itertools;
|
|
||||||
use sdk::murmur::Murmur64;
|
|
||||||
use tokio::time::Instant;
|
|
||||||
|
|
||||||
pub(crate) fn command_definition() -> Command {
|
|
||||||
Command::new("brute-force-words")
|
|
||||||
.about(
|
|
||||||
"Given a list of words and a set of delimiters, iteratevily creates permutations \
|
|
||||||
of growing length.\n\
|
|
||||||
Delimiters are placed between every word in the result.\n\n\
|
|
||||||
Example: \
|
|
||||||
Given the words ['packages', 'boot'], the delimiters ['/', '_'] and a length of 2, the resulting \
|
|
||||||
words will be\n\
|
|
||||||
- packages\n\
|
|
||||||
- boot\n\
|
|
||||||
- packages/packages\n\
|
|
||||||
- packages_packages\n\
|
|
||||||
- packages/boot\n\
|
|
||||||
- packages_boot\n\
|
|
||||||
- boot/packages\n\
|
|
||||||
- boot_packages\n\
|
|
||||||
- boot/boot\n\
|
|
||||||
- boot_boot",
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("delimiter")
|
|
||||||
.help(
|
|
||||||
"The delimiters to put between the words. \
|
|
||||||
All permutations of this list will be tried for every string of words.\n\
|
|
||||||
Specify multiple times to set multiple values.\n\
|
|
||||||
Defaults to ['/', '_'].",
|
|
||||||
)
|
|
||||||
.short('d')
|
|
||||||
.long("delimiter")
|
|
||||||
.action(ArgAction::Append),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("max-length")
|
|
||||||
.help("The maximum number of words up to which to build strings.")
|
|
||||||
.long("max")
|
|
||||||
.long("max-length")
|
|
||||||
.short('m')
|
|
||||||
.default_value("5")
|
|
||||||
.value_parser(value_parser!(usize)),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("continue")
|
|
||||||
.help("Can be used to continue a previous operation where it stopped. Word list and delimiters must match.")
|
|
||||||
.short('c')
|
|
||||||
.long("continue")
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("threads")
|
|
||||||
.help("The number of workers to run in parallel.")
|
|
||||||
.long("threads")
|
|
||||||
.short('n')
|
|
||||||
.default_value("6")
|
|
||||||
.value_parser(value_parser!(usize))
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("words")
|
|
||||||
.help("Path to a file containing words line by line.")
|
|
||||||
.required(true)
|
|
||||||
.value_parser(value_parser!(PathBuf)),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("hashes")
|
|
||||||
.help(
|
|
||||||
"Path to a file containing the hashes to attempt to brute force. \
|
|
||||||
Hashes are expected in hexadecimal notiation. \
|
|
||||||
Only 64-bit hashes are supported."
|
|
||||||
)
|
|
||||||
.required(true)
|
|
||||||
.value_parser(value_parser!(PathBuf)),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const LINE_FEED: u8 = 0x0A;
|
|
||||||
const UNDERSCORE: u8 = 0x5F;
|
|
||||||
const ZERO: u8 = 0x30;
|
|
||||||
|
|
||||||
const PREFIXES: [&str; 36] = [
|
|
||||||
"",
|
|
||||||
"content/characters/",
|
|
||||||
"content/debug/",
|
|
||||||
"content/decals/",
|
|
||||||
"content/environment/",
|
|
||||||
"content/fx/",
|
|
||||||
"content/fx/particles/",
|
|
||||||
"content/gizmos/",
|
|
||||||
"content/items/",
|
|
||||||
"content/levels/",
|
|
||||||
"content/liquid_area/",
|
|
||||||
"content/localization/",
|
|
||||||
"content/materials/",
|
|
||||||
"content/minion_impact_assets/",
|
|
||||||
"content/pickups/",
|
|
||||||
"content/shading_environments/",
|
|
||||||
"content/textures/",
|
|
||||||
"content/ui/",
|
|
||||||
"content/videos/",
|
|
||||||
"content/vo/",
|
|
||||||
"content/volume_types/",
|
|
||||||
"content/weapons/",
|
|
||||||
"content/",
|
|
||||||
"core/",
|
|
||||||
"core/units/",
|
|
||||||
"packages/boot_assets/",
|
|
||||||
"packages/content/",
|
|
||||||
"packages/game_scripts/",
|
|
||||||
"packages/strings/",
|
|
||||||
"packages/ui/",
|
|
||||||
"packages/",
|
|
||||||
"wwise/events/",
|
|
||||||
"wwise/packages/",
|
|
||||||
"wwise/world_sound_fx/",
|
|
||||||
"wwise/events/weapons/",
|
|
||||||
"wwise/events/minions/",
|
|
||||||
];
|
|
||||||
|
|
||||||
fn make_info_printer(rx: Receiver<(usize, usize, String)>, hash_count: usize) -> JoinHandle<()> {
|
|
||||||
std::thread::spawn(move || {
|
|
||||||
let mut writer = std::io::stderr();
|
|
||||||
let mut total_count = 0;
|
|
||||||
let mut total_found = 0;
|
|
||||||
|
|
||||||
let mut start = Instant::now();
|
|
||||||
|
|
||||||
while let Ok((count, found, last)) = rx.recv() {
|
|
||||||
total_count += count;
|
|
||||||
total_found += found;
|
|
||||||
|
|
||||||
let now = Instant::now();
|
|
||||||
if (now - start).as_millis() > 250 {
|
|
||||||
let s = &last[0..std::cmp::min(last.len(), 60)];
|
|
||||||
let s = format!(
|
|
||||||
"\r{:12} per second | {total_found:6}/{hash_count} found | {s:<60}",
|
|
||||||
total_count * 4
|
|
||||||
);
|
|
||||||
|
|
||||||
writer.write_all(s.as_bytes()).unwrap();
|
|
||||||
|
|
||||||
total_count = 0;
|
|
||||||
start = now;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn make_stdout_printer(rx: Receiver<Vec<u8>>) -> JoinHandle<()> {
|
|
||||||
std::thread::spawn(move || {
|
|
||||||
let mut writer = std::io::stdout();
|
|
||||||
|
|
||||||
while let Ok(buf) = rx.recv() {
|
|
||||||
writer.write_all(&buf).unwrap();
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
struct State {
|
|
||||||
delimiter_lists: Arc<Vec<Vec<String>>>,
|
|
||||||
hashes: Arc<HashSet<Murmur64>>,
|
|
||||||
words: Arc<Vec<String>>,
|
|
||||||
delimiters_len: usize,
|
|
||||||
stdout_tx: Sender<Vec<u8>>,
|
|
||||||
info_tx: Sender<(usize, usize, String)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn make_worker(rx: Receiver<Vec<usize>>, state: State) -> JoinHandle<()> {
|
|
||||||
std::thread::spawn(move || {
|
|
||||||
let delimiter_lists = &state.delimiter_lists;
|
|
||||||
let hashes = &state.hashes;
|
|
||||||
let words = &state.words;
|
|
||||||
let delimiters_len = state.delimiters_len;
|
|
||||||
|
|
||||||
let mut count = 0;
|
|
||||||
let mut found = 0;
|
|
||||||
let mut buf = Vec::with_capacity(1024);
|
|
||||||
|
|
||||||
while let Ok(indices) = rx.recv() {
|
|
||||||
let sequence = indices.iter().map(|i| words[*i].as_str());
|
|
||||||
|
|
||||||
// We only want delimiters between words, so we keep that iterator shorter by
|
|
||||||
// one.
|
|
||||||
let delimiter_count = sequence.len() as u32 - 1;
|
|
||||||
|
|
||||||
for prefix in PREFIXES.iter().map(|p| p.as_bytes()) {
|
|
||||||
buf.clear();
|
|
||||||
|
|
||||||
// We can keep the prefix at the front of the buffer and only
|
|
||||||
// replace the parts after that.
|
|
||||||
let prefix_len = prefix.len();
|
|
||||||
buf.extend_from_slice(prefix);
|
|
||||||
|
|
||||||
for delims in delimiter_lists
|
|
||||||
.iter()
|
|
||||||
.take(delimiters_len.pow(delimiter_count))
|
|
||||||
{
|
|
||||||
buf.truncate(prefix_len);
|
|
||||||
|
|
||||||
let delims = delims
|
|
||||||
.iter()
|
|
||||||
.map(|s| s.as_str())
|
|
||||||
.take(delimiter_count as usize);
|
|
||||||
sequence
|
|
||||||
.clone()
|
|
||||||
.interleave(delims.clone())
|
|
||||||
.for_each(|word| buf.extend_from_slice(word.as_bytes()));
|
|
||||||
|
|
||||||
count += 1;
|
|
||||||
|
|
||||||
let hash = Murmur64::hash(&buf);
|
|
||||||
if hashes.contains(&hash) {
|
|
||||||
found += 1;
|
|
||||||
|
|
||||||
buf.push(LINE_FEED);
|
|
||||||
if state.stdout_tx.send(buf.clone()).is_err() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let word_len = buf.len();
|
|
||||||
|
|
||||||
// If the regular word itself didn't match, we check
|
|
||||||
// for numbered suffixes.
|
|
||||||
// For now, we only check up to `09` to avoid more complex logic
|
|
||||||
// writing into the buffer.
|
|
||||||
// Packages that contain files with higher numbers than this
|
|
||||||
// should hopefully become easier to spot once a good number of
|
|
||||||
// hashes is found.
|
|
||||||
for i in 1..=9 {
|
|
||||||
buf.truncate(word_len);
|
|
||||||
buf.push(UNDERSCORE);
|
|
||||||
buf.push(ZERO);
|
|
||||||
buf.push(ZERO + i);
|
|
||||||
|
|
||||||
count += 1;
|
|
||||||
|
|
||||||
let hash = Murmur64::hash(&buf);
|
|
||||||
if hashes.contains(&hash) {
|
|
||||||
found += 1;
|
|
||||||
|
|
||||||
buf.push(LINE_FEED);
|
|
||||||
if state.stdout_tx.send(buf.clone()).is_err() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if count >= 2 * 1024 * 1024 {
|
|
||||||
// The last prefix in the set is the one that will stay in the buffer
|
|
||||||
// when we're about to print here.
|
|
||||||
// So we strip that, to show just the generated part.
|
|
||||||
// We also restrict the length to stay on a single line.
|
|
||||||
let prefix_len = PREFIXES[35].len();
|
|
||||||
// No need to wait for this
|
|
||||||
let _ = state.info_tx.try_send((
|
|
||||||
count,
|
|
||||||
found,
|
|
||||||
String::from_utf8_lossy(&buf[prefix_len..]).to_string(),
|
|
||||||
));
|
|
||||||
|
|
||||||
count = 0;
|
|
||||||
found = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_delimiter_lists(delimiters: impl AsRef<[String]>, max_length: usize) -> Vec<Vec<String>> {
|
|
||||||
let delimiters = delimiters.as_ref();
|
|
||||||
let mut indices = vec![0; max_length];
|
|
||||||
let mut list = Vec::new();
|
|
||||||
|
|
||||||
for _ in 0..delimiters.len().pow(max_length as u32) {
|
|
||||||
list.push(
|
|
||||||
indices
|
|
||||||
.iter()
|
|
||||||
.map(|i| delimiters[*i].clone())
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
);
|
|
||||||
|
|
||||||
for v in indices.iter_mut() {
|
|
||||||
if *v >= delimiters.len() - 1 {
|
|
||||||
*v = 0;
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
*v += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
list
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_initial_indices(
|
|
||||||
cont: Option<&String>,
|
|
||||||
delimiters: impl AsRef<[String]>,
|
|
||||||
words: impl AsRef<[String]>,
|
|
||||||
) -> Result<Vec<usize>> {
|
|
||||||
if let Some(cont) = cont {
|
|
||||||
let mut splits = vec![cont.clone()];
|
|
||||||
|
|
||||||
for delim in delimiters.as_ref().iter() {
|
|
||||||
splits = splits
|
|
||||||
.iter()
|
|
||||||
.flat_map(|s| s.split(delim))
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.collect();
|
|
||||||
}
|
|
||||||
|
|
||||||
let indices = splits
|
|
||||||
.into_iter()
|
|
||||||
.map(|s| {
|
|
||||||
words
|
|
||||||
.as_ref()
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.find(|(_, v)| s == **v)
|
|
||||||
.map(|(i, _)| i)
|
|
||||||
.ok_or_else(|| eyre::eyre!("'{}' is not in the word list", s))
|
|
||||||
})
|
|
||||||
.collect::<Result<_>>()?;
|
|
||||||
|
|
||||||
tracing::info!("Continuing from '{}' -> '{:?}'", cont, &indices);
|
|
||||||
|
|
||||||
Ok(indices)
|
|
||||||
} else {
|
|
||||||
Ok(vec![0])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
|
||||||
#[allow(clippy::mut_range_bound)]
|
|
||||||
pub(crate) fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
|
||||||
let max_length: usize = matches
|
|
||||||
.get_one::<usize>("max-length")
|
|
||||||
.copied()
|
|
||||||
.expect("parameter has default");
|
|
||||||
|
|
||||||
let num_threads: usize = matches
|
|
||||||
.get_one::<usize>("threads")
|
|
||||||
.copied()
|
|
||||||
.expect("parameter has default");
|
|
||||||
|
|
||||||
let words = {
|
|
||||||
let path = matches
|
|
||||||
.get_one::<PathBuf>("words")
|
|
||||||
.expect("missing required parameter");
|
|
||||||
|
|
||||||
let file = fs::read_to_string(path)
|
|
||||||
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
|
|
||||||
|
|
||||||
let words: Vec<_> = file.lines().map(str::to_string).collect();
|
|
||||||
|
|
||||||
if words.is_empty() {
|
|
||||||
eyre::bail!("Word list must not be empty");
|
|
||||||
}
|
|
||||||
|
|
||||||
Arc::new(words)
|
|
||||||
};
|
|
||||||
|
|
||||||
let hashes = {
|
|
||||||
let path = matches
|
|
||||||
.get_one::<PathBuf>("hashes")
|
|
||||||
.expect("missing required argument");
|
|
||||||
let content = fs::read_to_string(path)
|
|
||||||
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
|
|
||||||
|
|
||||||
let hashes: Result<HashSet<_>, _> = content
|
|
||||||
.lines()
|
|
||||||
.map(|s| u64::from_str_radix(s, 16).map(Murmur64::from))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let hashes = hashes?;
|
|
||||||
|
|
||||||
tracing::trace!("{:?}", hashes);
|
|
||||||
|
|
||||||
Arc::new(hashes)
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut delimiters: Vec<String> = matches
|
|
||||||
.get_many::<String>("delimiter")
|
|
||||||
.unwrap_or_default()
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if delimiters.is_empty() {
|
|
||||||
delimiters.push(String::from("/"));
|
|
||||||
delimiters.push(String::from("_"));
|
|
||||||
}
|
|
||||||
|
|
||||||
let delimiters_len = delimiters.len();
|
|
||||||
|
|
||||||
let word_count = words.len();
|
|
||||||
tracing::info!("{} words to try", word_count);
|
|
||||||
|
|
||||||
// To be able to easily combine the permutations of words and delimiters,
|
|
||||||
// we turn the latter into a pre-defined list of all permutations of delimiters
|
|
||||||
// that are possible at the given amount of words.
|
|
||||||
// Combining `Iterator::cycle` with `Itertools::permutations` works, but
|
|
||||||
// with a high `max_length`, it runs OOM.
|
|
||||||
// So we basically have to implement a smaller version of the iterative algorithm we use later on
|
|
||||||
// to build permutations of the actual words.
|
|
||||||
let delimiter_lists = {
|
|
||||||
let lists = build_delimiter_lists(&delimiters, max_length - 1);
|
|
||||||
Arc::new(lists)
|
|
||||||
};
|
|
||||||
tracing::debug!("{:?}", delimiter_lists);
|
|
||||||
|
|
||||||
let (info_tx, info_rx) = bounded(100);
|
|
||||||
let (stdout_tx, stdout_rx) = unbounded::<Vec<u8>>();
|
|
||||||
let (task_tx, task_rx) = bounded::<Vec<usize>>(num_threads * 4);
|
|
||||||
let mut handles = Vec::new();
|
|
||||||
|
|
||||||
for _ in 0..num_threads {
|
|
||||||
let handle = make_worker(
|
|
||||||
task_rx.clone(),
|
|
||||||
State {
|
|
||||||
delimiter_lists: Arc::clone(&delimiter_lists),
|
|
||||||
hashes: Arc::clone(&hashes),
|
|
||||||
words: Arc::clone(&words),
|
|
||||||
delimiters_len,
|
|
||||||
stdout_tx: stdout_tx.clone(),
|
|
||||||
info_tx: info_tx.clone(),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
handles.push(handle);
|
|
||||||
}
|
|
||||||
// These are only used inside the worker threads, but due to the loops above, we had to
|
|
||||||
// clone them one too many times.
|
|
||||||
// So we drop that extra reference immediately, to ensure that the channels can
|
|
||||||
// disconnect properly when the threads finish.
|
|
||||||
drop(stdout_tx);
|
|
||||||
drop(info_tx);
|
|
||||||
|
|
||||||
handles.push(make_info_printer(info_rx, hashes.len()));
|
|
||||||
handles.push(make_stdout_printer(stdout_rx));
|
|
||||||
|
|
||||||
let mut indices =
|
|
||||||
build_initial_indices(matches.get_one::<String>("continue"), &delimiters, &*words)
|
|
||||||
.wrap_err("Failed to build initial indices")?;
|
|
||||||
let mut indices_len = indices.len();
|
|
||||||
let mut sequence = indices
|
|
||||||
.iter()
|
|
||||||
.map(|index| words[*index].as_str())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
// Prevent re-allocation by reserving as much as we need upfront
|
|
||||||
indices.reserve(max_length);
|
|
||||||
sequence.reserve(max_length);
|
|
||||||
|
|
||||||
'outer: loop {
|
|
||||||
task_tx.send(indices.clone())?;
|
|
||||||
|
|
||||||
for i in 0..indices_len {
|
|
||||||
let index = indices.get_mut(i).unwrap();
|
|
||||||
let word = sequence.get_mut(i).unwrap();
|
|
||||||
|
|
||||||
if *index >= word_count - 1 {
|
|
||||||
*index = 0;
|
|
||||||
*word = words[*index].as_str();
|
|
||||||
|
|
||||||
if indices.get(i + 1).is_none() {
|
|
||||||
indices_len += 1;
|
|
||||||
|
|
||||||
if indices_len > max_length {
|
|
||||||
break 'outer;
|
|
||||||
}
|
|
||||||
|
|
||||||
indices.push(0);
|
|
||||||
sequence.push(words[0].as_str());
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*index += 1;
|
|
||||||
*word = words[*index].as_str();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dropping the senders will disconnect the channel,
|
|
||||||
// so that the threads holding the other end will eventually
|
|
||||||
// complete as well.
|
|
||||||
drop(task_tx);
|
|
||||||
|
|
||||||
for handle in handles {
|
|
||||||
match handle.join() {
|
|
||||||
Ok(_) => {}
|
|
||||||
Err(value) => {
|
|
||||||
if let Some(err) = value.downcast_ref::<String>() {
|
|
||||||
eyre::bail!("Thread failed: {}", err);
|
|
||||||
} else {
|
|
||||||
eyre::bail!("Thread failed with unknown error: {:?}", value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let _ = std::io::stdout().write_all("\r".as_bytes());
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -1,463 +0,0 @@
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
use clap::{value_parser, Arg, ArgMatches, Command, ValueEnum};
|
|
||||||
use color_eyre::eyre::Context;
|
|
||||||
use color_eyre::Result;
|
|
||||||
use tokio::fs;
|
|
||||||
|
|
||||||
pub(crate) fn command_definition() -> Command {
|
|
||||||
Command::new("extract-words")
|
|
||||||
.about(
|
|
||||||
"Extract unique alphanumeric sequences that match common identifier rules from the given file. \
|
|
||||||
Only ASCII is supported.",
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("file")
|
|
||||||
.required(true)
|
|
||||||
.value_parser(value_parser!(PathBuf))
|
|
||||||
.help("Path to the file to extract words from."),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("min-length")
|
|
||||||
.help("Minimum length to consider a word.")
|
|
||||||
.long("min-length")
|
|
||||||
.short('m')
|
|
||||||
.default_value("3")
|
|
||||||
.value_parser(value_parser!(usize))
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("algorithm")
|
|
||||||
.help("The algorithm to determine matching words")
|
|
||||||
.long("algorithm")
|
|
||||||
.short('a')
|
|
||||||
.default_value("identifier")
|
|
||||||
.value_parser(value_parser!(Algorithm))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
|
|
||||||
#[value(rename_all = "snake_case")]
|
|
||||||
enum Algorithm {
|
|
||||||
Alphabetic,
|
|
||||||
Alphanumeric,
|
|
||||||
Identifier,
|
|
||||||
Number,
|
|
||||||
Hash32,
|
|
||||||
Hash64,
|
|
||||||
Paths,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Algorithm {
|
|
||||||
fn is_start(&self, c: char) -> bool {
|
|
||||||
match self {
|
|
||||||
Self::Alphabetic => c.is_ascii_alphabetic(),
|
|
||||||
Self::Alphanumeric => c.is_ascii_alphanumeric(),
|
|
||||||
Self::Identifier => c.is_ascii_alphabetic(),
|
|
||||||
Self::Number => c.is_numeric(),
|
|
||||||
Self::Hash32 | Self::Hash64 => matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F'),
|
|
||||||
// Supposed to be handled separately
|
|
||||||
Self::Paths => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_body(&self, c: char) -> bool {
|
|
||||||
match self {
|
|
||||||
Self::Alphabetic => c.is_ascii_alphabetic(),
|
|
||||||
Self::Alphanumeric => c.is_ascii_alphanumeric(),
|
|
||||||
Self::Identifier => c.is_ascii_alphanumeric(),
|
|
||||||
Self::Number => c.is_numeric(),
|
|
||||||
Self::Hash32 | Self::Hash64 => matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F'),
|
|
||||||
// Supposed to be handled separately
|
|
||||||
Self::Paths => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_length(&self, len: usize) -> bool {
|
|
||||||
match self {
|
|
||||||
Self::Alphabetic => true,
|
|
||||||
Self::Alphanumeric => true,
|
|
||||||
Self::Identifier => true,
|
|
||||||
Self::Number => true,
|
|
||||||
Self::Hash32 => len == 8,
|
|
||||||
Self::Hash64 => len == 16,
|
|
||||||
// Supposed to be handled separately
|
|
||||||
Self::Paths => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for Algorithm {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"{}",
|
|
||||||
match self {
|
|
||||||
Algorithm::Alphabetic => "alphabetic",
|
|
||||||
Algorithm::Alphanumeric => "alphanumeric",
|
|
||||||
Algorithm::Identifier => "identifier",
|
|
||||||
Algorithm::Number => "number",
|
|
||||||
Algorithm::Hash32 => "hash32",
|
|
||||||
Algorithm::Hash64 => "hash64",
|
|
||||||
Algorithm::Paths => "paths",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
|
||||||
enum PathState {
|
|
||||||
Begin,
|
|
||||||
PathComponent,
|
|
||||||
PathSeparator,
|
|
||||||
Boundary,
|
|
||||||
NonWord,
|
|
||||||
End,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip(chars))]
|
|
||||||
fn extract_paths(chars: impl Iterator<Item = char>) -> Vec<Vec<String>> {
|
|
||||||
let mut chars = chars.peekable();
|
|
||||||
|
|
||||||
let mut state = PathState::Begin;
|
|
||||||
let mut list = Vec::new();
|
|
||||||
let mut path = Vec::new();
|
|
||||||
let mut word = String::new();
|
|
||||||
|
|
||||||
let is_boundary = |c: char| c == '\n' || c == ' ' || c == ',' || c == '\t' || c == '|';
|
|
||||||
|
|
||||||
'machine: loop {
|
|
||||||
state = match state {
|
|
||||||
PathState::Begin => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some('/') => PathState::PathSeparator,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::PathComponent => match chars.next() {
|
|
||||||
None => {
|
|
||||||
path.push(word.clone());
|
|
||||||
list.push(path.clone());
|
|
||||||
|
|
||||||
PathState::End
|
|
||||||
}
|
|
||||||
Some(c) if c.is_ascii_alphanumeric() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some('/') => {
|
|
||||||
path.push(word.clone());
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
PathState::PathSeparator
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => {
|
|
||||||
path.push(word.clone());
|
|
||||||
list.push(path.clone());
|
|
||||||
|
|
||||||
path.clear();
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
PathState::Boundary
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
list.push(path.clone());
|
|
||||||
|
|
||||||
path.clear();
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
},
|
|
||||||
PathState::PathSeparator => match chars.next() {
|
|
||||||
None => {
|
|
||||||
list.push(path.clone());
|
|
||||||
PathState::End
|
|
||||||
}
|
|
||||||
Some('/') => PathState::PathSeparator,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => {
|
|
||||||
list.push(path.clone());
|
|
||||||
path.clear();
|
|
||||||
PathState::Boundary
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
list.push(path.clone());
|
|
||||||
path.clear();
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
},
|
|
||||||
PathState::Boundary => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::NonWord => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::End => {
|
|
||||||
break 'machine;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
list
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip(chars))]
|
|
||||||
fn algorithm_path_components(chars: impl Iterator<Item = char>, min_length: usize) {
|
|
||||||
let mut chars = chars.peekable();
|
|
||||||
|
|
||||||
let mut state = PathState::Begin;
|
|
||||||
let mut word = String::new();
|
|
||||||
let mut lists = vec![HashMap::<String, usize>::new()];
|
|
||||||
let mut index = 0;
|
|
||||||
|
|
||||||
let is_boundary = |c: char| c == '\n' || c == ' ' || c == ',' || c == '\t';
|
|
||||||
|
|
||||||
'machine: loop {
|
|
||||||
state = match state {
|
|
||||||
PathState::Begin => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
// Ignore leading path separators to not trigger the logic of advancing
|
|
||||||
// the component count
|
|
||||||
Some('/') => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::PathComponent => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphanumeric() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some('/') => PathState::PathSeparator,
|
|
||||||
Some(c) => {
|
|
||||||
if index > 0 && word.len() >= min_length {
|
|
||||||
let list = &mut lists[index];
|
|
||||||
list.entry(word.clone())
|
|
||||||
.and_modify(|count| *count += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
index = 0;
|
|
||||||
|
|
||||||
if is_boundary(c) {
|
|
||||||
PathState::Boundary
|
|
||||||
} else {
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
PathState::PathSeparator => {
|
|
||||||
if word.len() >= min_length {
|
|
||||||
let list = &mut lists[index];
|
|
||||||
list.entry(word.clone())
|
|
||||||
.and_modify(|count| *count += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
word.clear();
|
|
||||||
|
|
||||||
index += 1;
|
|
||||||
if lists.get(index).is_none() {
|
|
||||||
lists.push(HashMap::new());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ignore multiple separators
|
|
||||||
while chars.next_if(|c| *c == '/').is_some() {}
|
|
||||||
|
|
||||||
match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() || c == '_' => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => {
|
|
||||||
index = 0;
|
|
||||||
PathState::Boundary
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
index = 0;
|
|
||||||
PathState::NonWord
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PathState::Boundary => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if c.is_ascii_alphabetic() => {
|
|
||||||
word.push(c);
|
|
||||||
PathState::PathComponent
|
|
||||||
}
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::NonWord => match chars.next() {
|
|
||||||
None => PathState::End,
|
|
||||||
Some(c) if is_boundary(c) => PathState::Boundary,
|
|
||||||
Some(_) => PathState::NonWord,
|
|
||||||
},
|
|
||||||
PathState::End => {
|
|
||||||
if word.len() >= min_length {
|
|
||||||
let list = &mut lists[index];
|
|
||||||
list.entry(word.clone())
|
|
||||||
.and_modify(|count| *count += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
break 'machine;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for i in 0..lists.len() {
|
|
||||||
print!("Word {i}, Count {i},");
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
|
|
||||||
let mut lines: Vec<Vec<Option<(String, usize)>>> = Vec::new();
|
|
||||||
|
|
||||||
for (i, list) in lists.into_iter().enumerate() {
|
|
||||||
let mut entries = list.into_iter().collect::<Vec<_>>();
|
|
||||||
entries.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
|
||||||
|
|
||||||
for (j, (word, count)) in entries.into_iter().enumerate() {
|
|
||||||
if let Some(line) = lines.get_mut(j) {
|
|
||||||
while line.len() < i {
|
|
||||||
line.push(None);
|
|
||||||
}
|
|
||||||
line.push(Some((word, count)));
|
|
||||||
} else {
|
|
||||||
let mut line = Vec::new();
|
|
||||||
while line.len() < i {
|
|
||||||
line.push(None);
|
|
||||||
}
|
|
||||||
line.push(Some((word, count)));
|
|
||||||
lines.push(line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for line in lines.iter() {
|
|
||||||
for cell in line.iter() {
|
|
||||||
if let Some((word, count)) = cell {
|
|
||||||
print!("{},{},", word, count);
|
|
||||||
} else {
|
|
||||||
print!(",,");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// States of the character-class scanner driven by [`run`] for the
/// non-path word-extraction algorithms.
#[derive(Copy, Clone, Debug)]
enum State {
    // Initial state, before any character has been inspected.
    Begin,
    // Inside a run of non-word characters.
    NonWord,
    // Accumulating a word into the current buffer.
    Word,
    // Input exhausted; terminates the state machine.
    End,
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
|
||||||
pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
|
||||||
let path = matches
|
|
||||||
.get_one::<PathBuf>("file")
|
|
||||||
.expect("missing required parameter");
|
|
||||||
|
|
||||||
let algorithm = matches
|
|
||||||
.get_one::<Algorithm>("algorithm")
|
|
||||||
.expect("parameter has default");
|
|
||||||
|
|
||||||
let min_length = matches
|
|
||||||
.get_one::<usize>("min-length")
|
|
||||||
.copied()
|
|
||||||
.expect("paramter has default");
|
|
||||||
|
|
||||||
let content = fs::read_to_string(&path)
|
|
||||||
.await
|
|
||||||
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
|
|
||||||
let mut chars = content.chars();
|
|
||||||
|
|
||||||
if *algorithm == Algorithm::Paths {
|
|
||||||
algorithm_path_components(chars, min_length);
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut state = State::Begin;
|
|
||||||
let mut word = String::new();
|
|
||||||
let mut visited = HashMap::new();
|
|
||||||
|
|
||||||
'machine: loop {
|
|
||||||
state = match state {
|
|
||||||
State::Begin => match chars.next() {
|
|
||||||
None => State::End,
|
|
||||||
Some(c) if algorithm.is_start(c) => {
|
|
||||||
word.push(c);
|
|
||||||
State::Word
|
|
||||||
}
|
|
||||||
Some(_) => State::NonWord,
|
|
||||||
},
|
|
||||||
State::End => break 'machine,
|
|
||||||
State::NonWord => match chars.next() {
|
|
||||||
None => State::End,
|
|
||||||
Some(c) if algorithm.is_body(c) => {
|
|
||||||
word.push(c);
|
|
||||||
State::Word
|
|
||||||
}
|
|
||||||
Some(_) => State::NonWord,
|
|
||||||
},
|
|
||||||
State::Word => match chars.next() {
|
|
||||||
None => {
|
|
||||||
if word.len() >= min_length && algorithm.is_length(word.len()) {
|
|
||||||
visited
|
|
||||||
.entry(word.clone())
|
|
||||||
.and_modify(|v| *v += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
State::End
|
|
||||||
}
|
|
||||||
Some(c) if algorithm.is_body(c) => {
|
|
||||||
word.push(c);
|
|
||||||
State::Word
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
if word.len() >= min_length && algorithm.is_length(word.len()) {
|
|
||||||
visited
|
|
||||||
.entry(word.clone())
|
|
||||||
.and_modify(|v| *v += 1)
|
|
||||||
.or_insert(1);
|
|
||||||
}
|
|
||||||
word.clear();
|
|
||||||
State::NonWord
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut entries: Vec<(String, usize)> = visited.into_iter().collect();
|
|
||||||
// Reverse sides during comparison to get "highest to lowest"
|
|
||||||
entries.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
|
||||||
|
|
||||||
entries
|
|
||||||
.iter()
|
|
||||||
.for_each(|(word, count)| println!("{:016} {}", word, count));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -1,26 +0,0 @@
|
||||||
use clap::{ArgMatches, Command};
|
|
||||||
use color_eyre::Result;
|
|
||||||
|
|
||||||
mod brute_force_words;
|
|
||||||
mod extract_words;
|
|
||||||
|
|
||||||
pub(crate) fn command_definition() -> Command {
|
|
||||||
Command::new("experiment")
|
|
||||||
.subcommand_required(true)
|
|
||||||
.about("A collection of utilities and experiments.")
|
|
||||||
.subcommand(brute_force_words::command_definition())
|
|
||||||
.subcommand(extract_words::command_definition())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
|
||||||
pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
|
||||||
match matches.subcommand() {
|
|
||||||
// It's fine to block here, as this is the only thing that's executing on the runtime.
|
|
||||||
// The other option with `spawn_blocking` would require setting up values to be Send+Sync.
|
|
||||||
Some(("brute-force-words", sub_matches)) => brute_force_words::run(ctx, sub_matches),
|
|
||||||
Some(("extract-words", sub_matches)) => extract_words::run(ctx, sub_matches).await,
|
|
||||||
_ => unreachable!(
|
|
||||||
"clap is configured to require a subcommand, and they're all handled above"
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -21,7 +21,6 @@ mod cmd {
|
||||||
pub mod build;
|
pub mod build;
|
||||||
pub mod bundle;
|
pub mod bundle;
|
||||||
pub mod dictionary;
|
pub mod dictionary;
|
||||||
pub mod experiment;
|
|
||||||
pub mod migrate;
|
pub mod migrate;
|
||||||
pub mod murmur;
|
pub mod murmur;
|
||||||
pub mod new;
|
pub mod new;
|
||||||
|
@ -57,7 +56,6 @@ async fn main() -> Result<()> {
|
||||||
.subcommand(cmd::build::command_definition())
|
.subcommand(cmd::build::command_definition())
|
||||||
.subcommand(cmd::bundle::command_definition())
|
.subcommand(cmd::bundle::command_definition())
|
||||||
.subcommand(cmd::dictionary::command_definition())
|
.subcommand(cmd::dictionary::command_definition())
|
||||||
.subcommand(cmd::experiment::command_definition())
|
|
||||||
.subcommand(cmd::migrate::command_definition())
|
.subcommand(cmd::migrate::command_definition())
|
||||||
.subcommand(cmd::murmur::command_definition())
|
.subcommand(cmd::murmur::command_definition())
|
||||||
.subcommand(cmd::new::command_definition())
|
.subcommand(cmd::new::command_definition())
|
||||||
|
@ -135,7 +133,6 @@ async fn main() -> Result<()> {
|
||||||
Some(("build", sub_matches)) => cmd::build::run(ctx, sub_matches).await?,
|
Some(("build", sub_matches)) => cmd::build::run(ctx, sub_matches).await?,
|
||||||
Some(("bundle", sub_matches)) => cmd::bundle::run(ctx, sub_matches).await?,
|
Some(("bundle", sub_matches)) => cmd::bundle::run(ctx, sub_matches).await?,
|
||||||
Some(("dictionary", sub_matches)) => cmd::dictionary::run(ctx, sub_matches).await?,
|
Some(("dictionary", sub_matches)) => cmd::dictionary::run(ctx, sub_matches).await?,
|
||||||
Some(("experiment", sub_matches)) => cmd::experiment::run(ctx, sub_matches).await?,
|
|
||||||
Some(("migrate", sub_matches)) => cmd::migrate::run(ctx, sub_matches).await?,
|
Some(("migrate", sub_matches)) => cmd::migrate::run(ctx, sub_matches).await?,
|
||||||
Some(("murmur", sub_matches)) => cmd::murmur::run(ctx, sub_matches).await?,
|
Some(("murmur", sub_matches)) => cmd::murmur::run(ctx, sub_matches).await?,
|
||||||
Some(("new", sub_matches)) => cmd::new::run(ctx, sub_matches).await?,
|
Some(("new", sub_matches)) => cmd::new::run(ctx, sub_matches).await?,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue