experiment: Add command to create word permutations
This creates candidate values to brute force dictionary entries with, by building combinations from a word list and delimiters.
This commit is contained in:
parent
94347d57f9
commit
6485dae27b
4 changed files with 277 additions and 2 deletions
35
Cargo.lock
generated
35
Cargo.lock
generated
|
@ -161,6 +161,17 @@ dependencies = [
|
||||||
"system-deps",
|
"system-deps",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atty"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi 0.1.19",
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.3.0"
|
version = "1.3.0"
|
||||||
|
@ -212,7 +223,7 @@ dependencies = [
|
||||||
"bitflags 2.5.0",
|
"bitflags 2.5.0",
|
||||||
"cexpr",
|
"cexpr",
|
||||||
"clang-sys",
|
"clang-sys",
|
||||||
"itertools",
|
"itertools 0.12.1",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"lazycell",
|
"lazycell",
|
||||||
"log",
|
"log",
|
||||||
|
@ -927,6 +938,7 @@ name = "dtmt"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-recursion",
|
"async-recursion",
|
||||||
|
"atty",
|
||||||
"clap",
|
"clap",
|
||||||
"cli-table",
|
"cli-table",
|
||||||
"color-eyre",
|
"color-eyre",
|
||||||
|
@ -936,6 +948,7 @@ dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"glob",
|
"glob",
|
||||||
|
"itertools 0.11.0",
|
||||||
"luajit2-sys",
|
"luajit2-sys",
|
||||||
"nanorand",
|
"nanorand",
|
||||||
"notify",
|
"notify",
|
||||||
|
@ -1598,6 +1611,15 @@ version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.1.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
@ -1858,6 +1880,15 @@ version = "1.70.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
|
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itertools"
|
name = "itertools"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
|
@ -2267,7 +2298,7 @@ version = "1.16.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hermit-abi",
|
"hermit-abi 0.3.9",
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,8 @@ async-recursion = "1.0.2"
|
||||||
notify = "6.1.1"
|
notify = "6.1.1"
|
||||||
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
||||||
shlex = { version = "1.2.0", optional = true }
|
shlex = { version = "1.2.0", optional = true }
|
||||||
|
atty = "0.2.14"
|
||||||
|
itertools = "0.11.0"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.3.0"
|
tempfile = "3.3.0"
|
||||||
|
|
239
crates/dtmt/src/cmd/experiment/brute_force_words.rs
Normal file
239
crates/dtmt/src/cmd/experiment/brute_force_words.rs
Normal file
|
@ -0,0 +1,239 @@
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
||||||
|
use color_eyre::eyre::{self, Context};
|
||||||
|
use color_eyre::Result;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use tokio::fs;
|
||||||
|
|
||||||
|
pub(crate) fn command_definition() -> Command {
|
||||||
|
Command::new("brute-force-words")
|
||||||
|
.about(
|
||||||
|
"Given a list of words and a set of delimiters, iteratevily creates permutations \
|
||||||
|
of growing length.\n\
|
||||||
|
Delimiters are placed between every word in the result.\n\n\
|
||||||
|
Example: \
|
||||||
|
Given the words ['packages', 'boot'], the delimiters ['/', '_'] and a length of 2, the resulting \
|
||||||
|
words will be\n\
|
||||||
|
- packages\n\
|
||||||
|
- boot\n\
|
||||||
|
- packages/packages\n\
|
||||||
|
- packages_packages\n\
|
||||||
|
- packages/boot\n\
|
||||||
|
- packages_boot\n\
|
||||||
|
- boot/packages\n\
|
||||||
|
- boot_packages\n\
|
||||||
|
- boot/boot\n\
|
||||||
|
- boot_boot",
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new("delimiter")
|
||||||
|
.help(
|
||||||
|
"The delimiters to put between the words. \
|
||||||
|
All permutations of this list will be tried for every string of words.\n\
|
||||||
|
Specify multiple times to set multiple values.\n\
|
||||||
|
Defaults to ['/', '_'].",
|
||||||
|
)
|
||||||
|
.short('d')
|
||||||
|
.long("delimiter")
|
||||||
|
.action(ArgAction::Append),
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new("max-length")
|
||||||
|
.help("The maximum number of words up to which to build strings.")
|
||||||
|
.long("max")
|
||||||
|
.long("max-length")
|
||||||
|
.short('m')
|
||||||
|
.default_value("5")
|
||||||
|
.value_parser(value_parser!(usize)),
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new("continue")
|
||||||
|
.help("Can be used to continue a previous operation where it stopped. Word list and delimiters must match.")
|
||||||
|
.short('c')
|
||||||
|
.long("continue")
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new("words")
|
||||||
|
.help("Path to a file containing words line by line.")
|
||||||
|
.required(true)
|
||||||
|
.value_parser(value_parser!(PathBuf)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument(skip_all)]
|
||||||
|
#[allow(clippy::mut_range_bound)]
|
||||||
|
pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
||||||
|
let max_length: usize = matches
|
||||||
|
.get_one::<usize>("max-length")
|
||||||
|
.copied()
|
||||||
|
.expect("parameter has default");
|
||||||
|
|
||||||
|
let words: Vec<String> = {
|
||||||
|
let path = matches
|
||||||
|
.get_one::<PathBuf>("words")
|
||||||
|
.expect("missing required parameter");
|
||||||
|
|
||||||
|
let file = fs::read_to_string(&path)
|
||||||
|
.await
|
||||||
|
.wrap_err_with(|| format!("Failed to read file '{}'", path.display()))?;
|
||||||
|
|
||||||
|
file.lines().map(str::to_string).collect()
|
||||||
|
};
|
||||||
|
|
||||||
|
if words.is_empty() {
|
||||||
|
eyre::bail!("Word list must not be empty");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut delimiters: Vec<String> = matches
|
||||||
|
.get_many::<String>("delimiter")
|
||||||
|
.unwrap_or_default()
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if delimiters.is_empty() {
|
||||||
|
delimiters.push(String::from("/"));
|
||||||
|
delimiters.push(String::from("_"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let delimiters_len = delimiters.len();
|
||||||
|
|
||||||
|
let word_count = words.len();
|
||||||
|
tracing::info!("{} words to try", word_count);
|
||||||
|
|
||||||
|
// To be able to easily combine the permutations of words and delimiters,
|
||||||
|
// we turn the latter into a pre-defined list of all permutations of delimiters
|
||||||
|
// that are possible at the given amount of words.
|
||||||
|
// Combining `Iterator::cycle` with `Itertools::permutations` works, but
|
||||||
|
// with a high `max_length`, it runs OOM.
|
||||||
|
// So we basically have to implement a smaller version of the iterative algorithm we use later on
|
||||||
|
// to build permutations of the actual words.
|
||||||
|
let delimiter_lists = {
|
||||||
|
let mut indices = vec![0; max_length - 1];
|
||||||
|
let mut list = Vec::new();
|
||||||
|
|
||||||
|
for _ in 0..delimiters_len.pow(max_length as u32 - 1) {
|
||||||
|
list.push(indices.iter().map(|i| &delimiters[*i]).collect::<Vec<_>>());
|
||||||
|
|
||||||
|
for v in indices.iter_mut() {
|
||||||
|
if *v >= delimiters_len - 1 {
|
||||||
|
*v = 0;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
*v += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
list
|
||||||
|
};
|
||||||
|
|
||||||
|
tracing::debug!("{:?}", delimiter_lists);
|
||||||
|
|
||||||
|
let mut count = 0u64;
|
||||||
|
|
||||||
|
let mut indices = if let Some(cont) = matches.get_one::<String>("continue").cloned() {
|
||||||
|
let mut splits = vec![cont.clone()];
|
||||||
|
|
||||||
|
for delim in delimiters.iter() {
|
||||||
|
splits = splits
|
||||||
|
.iter()
|
||||||
|
.flat_map(|s| s.split(delim))
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
let indices = splits
|
||||||
|
.into_iter()
|
||||||
|
.map(|s| {
|
||||||
|
words
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.find(|(_, v)| s == **v)
|
||||||
|
.map(|(i, _)| i)
|
||||||
|
.ok_or_else(|| eyre::eyre!("'{}' is not in the word list", s))
|
||||||
|
})
|
||||||
|
.collect::<Result<_>>()?;
|
||||||
|
|
||||||
|
tracing::info!("Continuing from '{}' -> '{:?}'", cont, &indices);
|
||||||
|
|
||||||
|
indices
|
||||||
|
} else {
|
||||||
|
vec![0]
|
||||||
|
};
|
||||||
|
let mut indices_len = indices.len();
|
||||||
|
let mut sequence = indices
|
||||||
|
.iter()
|
||||||
|
.map(|index| words[*index].as_str())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Prevent re-allocation by reserving as much as we need upfront
|
||||||
|
indices.reserve(max_length);
|
||||||
|
sequence.reserve(max_length);
|
||||||
|
|
||||||
|
'outer: loop {
|
||||||
|
// We only want delimiters between words, so we keep that iterator shorter by
|
||||||
|
// one.
|
||||||
|
let delimiter_count = sequence.len() as u32 - 1;
|
||||||
|
|
||||||
|
tracing::trace!(
|
||||||
|
"{} | {:?} -> {:?}",
|
||||||
|
delimiters_len.pow(delimiter_count),
|
||||||
|
indices,
|
||||||
|
sequence
|
||||||
|
);
|
||||||
|
|
||||||
|
for delims in delimiter_lists
|
||||||
|
.iter()
|
||||||
|
.take(delimiters_len.pow(delimiter_count))
|
||||||
|
{
|
||||||
|
let delims = delims
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.as_str())
|
||||||
|
.take(delimiter_count as usize);
|
||||||
|
let s: String = sequence
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.interleave(delims)
|
||||||
|
.flat_map(|word| word.chars())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
count = count.wrapping_add(1);
|
||||||
|
|
||||||
|
if count % 500000 == 0 {
|
||||||
|
tracing::info!("{} words generated", count);
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("{}", s);
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in 0..indices_len {
|
||||||
|
let index = indices.get_mut(i).unwrap();
|
||||||
|
let word = sequence.get_mut(i).unwrap();
|
||||||
|
|
||||||
|
if *index >= word_count - 1 {
|
||||||
|
*index = 0;
|
||||||
|
*word = words[*index].as_str();
|
||||||
|
|
||||||
|
if indices.get(i + 1).is_none() {
|
||||||
|
indices.push(0);
|
||||||
|
sequence.push(words[0].as_str());
|
||||||
|
|
||||||
|
indices_len += 1;
|
||||||
|
|
||||||
|
if indices_len > max_length {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*index += 1;
|
||||||
|
*word = words[*index].as_str();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -1,18 +1,21 @@
|
||||||
use clap::{ArgMatches, Command};
|
use clap::{ArgMatches, Command};
|
||||||
use color_eyre::Result;
|
use color_eyre::Result;
|
||||||
|
|
||||||
|
mod brute_force_words;
|
||||||
mod extract_words;
|
mod extract_words;
|
||||||
|
|
||||||
pub(crate) fn command_definition() -> Command {
|
pub(crate) fn command_definition() -> Command {
|
||||||
Command::new("experiment")
|
Command::new("experiment")
|
||||||
.subcommand_required(true)
|
.subcommand_required(true)
|
||||||
.about("A collection of utilities and experiments.")
|
.about("A collection of utilities and experiments.")
|
||||||
|
.subcommand(brute_force_words::command_definition())
|
||||||
.subcommand(extract_words::command_definition())
|
.subcommand(extract_words::command_definition())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
#[tracing::instrument(skip_all)]
|
||||||
pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
pub(crate) async fn run(ctx: sdk::Context, matches: &ArgMatches) -> Result<()> {
|
||||||
match matches.subcommand() {
|
match matches.subcommand() {
|
||||||
|
Some(("brute-force-words", sub_matches)) => brute_force_words::run(ctx, sub_matches).await,
|
||||||
Some(("extract-words", sub_matches)) => extract_words::run(ctx, sub_matches).await,
|
Some(("extract-words", sub_matches)) => extract_words::run(ctx, sub_matches).await,
|
||||||
_ => unreachable!(
|
_ => unreachable!(
|
||||||
"clap is configured to require a subcommand, and they're all handled above"
|
"clap is configured to require a subcommand, and they're all handled above"
|
||||||
|
|
Loading…
Add table
Reference in a new issue