sdk: Improve word generation throughput

It seems that a plain `println!()` performs really poorly when the goal
is to write a lot of data to stdout.
Presumably this is because it's unbuffered, but also because it requires
the preceding code to do a lot of allocations.

This was replaced with a buffered writer on stdout, as well as an extra
`Vec<u8>` that I can write everything to directly from the word and
delimiter iterators, without allocating a single new structure.
This commit is contained in:
Lucas Schwiderski 2023-09-18 10:26:58 +02:00
parent 6485dae27b
commit 0d1193a126
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8

View file

@ -5,6 +5,7 @@ use color_eyre::eyre::{self, Context};
use color_eyre::Result;
use itertools::Itertools;
use tokio::fs;
use tokio::io::{AsyncWriteExt, BufWriter};
pub(crate) fn command_definition() -> Command {
Command::new("brute-force-words")
@ -98,6 +99,38 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
let delimiters_len = delimiters.len();
let prefixes = [
"",
"content/characters/",
"content/debug/",
"content/decals/",
"content/environment/",
"content/fx/",
"content/gizmos/",
"content/items/",
"content/levels/",
"content/liquid_area/",
"content/localization/",
"content/materials/",
"content/minion_impact_assets/",
"content/pickups/",
"content/shading_environments/",
"content/textures/",
"content/ui/",
"content/videos/",
"content/vo/",
"content/volume_types/",
"content/weapons/",
"packages/boot_assets/",
"packages/content/",
"packages/game_scripts/",
"packages/strings/",
"packages/ui/",
"wwise/events/",
"wwise/packages/",
"wwise/world_sound_fx/",
];
let word_count = words.len();
tracing::info!("{} words to try", word_count);
@ -171,6 +204,13 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
indices.reserve(max_length);
sequence.reserve(max_length);
let mut writer = BufWriter::new(tokio::io::stdout());
let mut buf = Vec::with_capacity(1024);
const LINE_FEED: u8 = 0x0A;
const UNDERSCORE: u8 = 0x5F;
const ZERO: u8 = 0x30;
'outer: loop {
// We only want delimiters between words, so we keep that iterator shorter by
// one.
@ -191,20 +231,38 @@ pub(crate) async fn run(_ctx: sdk::Context, matches: &ArgMatches) -> Result<()>
.iter()
.map(|s| s.as_str())
.take(delimiter_count as usize);
let s: String = sequence
.iter()
.copied()
.interleave(delims)
.flat_map(|word| word.chars())
.collect();
let s = sequence.iter().copied().interleave(delims.clone());
count = count.wrapping_add(1);
if count % 500000 == 0 {
tracing::info!("{} words generated", count);
buf.clear();
for prefix in prefixes.iter() {
buf.extend_from_slice(prefix.as_bytes());
s.clone()
.for_each(|word| buf.extend_from_slice(word.as_bytes()));
// buf.extend_from_slice(s.as_bytes());
buf.push(LINE_FEED);
for i in 0..=9 {
buf.extend_from_slice(prefix.as_bytes());
s.clone()
.for_each(|word| buf.extend_from_slice(word.as_bytes()));
buf.push(UNDERSCORE);
buf.push(ZERO + i);
buf.push(LINE_FEED);
buf.extend_from_slice(prefix.as_bytes());
s.clone()
.for_each(|word| buf.extend_from_slice(word.as_bytes()));
buf.push(UNDERSCORE);
buf.push(ZERO);
buf.push(ZERO + i);
buf.push(LINE_FEED);
}
}
println!("{}", s);
writer.write_all(&buf).await?;
}
for i in 0..indices_len {