Add simpler shell parser
This obsoletes `shlex`. The quoting turned out to be unnecessary, and the splitting supported a lot more than we need. It also forced unnecessary allocations: the splitting doesn't add any characters and keeps UTF-8 intact, so returning slices from the input is perfectly possible. That particular benefit will only come into play in the future, though, as `CmdLine` still requires that the slices are cloned. Still, the custom implementation performs about 3x faster.
This commit is contained in:
parent
7a1727ff3b
commit
535a30a7ca
4 changed files with 214 additions and 14 deletions
|
@ -33,7 +33,10 @@ path-slash = "0.2.1"
|
|||
async-recursion = "1.0.2"
|
||||
notify = "5.1.0"
|
||||
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
||||
shlex = "1.2.0"
|
||||
shlex = { version = "1.2.0", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.3.0"
|
||||
|
||||
[features]
|
||||
shlex-bench = ["dep:shlex"]
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::path::{Path, PathBuf};
|
|||
use std::sync::Arc;
|
||||
|
||||
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
||||
use color_eyre::eyre::{self, Context, Result};
|
||||
use color_eyre::eyre::{self, bail, Context, Result};
|
||||
use color_eyre::{Help, Report};
|
||||
use futures::future::try_join_all;
|
||||
use futures::StreamExt;
|
||||
|
@ -12,7 +12,9 @@ use sdk::{Bundle, BundleFile, CmdLine};
|
|||
use tokio::fs;
|
||||
|
||||
use crate::cmd::util::resolve_bundle_paths;
|
||||
use crate::shell_parse::ShellParser;
|
||||
|
||||
#[inline]
|
||||
fn parse_glob_pattern(s: &str) -> Result<Pattern, String> {
|
||||
match Pattern::new(s) {
|
||||
Ok(p) => Ok(p),
|
||||
|
@ -20,6 +22,7 @@ fn parse_glob_pattern(s: &str) -> Result<Pattern, String> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns a copy of `s` in which every `/` has been replaced by `_`.
#[inline]
fn flatten_name(s: &str) -> String {
    s.chars()
        .map(|ch| if ch == '/' { '_' } else { ch })
        .collect()
}
|
||||
|
@ -131,26 +134,29 @@ async fn parse_command_line_template(tmpl: &String) -> Result<CmdLine> {
|
|||
let mut cmd = if matches!(fs::try_exists(tmpl).await, Ok(true)) {
|
||||
let path = PathBuf::from(tmpl);
|
||||
if path.file_name() == Some(OsStr::new("main.py")) {
|
||||
let arg = path.display().to_string();
|
||||
let mut cmd = CmdLine::new("python");
|
||||
cmd.arg(shlex::quote(&arg).to_string());
|
||||
cmd.arg(path);
|
||||
cmd
|
||||
} else {
|
||||
CmdLine::new(path)
|
||||
}
|
||||
} else {
|
||||
let Some(args) = shlex::split(tmpl) else {
|
||||
eyre::bail!("Invalid shell syntax");
|
||||
};
|
||||
let mut parsed = ShellParser::new(tmpl.as_bytes());
|
||||
// Safety: The initial `tmpl` was a `&String` (i.e. valid UTF-8), and `ShellParser` does not
|
||||
// insert or remove characters, nor does it split UTF-8 characters.
|
||||
// So the resulting byte stream is still valid UTF-8.
|
||||
let mut cmd = CmdLine::new(unsafe {
|
||||
let bytes = parsed.next().expect("Template is not empty");
|
||||
String::from_utf8_unchecked(bytes.to_vec())
|
||||
});
|
||||
|
||||
// We already checked that the template is not empty
|
||||
let mut cmd = CmdLine::new(args[0].clone());
|
||||
let mut it = args.iter();
|
||||
// Skip the first one, that's the command name
|
||||
it.next();
|
||||
while let Some(arg) = parsed.next() {
|
||||
// Safety: See above.
|
||||
cmd.arg(unsafe { String::from_utf8_unchecked(arg.to_vec()) });
|
||||
}
|
||||
|
||||
for arg in it {
|
||||
cmd.arg(arg);
|
||||
if parsed.errored {
|
||||
bail!("Invalid command line template");
|
||||
}
|
||||
|
||||
cmd
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#![feature(io_error_more)]
|
||||
#![feature(let_chains)]
|
||||
#![feature(result_flattening)]
|
||||
#![feature(test)]
|
||||
#![windows_subsystem = "console"]
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
@ -27,6 +28,7 @@ mod cmd {
|
|||
mod util;
|
||||
pub mod watch;
|
||||
}
|
||||
mod shell_parse;
|
||||
|
||||
#[derive(Default, Deserialize, Serialize)]
|
||||
struct GlobalConfig {
|
||||
|
|
189
crates/dtmt/src/shell_parse.rs
Normal file
189
crates/dtmt/src/shell_parse.rs
Normal file
|
@ -0,0 +1,189 @@
|
|||
/// What kind of token the parser found at the current position.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum ParserState {
    /// Nothing but (optional) whitespace is left; there is no token.
    Start,
    /// An unquoted word that runs until the next whitespace byte.
    Word,
    /// A token opened by `'` that runs until the next `'`.
    SingleQuote,
    /// A token opened by `"` that runs until the next `"`.
    DoubleQuote,
}

/// Splits a byte string into shell-like words without allocating.
///
/// Tokens are separated by space, tab or newline. A token that *starts* with
/// `'` or `"` extends to the next occurrence of that same quote byte and is
/// yielded without the surrounding quotes. There is no escape handling, and a
/// quote byte in the middle of an unquoted word is kept as an ordinary byte.
///
/// Every yielded item is a sub-slice of the input.
pub struct ShellParser<'a> {
    /// The complete input being tokenized.
    bytes: &'a [u8],
    /// Index of the next byte that has not been consumed yet.
    offset: usize,
    /// Set to `true` once the input ended inside a quoted token.
    pub errored: bool,
}

impl<'a> ShellParser<'a> {
    /// Creates a parser positioned at the beginning of `bytes`.
    pub fn new(bytes: &'a [u8]) -> Self {
        Self {
            bytes,
            offset: 0,
            errored: false,
        }
    }

    /// Whether `byte` separates unquoted words.
    fn is_separator(byte: u8) -> bool {
        matches!(byte, b' ' | b'\t' | b'\n')
    }

    /// Consumes and returns the next token, or `None` when no token is left.
    ///
    /// `None` is also returned when a quoted token is never closed; in that
    /// case the rest of the input is consumed and `errored` is set.
    fn parse_word(&mut self) -> Option<&'a [u8]> {
        let input = self.bytes;
        let end = input.len();

        // Leading whitespace never belongs to a token.
        let blanks = input[self.offset..]
            .iter()
            .take_while(|&&byte| Self::is_separator(byte))
            .count();
        let begin = self.offset + blanks;

        // The first significant byte alone decides the kind of token.
        let state = match input.get(begin) {
            None => ParserState::Start,
            Some(b'\'') => ParserState::SingleQuote,
            Some(b'"') => ParserState::DoubleQuote,
            Some(_) => ParserState::Word,
        };

        match state {
            ParserState::Start => {
                self.offset = end;
                None
            }
            ParserState::Word => {
                let token = &input[begin..];
                match token.iter().position(|&byte| Self::is_separator(byte)) {
                    Some(len) => {
                        // Step over the terminating whitespace byte as well.
                        self.offset = begin + len + 1;
                        Some(&token[..len])
                    }
                    None => {
                        self.offset = end;
                        Some(token)
                    }
                }
            }
            ParserState::SingleQuote | ParserState::DoubleQuote => {
                let quote = input[begin];
                let body = &input[begin + 1..];
                match body.iter().position(|&byte| byte == quote) {
                    Some(len) => {
                        // Opening quote + content + closing quote.
                        self.offset = begin + len + 2;
                        Some(&body[..len])
                    }
                    None => {
                        self.offset = end;
                        self.errored = true;
                        None
                    }
                }
            }
        }
    }
}

impl<'a> Iterator for ShellParser<'a> {
    type Item = &'a [u8];

    /// Yields the next token of the input, see [`ShellParser`].
    fn next(&mut self) -> Option<&'a [u8]> {
        self.parse_word()
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// A single unquoted word is yielded as-is.
    #[test]
    fn test_one_word() {
        let mut parser = ShellParser::new(b"hello");
        assert_eq!(parser.next(), Some(&b"hello"[..]));
        assert_eq!(parser.next(), None);
    }

    /// A single-quoted token is yielded without its quotes.
    #[test]
    fn test_one_single() {
        let mut parser = ShellParser::new(b"'hello'");
        assert_eq!(parser.next(), Some(&b"hello"[..]));
        assert_eq!(parser.next(), None);
    }

    /// A quote that is never closed yields nothing and flags the error.
    #[test]
    fn test_open_quote() {
        let mut parser = ShellParser::new(b"'hello");
        assert_eq!(parser.next(), None);
        assert!(parser.errored);
    }

    /// A command line mixing plain words with a double-quoted path.
    #[test]
    fn test_ww2ogg() {
        let input = b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"";
        let mut parser = ShellParser::new(input);

        assert_eq!(parser.next(), Some(&b"ww2ogg.exe"[..]));
        assert_eq!(parser.next(), Some(&b"--pcb"[..]));
        assert_eq!(
            parser.next(),
            Some(&b"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin"[..])
        );
        assert_eq!(parser.next(), None);
    }
}
|
||||
|
||||
// Benchmarks for `ShellParser`; with the `shlex-bench` feature enabled the
// same inputs are also run through `shlex` for comparison.
#[cfg(test)]
mod bench {
    extern crate test;

    use super::*;
    #[cfg(feature = "shlex-bench")]
    use shlex::bytes::Shlex;
    use test::Bencher;

    // Two plain words followed by a double-quoted path.
    mod ww2ogg {
        use super::*;

        #[bench]
        fn custom(b: &mut Bencher) {
            let input = test::black_box(
                b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"",
            );
            b.iter(|| {
                let words: Vec<_> = ShellParser::new(input).collect();
                test::black_box(words);
            })
        }

        #[cfg(feature = "shlex-bench")]
        #[bench]
        fn shlex(b: &mut Bencher) {
            let input = test::black_box(
                b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"",
            );
            b.iter(|| {
                let words: Vec<_> = Shlex::new(input).collect();
                test::black_box(words);
            })
        }
    }

    // A single single-quoted token.
    mod one_single {
        use super::*;

        #[bench]
        fn custom(b: &mut Bencher) {
            let input = test::black_box(b"'hello'");
            b.iter(|| {
                let words: Vec<_> = ShellParser::new(input).collect();
                test::black_box(words);
            })
        }

        #[cfg(feature = "shlex-bench")]
        #[bench]
        fn shlex(b: &mut Bencher) {
            let input = test::black_box(b"'hello'");
            b.iter(|| {
                let words: Vec<_> = Shlex::new(input).collect();
                test::black_box(words);
            })
        }
    }
}
|
Loading…
Add table
Reference in a new issue