Add simpler shell parser
This obsoletes `shlex`. The quoting turned out to be unnecessary, and the splitting supported a lot more than we need. It also forced unnecessary allocations: the splitting doesn't add any characters and keeps UTF-8 intact, so returning slices from the input is perfectly possible. This particular advantage will only come into play in the future, though, as `CmdLine` still requires that the slices are cloned. Still, the custom implementation performs about 3x faster.
This commit is contained in:
parent
7a1727ff3b
commit
535a30a7ca
4 changed files with 214 additions and 14 deletions
|
@ -33,7 +33,10 @@ path-slash = "0.2.1"
|
||||||
async-recursion = "1.0.2"
|
async-recursion = "1.0.2"
|
||||||
notify = "5.1.0"
|
notify = "5.1.0"
|
||||||
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
luajit2-sys = { path = "../../lib/luajit2-sys", version = "*" }
|
||||||
shlex = "1.2.0"
|
shlex = { version = "1.2.0", optional = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.3.0"
|
tempfile = "3.3.0"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
shlex-bench = ["dep:shlex"]
|
||||||
|
|
|
@ -3,7 +3,7 @@ use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
||||||
use color_eyre::eyre::{self, Context, Result};
|
use color_eyre::eyre::{self, bail, Context, Result};
|
||||||
use color_eyre::{Help, Report};
|
use color_eyre::{Help, Report};
|
||||||
use futures::future::try_join_all;
|
use futures::future::try_join_all;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
|
@ -12,7 +12,9 @@ use sdk::{Bundle, BundleFile, CmdLine};
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
|
|
||||||
use crate::cmd::util::resolve_bundle_paths;
|
use crate::cmd::util::resolve_bundle_paths;
|
||||||
|
use crate::shell_parse::ShellParser;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn parse_glob_pattern(s: &str) -> Result<Pattern, String> {
|
fn parse_glob_pattern(s: &str) -> Result<Pattern, String> {
|
||||||
match Pattern::new(s) {
|
match Pattern::new(s) {
|
||||||
Ok(p) => Ok(p),
|
Ok(p) => Ok(p),
|
||||||
|
@ -20,6 +22,7 @@ fn parse_glob_pattern(s: &str) -> Result<Pattern, String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn flatten_name(s: &str) -> String {
|
fn flatten_name(s: &str) -> String {
|
||||||
s.replace('/', "_")
|
s.replace('/', "_")
|
||||||
}
|
}
|
||||||
|
@ -131,26 +134,29 @@ async fn parse_command_line_template(tmpl: &String) -> Result<CmdLine> {
|
||||||
let mut cmd = if matches!(fs::try_exists(tmpl).await, Ok(true)) {
|
let mut cmd = if matches!(fs::try_exists(tmpl).await, Ok(true)) {
|
||||||
let path = PathBuf::from(tmpl);
|
let path = PathBuf::from(tmpl);
|
||||||
if path.file_name() == Some(OsStr::new("main.py")) {
|
if path.file_name() == Some(OsStr::new("main.py")) {
|
||||||
let arg = path.display().to_string();
|
|
||||||
let mut cmd = CmdLine::new("python");
|
let mut cmd = CmdLine::new("python");
|
||||||
cmd.arg(shlex::quote(&arg).to_string());
|
cmd.arg(path);
|
||||||
cmd
|
cmd
|
||||||
} else {
|
} else {
|
||||||
CmdLine::new(path)
|
CmdLine::new(path)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let Some(args) = shlex::split(tmpl) else {
|
let mut parsed = ShellParser::new(tmpl.as_bytes());
|
||||||
eyre::bail!("Invalid shell syntax");
|
// Safety: The initial `tmpl` was a `&String` (i.e. valid UTF-8), and `ShellParser` does not
|
||||||
};
|
// insert or remove characters, nor does it split UTF-8 characters.
|
||||||
|
// So the resulting byte stream is still valid UTF-8.
|
||||||
|
let mut cmd = CmdLine::new(unsafe {
|
||||||
|
let bytes = parsed.next().expect("Template is not empty");
|
||||||
|
String::from_utf8_unchecked(bytes.to_vec())
|
||||||
|
});
|
||||||
|
|
||||||
// We already checked that the template is not empty
|
while let Some(arg) = parsed.next() {
|
||||||
let mut cmd = CmdLine::new(args[0].clone());
|
// Safety: See above.
|
||||||
let mut it = args.iter();
|
cmd.arg(unsafe { String::from_utf8_unchecked(arg.to_vec()) });
|
||||||
// Skip the first one, that's the command name
|
}
|
||||||
it.next();
|
|
||||||
|
|
||||||
for arg in it {
|
if parsed.errored {
|
||||||
cmd.arg(arg);
|
bail!("Invalid command line template");
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd
|
cmd
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#![feature(io_error_more)]
|
#![feature(io_error_more)]
|
||||||
#![feature(let_chains)]
|
#![feature(let_chains)]
|
||||||
#![feature(result_flattening)]
|
#![feature(result_flattening)]
|
||||||
|
#![feature(test)]
|
||||||
#![windows_subsystem = "console"]
|
#![windows_subsystem = "console"]
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
@ -27,6 +28,7 @@ mod cmd {
|
||||||
mod util;
|
mod util;
|
||||||
pub mod watch;
|
pub mod watch;
|
||||||
}
|
}
|
||||||
|
mod shell_parse;
|
||||||
|
|
||||||
#[derive(Default, Deserialize, Serialize)]
|
#[derive(Default, Deserialize, Serialize)]
|
||||||
struct GlobalConfig {
|
struct GlobalConfig {
|
||||||
|
|
189
crates/dtmt/src/shell_parse.rs
Normal file
189
crates/dtmt/src/shell_parse.rs
Normal file
|
@ -0,0 +1,189 @@
|
||||||
|
/// A minimal, allocation-free splitter for shell-like command lines.
///
/// The parser walks over the input bytes and yields every word as a sub-slice of that
/// input. Only a small subset of shell syntax is recognised:
///
/// - Words are separated by unquoted spaces, tabs and newlines.
/// - A word that *starts* with `'` or `"` runs until the next occurrence of that same
///   quote character. The quotes themselves are not part of the yielded slice, and the
///   word ends right at the closing quote, even if no whitespace follows.
/// - There are no escape sequences, and a quote character in the middle of an unquoted
///   word is treated like any other byte.
///
/// Every byte the parser splits on is ASCII, so slices cut from valid UTF-8 input are
/// themselves valid UTF-8.
///
/// Iteration also stops when a quote is never closed. Through [`Iterator::next`] alone this
/// cannot be told apart from a regular end of input, so callers need to check
/// [`ShellParser::errored`] once the iterator is exhausted.
#[derive(Clone, Debug)]
pub struct ShellParser<'a> {
    /// The complete input.
    bytes: &'a [u8],
    /// Index of the first byte that has not been consumed yet. Never exceeds `bytes.len()`.
    offset: usize,
    /// Set once an opening quote without a matching closing quote has been encountered.
    pub errored: bool,
}

impl<'a> ShellParser<'a> {
    /// Creates a parser positioned at the beginning of `bytes`.
    pub fn new(bytes: &'a [u8]) -> Self {
        Self {
            bytes,
            offset: 0,
            errored: false,
        }
    }

    /// Returns whether `byte` separates unquoted words.
    fn is_separator(byte: u8) -> bool {
        matches!(byte, b' ' | b'\t' | b'\n')
    }

    /// Extracts the next word, or returns `None` when the input is exhausted or an
    /// unterminated quote was found (in which case `errored` is set as well).
    fn parse_word(&mut self) -> Option<&'a [u8]> {
        // Copy the reference out of `self`, so that the returned slices borrow from the
        // input (`'a`) rather than from the parser.
        let bytes = self.bytes;

        // Skip separators in front of the word.
        let skipped = bytes[self.offset..]
            .iter()
            .position(|&byte| !Self::is_separator(byte));
        let start = match skipped {
            Some(skipped) => self.offset + skipped,
            None => {
                // Nothing but separators left.
                self.offset = bytes.len();
                return None;
            }
        };

        match bytes[start] {
            quote @ (b'\'' | b'"') => {
                let body = &bytes[start + 1..];
                match body.iter().position(|&byte| byte == quote) {
                    Some(len) => {
                        // Skip the opening quote, the content and the closing quote.
                        self.offset = start + len + 2;
                        Some(&body[..len])
                    }
                    None => {
                        // The whole remainder belongs to the unterminated quote.
                        self.offset = bytes.len();
                        self.errored = true;
                        None
                    }
                }
            }
            _ => {
                let word = &bytes[start..];
                match word.iter().position(|&byte| Self::is_separator(byte)) {
                    Some(len) => {
                        // Also consume the separator that ended the word.
                        self.offset = start + len + 1;
                        Some(&word[..len])
                    }
                    None => {
                        self.offset = bytes.len();
                        Some(word)
                    }
                }
            }
        }
    }
}

impl<'a> Iterator for ShellParser<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        self.parse_word()
    }
}

// `parse_word` only returns `None` after moving `offset` to the end of the input, and from
// there every further call returns `None` again.
impl std::iter::FusedIterator for ShellParser<'_> {}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;

    /// A single unquoted word is returned unchanged.
    #[test]
    fn test_one_word() {
        let mut it = ShellParser::new(b"hello");
        assert_eq!(it.next(), Some("hello".as_bytes()));
        assert_eq!(it.next(), None);
        assert!(!it.errored);
    }

    /// Single quotes are stripped from the returned word.
    #[test]
    fn test_one_single() {
        let mut it = ShellParser::new(b"'hello'");
        assert_eq!(it.next(), Some("hello".as_bytes()));
        assert_eq!(it.next(), None);
        assert!(!it.errored);
    }

    /// Double quotes are stripped from the returned word.
    #[test]
    fn test_one_double() {
        let mut it = ShellParser::new(b"\"hello\"");
        assert_eq!(it.next(), Some("hello".as_bytes()));
        assert_eq!(it.next(), None);
        assert!(!it.errored);
    }

    /// An unterminated single quote yields nothing and raises the error flag.
    #[test]
    fn test_open_quote() {
        let mut it = ShellParser::new(b"'hello");
        assert_eq!(it.next(), None);
        assert!(it.errored);
    }

    /// An unterminated double quote yields nothing and raises the error flag.
    #[test]
    fn test_open_double_quote() {
        let mut it = ShellParser::new(b"\"hello");
        assert_eq!(it.next(), None);
        assert!(it.errored);
    }

    /// Empty input is not an error, it just contains no words.
    #[test]
    fn test_empty() {
        let mut it = ShellParser::new(b"");
        assert_eq!(it.next(), None);
        assert!(!it.errored);
    }

    /// Leading, trailing and repeated separators never produce empty words.
    #[test]
    fn test_whitespace() {
        let mut it = ShellParser::new(b"  a \t\n b  ");
        assert_eq!(it.next(), Some("a".as_bytes()));
        assert_eq!(it.next(), Some("b".as_bytes()));
        assert_eq!(it.next(), None);
        assert!(!it.errored);
    }

    /// Separators inside quotes are part of the word.
    #[test]
    fn test_quoted_whitespace() {
        let mut it = ShellParser::new(b"'hello world' \"foo bar\"");
        assert_eq!(it.next(), Some("hello world".as_bytes()));
        assert_eq!(it.next(), Some("foo bar".as_bytes()));
        assert_eq!(it.next(), None);
        assert!(!it.errored);
    }

    /// A realistic command line template mixing unquoted and quoted words.
    #[test]
    fn test_ww2ogg() {
        let mut it = ShellParser::new(
            b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"",
        );
        assert_eq!(it.next(), Some("ww2ogg.exe".as_bytes()));
        assert_eq!(it.next(), Some("--pcb".as_bytes()));
        assert_eq!(
            it.next(),
            Some("/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin".as_bytes())
        );
        assert_eq!(it.next(), None);
        assert!(!it.errored);
    }
}
|
||||||
|
|
||||||
|
/// Benchmarks comparing `ShellParser` against `shlex` on the same inputs.
///
/// These rely on the unstable `test` crate. The `shlex` variants are only compiled when the
/// `shlex-bench` feature is enabled.
#[cfg(test)]
mod bench {
    extern crate test;

    use super::*;
    #[cfg(feature = "shlex-bench")]
    use shlex::bytes::Shlex;
    use test::Bencher;

    /// A command line with two plain words and one double-quoted path.
    mod ww2ogg {
        use super::*;

        const INPUT: &[u8] =
            b"ww2ogg.exe --pcb \"/usr/share/ww2ogg/packed_cookbook_aoTuV_603.bin\"";

        #[bench]
        fn custom(b: &mut Bencher) {
            let input = test::black_box(INPUT);
            b.iter(|| {
                let words = ShellParser::new(input).collect::<Vec<_>>();
                let _ = test::black_box(words);
            })
        }

        #[cfg(feature = "shlex-bench")]
        #[bench]
        fn shlex(b: &mut Bencher) {
            let input = test::black_box(INPUT);
            b.iter(|| {
                let words = Shlex::new(input).collect::<Vec<_>>();
                let _ = test::black_box(words);
            })
        }
    }

    /// A single word wrapped in single quotes.
    mod one_single {
        use super::*;

        const INPUT: &[u8] = b"'hello'";

        #[bench]
        fn custom(b: &mut Bencher) {
            let input = test::black_box(INPUT);
            b.iter(|| {
                let words = ShellParser::new(input).collect::<Vec<_>>();
                let _ = test::black_box(words);
            })
        }

        #[cfg(feature = "shlex-bench")]
        #[bench]
        fn shlex(b: &mut Bencher) {
            let input = test::black_box(INPUT);
            b.iter(|| {
                let words = Shlex::new(input).collect::<Vec<_>>();
                let _ = test::black_box(words);
            })
        }
    }
}
|
Loading…
Add table
Reference in a new issue