395 lines
12 KiB
Rust
395 lines
12 KiB
Rust
use nom::branch::alt;
|
|
use nom::bytes::complete::{tag, take_until};
|
|
use nom::character::complete::{alpha1, alphanumeric1, char, digit1, not_line_ending, one_of};
|
|
use nom::combinator::{cut, eof, map, map_res, opt, recognize, value};
|
|
use nom::multi::{many0_count, many1_count};
|
|
use nom::number::complete::double;
|
|
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
|
|
use nom::{IResult, Slice};
|
|
use nom_locate::LocatedSpan;
|
|
|
|
/// Input type used by every parser in this module: a string slice wrapped in
/// a `LocatedSpan`.
pub(crate) type Span<'a> = LocatedSpan<&'a str>;
|
|
|
|
/// A single lexical token produced by the parsers in this module.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum Token {
    /// The closing bracket of an array: `]`.
    ArrayEnd,
    /// The opening bracket of an array: `[`.
    ArrayStart,
    /// One of the literals `true` or `false`.
    Boolean(bool),
    /// The end of the input.
    Eof,
    /// The `=` sign.
    Equals,
    /// A floating point number.
    Float(f64),
    /// A (possibly negative) integer number.
    Integer(i64),
    /// The literal `null`.
    Null,
    /// The closing brace of an object: `}`.
    ObjectEnd,
    /// The opening brace of an object: `{`.
    ObjectStart,
    /// A separator: either `,` or a line feed.
    Separator,
    /// A bare identifier, or the raw (still escaped) content of a quoted string.
    String(String),
}
|
|
|
|
fn horizontal_whitespace(input: Span) -> IResult<Span, char> {
|
|
one_of(" \t")(input)
|
|
}
|
|
|
|
fn whitespace(input: Span) -> IResult<Span, char> {
|
|
one_of(" \n\r\t")(input)
|
|
}
|
|
|
|
fn null(input: Span) -> IResult<Span, ()> {
|
|
value((), tag("null"))(input)
|
|
}
|
|
|
|
fn separator(input: Span) -> IResult<Span, &str> {
|
|
map(alt((tag(","), tag("\n"))), |val: Span| *val.fragment())(input)
|
|
}
|
|
|
|
fn bool(input: Span) -> IResult<Span, bool> {
|
|
alt((value(true, tag("true")), value(false, tag("false"))))(input)
|
|
}
|
|
|
|
fn integer(input: Span) -> IResult<Span, i64> {
|
|
map_res(recognize(tuple((opt(char('-')), digit1))), |val: Span| {
|
|
val.fragment().parse::<i64>()
|
|
})(input)
|
|
}
|
|
|
|
fn float(input: Span) -> IResult<Span, f64> {
|
|
double(input)
|
|
}
|
|
|
|
fn identifier(input: Span) -> IResult<Span, &str> {
|
|
let leading = alt((alpha1, tag("_")));
|
|
let trailing = many0_count(alt((alphanumeric1, tag("_"))));
|
|
let ident = pair(leading, trailing);
|
|
|
|
map(recognize(ident), |val: Span| *val.fragment())(input)
|
|
}
|
|
|
|
fn string_content(input: Span) -> IResult<Span, &str> {
|
|
let buf = input.fragment();
|
|
let mut escaped = false;
|
|
let mut i = 0;
|
|
|
|
for (j, ch) in buf.char_indices() {
|
|
i = j;
|
|
match ch {
|
|
'\\' if !escaped => {
|
|
escaped = true;
|
|
}
|
|
'\n' if !escaped => {
|
|
let err = nom::error::Error {
|
|
input: input.slice(j..),
|
|
code: nom::error::ErrorKind::Char,
|
|
};
|
|
return Err(nom::Err::Error(err));
|
|
}
|
|
'"' if !escaped => {
|
|
return Ok((input.slice(j..), &buf[0..j]));
|
|
}
|
|
_ => escaped = false,
|
|
}
|
|
}
|
|
|
|
let err = nom::error::Error {
|
|
input: input.slice((i + 1)..),
|
|
code: nom::error::ErrorKind::Char,
|
|
};
|
|
Err(nom::Err::Failure(err))
|
|
}
|
|
|
|
fn delimited_string(input: Span) -> IResult<Span, &str> {
|
|
preceded(char('"'), cut(terminated(string_content, char('"'))))(input)
|
|
}
|
|
|
|
fn string(input: Span) -> IResult<Span, &str> {
|
|
alt((identifier, delimited_string))(input)
|
|
}
|
|
|
|
fn line_comment(input: Span) -> IResult<Span, &str> {
|
|
map(
|
|
preceded(tag("//"), alt((not_line_ending, eof))),
|
|
|val: Span| *val.fragment(),
|
|
)(input)
|
|
}
|
|
|
|
fn block_comment(input: Span) -> IResult<Span, &str> {
|
|
map(
|
|
delimited(tag("/*"), take_until("*/"), tag("*/")),
|
|
|val: Span| *val.fragment(),
|
|
)(input)
|
|
}
|
|
|
|
fn comment(input: Span) -> IResult<Span, &str> {
|
|
alt((line_comment, block_comment))(input)
|
|
}
|
|
|
|
fn optional(input: Span) -> IResult<Span, ()> {
|
|
let whitespace = value((), whitespace);
|
|
let comment = value((), comment);
|
|
let empty = value((), tag(""));
|
|
let content = value((), many1_count(alt((whitespace, comment))));
|
|
|
|
alt((content, empty))(input)
|
|
}
|
|
|
|
pub(crate) fn parse_next_token(input: Span) -> IResult<Span, Token> {
|
|
preceded(
|
|
opt(optional),
|
|
alt((
|
|
// Order is important here.
|
|
// Certain valid strings like "null", "true" or "false" need to be
|
|
// matched to their special value.
|
|
// Integer-like numbers need to be matched to that, but are valid floats, too.
|
|
value(Token::Eof, eof),
|
|
value(Token::Separator, separator),
|
|
value(Token::ObjectStart, tag("{")),
|
|
value(Token::ObjectEnd, tag("}")),
|
|
value(Token::ArrayStart, tag("[")),
|
|
value(Token::ArrayEnd, tag("]")),
|
|
value(Token::Equals, tag("=")),
|
|
value(Token::Null, null),
|
|
map(bool, Token::Boolean),
|
|
map(integer, Token::Integer),
|
|
map(float, Token::Float),
|
|
map(string, |val| Token::String(val.to_string())),
|
|
)),
|
|
)(input)
|
|
}
|
|
|
|
pub(crate) fn parse_trailing_characters(input: Span) -> IResult<Span, ()> {
|
|
value((), optional)(input)
|
|
}
|
|
|
|
pub(crate) fn parse_null(input: Span) -> IResult<Span, Token> {
|
|
preceded(optional, value(Token::Null, null))(input)
|
|
}
|
|
|
|
pub(crate) fn parse_separator(input: Span) -> IResult<Span, Token> {
|
|
preceded(
|
|
opt(horizontal_whitespace),
|
|
value(Token::Separator, separator),
|
|
)(input)
|
|
}
|
|
|
|
pub(crate) fn parse_bool(input: Span) -> IResult<Span, Token> {
|
|
preceded(optional, map(bool, Token::Boolean))(input)
|
|
}
|
|
|
|
pub(crate) fn parse_integer(input: Span) -> IResult<Span, Token> {
|
|
preceded(optional, map(integer, Token::Integer))(input)
|
|
}
|
|
|
|
pub(crate) fn parse_float(input: Span) -> IResult<Span, Token> {
|
|
preceded(optional, map(float, Token::Float))(input)
|
|
}
|
|
|
|
pub(crate) fn parse_identifier(input: Span) -> IResult<Span, Token> {
|
|
preceded(
|
|
optional,
|
|
map(identifier, |val| Token::String(val.to_string())),
|
|
)(input)
|
|
}
|
|
|
|
pub(crate) fn parse_string(input: Span) -> IResult<Span, Token> {
|
|
preceded(optional, map(string, |val| Token::String(val.to_string())))(input)
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use nom::error::{Error, ErrorKind};
    use nom::Err;

    use super::*;

    // Asserts that `$parser` succeeds on `$input`, leaving `$remain` as the
    // unparsed rest and producing `$output`.
    macro_rules! assert_ok {
        ($input:expr, $parser:ident, $remain:expr, $output:expr) => {{
            let res = super::$parser(Span::from($input));
            assert_eq!(
                res.map(|(span, res)| { (*span, res) }),
                Ok(($remain, $output))
            );
        }};
    }

    // Asserts that `$parser` fails on `$input` with a recoverable error of
    // kind `$kind` that points at the very start of the input.
    macro_rules! assert_err {
        ($input:expr, $parser:ident, $kind:expr) => {{
            let input = Span::from($input);
            assert_eq!(
                super::$parser(input),
                Err(Err::Error(Error::new(input, $kind)))
            );
        }};
    }

    // Tokenizes `input` with `parse_next_token` until nothing is left and
    // checks that the produced tokens are exactly `tokens`.
    fn check_parse_result<S: AsRef<str>, T: AsRef<[Token]>>(input: S, tokens: T) {
        let tokens = tokens.as_ref();
        let mut remaining = Span::from(input.as_ref());
        let mut i = 0;

        while !remaining.fragment().is_empty() {
            let (span, token) =
                super::parse_next_token(remaining).expect("failed to parse next token");

            assert_eq!(Some(&token), tokens.get(i));

            remaining = span;
            i += 1;
        }

        assert_eq!(
            tokens.len(),
            i,
            "tokens to check against were not exhausted"
        );
    }

    #[test]
    fn parse_optional() {
        assert_ok!("\n", whitespace, "", '\n');
        assert_ok!("\t", whitespace, "", '\t');
        // `whitespace` consumes a single character, so two spaces leave one.
        assert_ok!("  ", whitespace, " ", ' ');
        assert_ok!("/* foo bar */", comment, "", " foo bar ");
        assert_ok!("// foo", comment, "", " foo");
        assert_ok!("// foo\n", comment, "\n", " foo");

        assert_ok!("", optional, "", ());
        assert_ok!("\t\n", optional, "", ());
        assert_ok!("\n\t", optional, "", ());
        assert_ok!("// foo", optional, "", ());
        assert_ok!("\n\t// foo\n\t/* foo\n\tbar */\n", optional, "", ());
    }

    #[test]
    fn parse_integer() {
        assert_ok!("3", integer, "", 3);
        assert_ok!("12345", integer, "", 12345);
        assert_ok!("-12345", integer, "", -12345);
        assert_ok!("12345 ", integer, " ", 12345);

        assert_err!(" 12345", integer, ErrorKind::Digit);

        assert_ok!(" 12345", parse_integer, "", Token::Integer(12345));
        assert_ok!("\n12345", parse_integer, "", Token::Integer(12345));
        assert_ok!("\t12345", parse_integer, "", Token::Integer(12345));
    }

    #[test]
    fn parse_float() {
        assert_ok!("3", float, "", 3.0);
        assert_ok!("3.0", float, "", 3.0);
        assert_ok!("3.1415", float, "", 3.1415);
        assert_ok!("-123.456789", float, "", -123.456789);
        assert_err!(" 1.23", float, ErrorKind::Float);
        assert_ok!("1.23 ", float, " ", 1.23);
    }

    #[test]
    fn parse_raw_string() {
        assert_ok!("foo", identifier, "", "foo");
        assert_ok!("foo123", identifier, "", "foo123");
        assert_ok!("foo_bar", identifier, "", "foo_bar");
        assert_ok!("_foo", identifier, "", "_foo");
        assert_ok!("foo bar", identifier, " bar", "foo");

        assert_err!("123", identifier, ErrorKind::Tag);
        assert_err!("1foo", identifier, ErrorKind::Tag);
        assert_err!("\"foo\"", identifier, ErrorKind::Tag);
    }

    #[test]
    fn parse_delimited_string() {
        assert_ok!(r#""""#, delimited_string, "", "");
        assert_ok!(r#""foo""#, delimited_string, "", "foo");
        assert_ok!(r#""\"foo""#, delimited_string, "", r#"\"foo"#);
        assert_ok!(r#""foo bar""#, delimited_string, "", "foo bar");
        assert_ok!(r#""foo123""#, delimited_string, "", "foo123");
        assert_ok!(r#""123foo""#, delimited_string, "", "123foo");
        assert_ok!(r#""foo\"bar""#, delimited_string, "", "foo\\\"bar");
        assert_ok!(r#""foo\\bar""#, delimited_string, "", "foo\\\\bar");
        assert_ok!(r#""foo/bar""#, delimited_string, "", "foo/bar");

        assert_err!("foo\"", delimited_string, ErrorKind::Char);

        // A missing closing quote is a failure located at the end of the input.
        {
            let input = Span::from("\"foo");
            assert_eq!(
                delimited_string(input),
                Err(Err::Failure(Error::new(
                    // SAFETY: the span is only used as the expected value of
                    // an equality assertion.
                    unsafe { Span::new_from_raw_offset(4, 1, "", ()) },
                    ErrorKind::Char
                )))
            );
        }

        // An unescaped line feed inside the quotes is a failure located at
        // the line feed.
        {
            let input = Span::from("\"foo\nbar\"");
            assert_eq!(
                delimited_string(input),
                Err(Err::Failure(Error::new(
                    // SAFETY: the span is only used as the expected value of
                    // an equality assertion.
                    unsafe { Span::new_from_raw_offset(4, 1, "\nbar\"", ()) },
                    ErrorKind::Char
                )))
            );
        }
    }

    #[test]
    fn parse_line_comment() {
        assert_ok!("// foo", line_comment, "", " foo");
        assert_ok!("// foo\n", line_comment, "\n", " foo");
    }

    #[test]
    fn parse_block_comment() {
        assert_ok!("/* foo */", block_comment, "", " foo ");
        assert_ok!("/*\n\tfoo\nbar\n*/", block_comment, "", "\n\tfoo\nbar\n");
    }

    // Regression test for #1 (https://git.sclu1034.dev/lucas/serde_sjson/issues/1)
    #[test]
    fn parse_dtmt_config() {
        let sjson = r#"
name = "test-mod"
description = "A dummy project to test things with"
version = "0.1.0"

packages = [
"packages/test-mod"
]
"#;

        check_parse_result(
            sjson,
            [
                Token::String(String::from("name")),
                Token::Equals,
                Token::String(String::from("test-mod")),
                Token::String(String::from("description")),
                Token::Equals,
                Token::String(String::from("A dummy project to test things with")),
                Token::String(String::from("version")),
                Token::Equals,
                Token::String(String::from("0.1.0")),
                Token::String(String::from("packages")),
                Token::Equals,
                Token::ArrayStart,
                Token::String(String::from("packages/test-mod")),
                Token::ArrayEnd,
                Token::Eof,
            ],
        );
    }

    // Regression test for #2
    #[test]
    fn parse_windows_path() {
        let text = "C:\\Users\\public\\test.txt";
        let sjson = format!(r#""{}""#, text);
        check_parse_result(sjson, [Token::String(String::from(text))]);
    }
}
|