// Source: serde_sjson/src/parser.rs (305 lines, 9.1 KiB, Rust)

use nom::branch::alt;
use nom::bytes::complete::{escaped, tag, take_until};
use nom::character::complete::{
alpha1, alphanumeric1, char, digit1, none_of, not_line_ending, one_of,
};
use nom::combinator::{cut, eof, map, map_res, opt, recognize, value};
use nom::multi::{many0_count, many1_count};
use nom::number::complete::double;
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::IResult;
use nom_locate::LocatedSpan;
/// Input type shared by every parser in this module: a string slice wrapped in
/// `nom_locate`'s `LocatedSpan`.
pub(crate) type Span<'a> = LocatedSpan<&'a str>;
/// A single lexical token produced by the parsers in this module.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum Token {
/// Closing bracket of an array: `]`.
ArrayEnd,
/// Opening bracket of an array: `[`.
ArrayStart,
/// The literal `true` or `false`.
Boolean(bool),
/// End of the input.
Eof,
/// The `=` sign.
Equals,
/// A floating point number.
Float(f64),
/// A signed integer that fits into an `i64`.
Integer(i64),
/// The literal `null`.
Null,
/// Closing brace of an object: `}`.
ObjectEnd,
/// Opening brace of an object: `{`.
ObjectStart,
/// A separator as recognised by `separator` (a comma or a line break).
Separator,
/// A string, written either as a bare identifier or enclosed in double quotes.
String(String),
}
/// Matches exactly one horizontal whitespace character: a space or a tab.
fn horizontal_whitespace(input: Span) -> IResult<Span, char> {
    const HORIZONTAL: &str = " \t";
    one_of(HORIZONTAL)(input)
}
/// Matches exactly one whitespace character: space, line feed, carriage return or tab.
fn whitespace(input: Span) -> IResult<Span, char> {
    const BLANKS: &str = " \n\r\t";
    one_of(BLANKS)(input)
}
/// Matches the literal text `null` at the start of the input.
fn null(input: Span) -> IResult<Span, ()> {
    let (rest, _) = tag("null")(input)?;
    Ok((rest, ()))
}
/// Matches a single separator between values and returns the matched text.
///
/// A separator is either a comma or a line break. Both `"\n"` and `"\r\n"` count
/// as a line break, so that files with Windows line endings separate their
/// entries the same way files with Unix line endings do.
fn separator(input: Span) -> IResult<Span, &str> {
    map(
        alt((tag(","), tag("\r\n"), tag("\n"))),
        |val: Span| *val.fragment(),
    )(input)
}
fn bool(input: Span) -> IResult<Span, bool> {
alt((value(true, tag("true")), value(false, tag("false"))))(input)
}
/// Matches an optional leading `-` followed by one or more decimal digits and
/// converts the matched text into an `i64`.
///
/// Fails if the digits do not fit into an `i64`.
fn integer(input: Span) -> IResult<Span, i64> {
    let sign = opt(char('-'));
    let digits = recognize(tuple((sign, digit1)));
    map_res(digits, |text: Span| text.fragment().parse::<i64>())(input)
}
/// Matches a floating point number by delegating to nom's `double` parser.
fn float(input: Span) -> IResult<Span, f64> {
    let (rest, number) = double(input)?;
    Ok((rest, number))
}
/// Matches a bare identifier and returns its text.
///
/// An identifier starts with an alphabetic character or `_`, followed by any
/// number of alphanumeric characters or `_`.
fn identifier(input: Span) -> IResult<Span, &str> {
    let head = alt((alpha1, tag("_")));
    let tail = many0_count(alt((alphanumeric1, tag("_"))));
    let (rest, matched) = recognize(pair(head, tail))(input)?;
    Ok((rest, *matched.fragment()))
}
/// Matches the text between the quotes of a delimited string.
///
/// The content ends at the first unescaped `"` or line feed. A backslash may be
/// followed by `"`, `r`, `t`, `n` or `\`. The returned slice is the raw source
/// text: escape sequences are recognised but not decoded.
fn string_content(input: Span) -> IResult<Span, &str> {
    // TODO: Handle Unicode escapes
    let escaped_text = escaped(none_of("\n\\\""), '\\', one_of(r#""rtn\"#));
    // Fall back to an empty match so that `""` is a valid string.
    let (rest, content) = alt((escaped_text, tag("")))(input)?;
    Ok((rest, *content.fragment()))
}
/// Matches a string enclosed in double quotes and returns the text between them.
///
/// Once the opening quote has been seen, any further error is turned into a
/// hard failure via `cut`, so callers do not backtrack into other alternatives.
fn delimited_string(input: Span) -> IResult<Span, &str> {
    let (after_quote, _) = char('"')(input)?;
    cut(terminated(string_content, char('"')))(after_quote)
}
fn string(input: Span) -> IResult<Span, &str> {
alt((identifier, delimited_string))(input)
}
/// Matches a `//` comment and returns its text, excluding the `//` marker.
///
/// The line ending itself is not consumed.
fn line_comment(input: Span) -> IResult<Span, &str> {
    let (after_marker, _) = tag("//")(input)?;
    let (rest, text) = alt((not_line_ending, eof))(after_marker)?;
    Ok((rest, *text.fragment()))
}
/// Matches a `/* ... */` comment and returns the text between the markers.
///
/// The comment ends at the first `*/`; it may span multiple lines.
fn block_comment(input: Span) -> IResult<Span, &str> {
    let (rest, body) = delimited(tag("/*"), take_until("*/"), tag("*/"))(input)?;
    Ok((rest, *body.fragment()))
}
fn comment(input: Span) -> IResult<Span, &str> {
alt((line_comment, block_comment))(input)
}
fn optional(input: Span) -> IResult<Span, ()> {
let whitespace = value((), whitespace);
let comment = value((), comment);
let empty = value((), tag(""));
let content = value((), many1_count(alt((whitespace, comment))));
alt((content, empty))(input)
}
pub(crate) fn parse_next_token(input: Span) -> IResult<Span, Token> {
preceded(
opt(optional),
alt((
// Order is important here.
// Certain valid strings like "null", "true" or "false" need to be
// matched to their special value.
// Integer-like numbers need to be matched to that, but are valid floats, too.
value(Token::Eof, eof),
value(Token::Separator, separator),
value(Token::ObjectStart, tag("{")),
value(Token::ObjectEnd, tag("}")),
value(Token::ArrayStart, tag("[")),
value(Token::ArrayEnd, tag("]")),
value(Token::Equals, tag("=")),
value(Token::Null, null),
map(bool, Token::Boolean),
map(integer, Token::Integer),
map(float, Token::Float),
map(string, |val| Token::String(val.to_string())),
)),
)(input)
}
pub(crate) fn parse_trailing_characters(input: Span) -> IResult<Span, ()> {
value((), optional)(input)
}
pub(crate) fn parse_null(input: Span) -> IResult<Span, Token> {
preceded(optional, value(Token::Null, null))(input)
}
pub(crate) fn parse_separator(input: Span) -> IResult<Span, Token> {
preceded(
opt(horizontal_whitespace),
value(Token::Separator, separator),
)(input)
}
pub(crate) fn parse_bool(input: Span) -> IResult<Span, Token> {
preceded(optional, map(bool, Token::Boolean))(input)
}
pub(crate) fn parse_integer(input: Span) -> IResult<Span, Token> {
preceded(optional, map(integer, Token::Integer))(input)
}
pub(crate) fn parse_float(input: Span) -> IResult<Span, Token> {
preceded(optional, map(float, Token::Float))(input)
}
pub(crate) fn parse_identifier(input: Span) -> IResult<Span, Token> {
preceded(
optional,
map(identifier, |val| Token::String(val.to_string())),
)(input)
}
pub(crate) fn parse_string(input: Span) -> IResult<Span, Token> {
preceded(optional, map(string, |val| Token::String(val.to_string())))(input)
}
#[cfg(test)]
mod test {
    use nom::error::{Error, ErrorKind};
    use nom::Err;

    use super::*;

    /// Asserts that `$parser` succeeds on `$input`, leaving `$remain` unparsed
    /// and producing `$output`.
    macro_rules! assert_ok {
        ($input:expr, $parser:ident, $remain:expr, $output:expr) => {{
            let res = super::$parser(Span::from($input));
            assert_eq!(
                res.map(|(span, res)| { (*span, res) }),
                Ok(($remain, $output))
            );
        }};
    }

    /// Asserts that `$parser` fails on `$input` with a recoverable error of
    /// kind `$kind`, reported at the very start of the input.
    macro_rules! assert_err {
        ($input:expr, $parser:ident, $kind:expr) => {{
            {
                let input = Span::from($input);
                assert_eq!(
                    super::$parser(input),
                    Err(Err::Error(Error::new(input, $kind)))
                );
            }
        }};
    }

    #[test]
    fn parse_optional() {
        assert_ok!("\n", whitespace, "", '\n');
        assert_ok!("\t", whitespace, "", '\t');
        // Two spaces in, one space left: `whitespace` consumes a single character.
        assert_ok!("  ", whitespace, " ", ' ');
        assert_ok!("/* foo bar */", comment, "", " foo bar ");
        assert_ok!("// foo", comment, "", " foo");
        assert_ok!("// foo\n", comment, "\n", " foo");
        assert_ok!("", optional, "", ());
        assert_ok!("\t\n", optional, "", ());
        assert_ok!("\n\t", optional, "", ());
        assert_ok!("// foo", optional, "", ());
        assert_ok!("\n\t// foo\n\t/* foo\n\tbar */\n", optional, "", ());
    }

    #[test]
    fn parse_integer() {
        assert_ok!("3", integer, "", 3);
        assert_ok!("12345", integer, "", 12345);
        assert_ok!("-12345", integer, "", -12345);
        assert_ok!("12345 ", integer, " ", 12345);
        assert_err!(" 12345", integer, ErrorKind::Digit);
        assert_ok!(" 12345", parse_integer, "", Token::Integer(12345));
        assert_ok!("\n12345", parse_integer, "", Token::Integer(12345));
        assert_ok!("\t12345", parse_integer, "", Token::Integer(12345));
    }

    #[test]
    fn parse_float() {
        assert_ok!("3", float, "", 3.0);
        assert_ok!("3.0", float, "", 3.0);
        assert_ok!("3.1415", float, "", 3.1415);
        assert_ok!("-123.456789", float, "", -123.456789);
        assert_err!(" 1.23", float, ErrorKind::Float);
        assert_ok!("1.23 ", float, " ", 1.23);
    }

    #[test]
    fn parse_raw_string() {
        assert_ok!("foo", identifier, "", "foo");
        assert_ok!("foo123", identifier, "", "foo123");
        assert_ok!("foo_bar", identifier, "", "foo_bar");
        assert_ok!("_foo", identifier, "", "_foo");
        assert_ok!("foo bar", identifier, " bar", "foo");
        assert_err!("123", identifier, ErrorKind::Tag);
        assert_err!("1foo", identifier, ErrorKind::Tag);
        assert_err!("\"foo\"", identifier, ErrorKind::Tag);
    }

    #[test]
    fn parse_delimited_string() {
        assert_ok!(r#""""#, delimited_string, "", "");
        assert_ok!(r#""foo""#, delimited_string, "", "foo");
        assert_ok!(r#""\"foo""#, delimited_string, "", r#"\"foo"#);
        assert_ok!(r#""foo bar""#, delimited_string, "", "foo bar");
        assert_ok!(r#""foo123""#, delimited_string, "", "foo123");
        assert_ok!(r#""123foo""#, delimited_string, "", "123foo");
        assert_ok!(r#""foo\"bar""#, delimited_string, "", "foo\\\"bar");
        assert_err!("foo\"", delimited_string, ErrorKind::Char);

        // A missing closing quote is a hard failure, reported at the end of the input.
        {
            let input = Span::from("\"foo");
            assert_eq!(
                delimited_string(input),
                Err(Err::Failure(Error::new(
                    // SAFETY: The span is only compared against the parser's error;
                    // nothing reads backwards from its offset.
                    unsafe { Span::new_from_raw_offset(4, 1, "", ()) },
                    ErrorKind::Char
                )))
            );
        }

        // A raw line break inside the quotes is a hard failure, reported at the line break.
        {
            let input = Span::from("\"foo\nbar\"");
            assert_eq!(
                delimited_string(input),
                Err(Err::Failure(Error::new(
                    // SAFETY: The span is only compared against the parser's error;
                    // nothing reads backwards from its offset.
                    unsafe { Span::new_from_raw_offset(4, 1, "\nbar\"", ()) },
                    ErrorKind::Char
                )))
            );
        }
    }

    #[test]
    fn parse_line_comment() {
        assert_ok!("// foo", line_comment, "", " foo");
        assert_ok!("// foo\n", line_comment, "\n", " foo");
    }

    #[test]
    fn parse_block_comment() {
        assert_ok!("/* foo */", block_comment, "", " foo ");
        assert_ok!("/*\n\tfoo\nbar\n*/", block_comment, "", "\n\tfoo\nbar\n");
    }
}