use nom::branch::alt; use nom::bytes::complete::{tag, take_until}; use nom::character::complete::{char, digit1, none_of, not_line_ending, one_of}; use nom::combinator::{cut, eof, map, map_res, opt, recognize, value}; use nom::multi::many1_count; use nom::number::complete::double; use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::{IResult, Slice}; use nom_locate::LocatedSpan; pub(crate) type Span<'a> = LocatedSpan<&'a str>; #[derive(Clone, Debug, PartialEq)] pub(crate) enum Token { ArrayEnd, ArrayStart, Boolean(bool), Eof, Equals, Float(f64), Integer(i64), Null, ObjectEnd, ObjectStart, Separator, String(String), } fn horizontal_whitespace(input: Span) -> IResult { one_of(" \t")(input) } fn whitespace(input: Span) -> IResult { one_of(" \n\r\t")(input) } fn null(input: Span) -> IResult { value((), tag("null"))(input) } fn separator(input: Span) -> IResult { map(alt((tag(","), tag("\n"), tag("\r\n"))), |val: Span| { *val.fragment() })(input) } fn bool(input: Span) -> IResult { alt((value(true, tag("true")), value(false, tag("false"))))(input) } fn integer(input: Span) -> IResult { map_res(recognize(tuple((opt(char('-')), digit1))), |val: Span| { val.fragment().parse::() })(input) } fn float(input: Span) -> IResult { double(input) } fn identifier(input: Span) -> IResult { map(recognize(many1_count(none_of("\" \t\n=:"))), |val: Span| { *val.fragment() })(input) } fn literal_string(input: Span) -> IResult { map( delimited(tag("\"\"\""), take_until("\"\"\""), tag("\"\"\"")), |val: Span| *val.fragment(), )(input) } fn string_content(input: Span) -> IResult { let buf = input.fragment(); let mut escaped = false; let mut i = 0; for (j, ch) in buf.char_indices() { i = j; match ch { '\\' if !escaped => { escaped = true; } '\n' if !escaped => { let err = nom::error::Error { input: input.slice(j..), code: nom::error::ErrorKind::Char, }; return Err(nom::Err::Error(err)); } '"' if !escaped => { return Ok((input.slice(j..), &buf[0..j])); } _ => escaped = false, } } let err = nom::error::Error { input: input.slice((i + 1)..), code: nom::error::ErrorKind::Char, }; Err(nom::Err::Failure(err)) } fn delimited_string(input: Span) -> IResult { preceded(char('"'), cut(terminated(string_content, char('"'))))(input) } fn string(input: Span) -> IResult { alt((identifier, literal_string, delimited_string))(input) } fn line_comment(input: Span) -> IResult { map( preceded(tag("//"), alt((not_line_ending, eof))), |val: Span| *val.fragment(), )(input) } fn block_comment(input: Span) -> IResult { map( delimited(tag("/*"), take_until("*/"), tag("*/")), |val: Span| *val.fragment(), )(input) } fn comment(input: Span) -> IResult { alt((line_comment, block_comment))(input) } fn optional(input: Span) -> IResult { let whitespace = value((), whitespace); let comment = value((), comment); let empty = value((), tag("")); let content = value((), many1_count(alt((whitespace, comment)))); alt((content, empty))(input) } pub(crate) fn parse_next_token(input: Span) -> IResult { preceded( opt(optional), alt(( // Order is important here. // Certain valid strings like "null", "true" or "false" need to be // matched to their special value. // Integer-like numbers need to be matched to that, but are valid floats, too. value(Token::Eof, eof), value(Token::Separator, separator), value(Token::ObjectStart, tag("{")), value(Token::ObjectEnd, tag("}")), value(Token::ArrayStart, tag("[")), value(Token::ArrayEnd, tag("]")), value(Token::Equals, tag("=")), value(Token::Null, null), map(bool, Token::Boolean), map(integer, Token::Integer), map(float, Token::Float), map(string, |val| Token::String(val.to_string())), )), )(input) } pub(crate) fn parse_trailing_characters(input: Span) -> IResult { value((), optional)(input) } pub(crate) fn parse_null(input: Span) -> IResult { preceded(optional, value(Token::Null, null))(input) } pub(crate) fn parse_separator(input: Span) -> IResult { preceded( opt(horizontal_whitespace), value(Token::Separator, separator), )(input) } pub(crate) fn parse_bool(input: Span) -> IResult { preceded(optional, map(bool, Token::Boolean))(input) } pub(crate) fn parse_integer(input: Span) -> IResult { preceded(optional, map(integer, Token::Integer))(input) } pub(crate) fn parse_float(input: Span) -> IResult { preceded(optional, map(float, Token::Float))(input) } pub(crate) fn parse_identifier(input: Span) -> IResult { preceded( optional, map(identifier, |val| Token::String(val.to_string())), )(input) } pub(crate) fn parse_string(input: Span) -> IResult { preceded(optional, map(string, |val| Token::String(val.to_string())))(input) } #[cfg(test)] mod test { use nom::error::{Error, ErrorKind}; use nom::Err; use super::*; macro_rules! assert_ok { ($input:expr, $parser:ident, $remain:expr, $output:expr) => {{ let res = super::$parser(Span::from($input)); assert_eq!( res.map(|(span, res)| { (*span, res) }), Ok(($remain, $output)) ); }}; } macro_rules! assert_err { ($input:expr, $parser:ident, $kind:expr) => {{ { let input = Span::from($input); assert_eq!( super::$parser(input), Err(Err::Error(Error::new(input, $kind))) ); } }}; } fn check_parse_result, T: AsRef<[Token]>>(input: S, tokens: T) { let tokens = tokens.as_ref(); let mut remaining = Span::from(input.as_ref()); let mut i = 0; loop { if remaining.fragment().is_empty() { break; } let (span, token) = super::parse_next_token(remaining).expect("failed to parse next token"); assert_eq!(Some(&token), tokens.get(i)); remaining = span; i = i + 1; } assert_eq!( tokens.len(), i, "tokens to check against were not exhausted" ); } #[test] fn parse_optional() { assert_ok!("\n", whitespace, "", '\n'); assert_ok!("\t", whitespace, "", '\t'); assert_ok!(" ", whitespace, " ", ' '); assert_ok!("/* foo bar */", comment, "", " foo bar "); assert_ok!("// foo", comment, "", " foo"); assert_ok!("// foo\n", comment, "\n", " foo"); assert_ok!("", optional, "", ()); assert_ok!("\t\n", optional, "", ()); assert_ok!("\n\t", optional, "", ()); assert_ok!("// foo", optional, "", ()); assert_ok!("\n\t// foo\n\t/* foo\n\tbar */\n", optional, "", ()); } #[test] fn parse_integer() { assert_ok!("3", integer, "", 3); assert_ok!("12345", integer, "", 12345); assert_ok!("-12345", integer, "", -12345); assert_ok!("12345 ", integer, " ", 12345); assert_err!(" 12345", integer, ErrorKind::Digit); assert_ok!(" 12345", parse_integer, "", Token::Integer(12345)); assert_ok!("\n12345", parse_integer, "", Token::Integer(12345)); assert_ok!("\t12345", parse_integer, "", Token::Integer(12345)); } #[test] fn parse_float() { assert_ok!("3", float, "", 3.0); assert_ok!("3.0", float, "", 3.0); assert_ok!("3.1415", float, "", 3.1415); assert_ok!("-123.456789", float, "", -123.456789); assert_err!(" 1.23", float, ErrorKind::Float); assert_ok!("1.23 ", float, " ", 1.23); } #[test] fn parse_raw_string() { assert_ok!("foo", identifier, "", "foo"); assert_ok!("foo123", identifier, "", "foo123"); assert_ok!("foo_bar", identifier, "", "foo_bar"); assert_ok!("_foo", identifier, "", "_foo"); assert_ok!("foo bar", identifier, " bar", "foo"); assert_ok!("123", identifier, "", "123"); assert_ok!("1foo", identifier, "", "1foo"); assert_ok!("foo-bar", identifier, "", "foo-bar"); assert_ok!("foo/bar", identifier, "", "foo/bar"); assert_ok!("foo\"", identifier, "\"", "foo"); assert_err!("\"foo", identifier, ErrorKind::Many1Count); assert_err!("\"foo\"", identifier, ErrorKind::Many1Count); } #[test] fn parse_delimited_string() { assert_ok!(r#""""#, delimited_string, "", ""); assert_ok!(r#""foo""#, delimited_string, "", "foo"); assert_ok!(r#""\"foo""#, delimited_string, "", r#"\"foo"#); assert_ok!(r#""foo bar""#, delimited_string, "", "foo bar"); assert_ok!(r#""foo123""#, delimited_string, "", "foo123"); assert_ok!(r#""123foo""#, delimited_string, "", "123foo"); assert_ok!(r#""foo\"bar""#, delimited_string, "", "foo\\\"bar"); assert_ok!(r#""foo\\bar""#, delimited_string, "", "foo\\\\bar"); assert_ok!(r#""foo/bar""#, delimited_string, "", "foo/bar"); assert_err!("foo\"", delimited_string, ErrorKind::Char); { let input = Span::from("\"foo"); assert_eq!( delimited_string(input), Err(Err::Failure(Error::new( unsafe { Span::new_from_raw_offset(4, 1, "", ()) }, ErrorKind::Char ))) ); } { let input = Span::from("\"foo\nbar\""); assert_eq!( delimited_string(input), Err(Err::Failure(Error::new( unsafe { Span::new_from_raw_offset(4, 1, "\nbar\"", ()) }, ErrorKind::Char ))) ); } } #[test] fn parse_literal_string() { assert_ok!(r#""""""""#, literal_string, "", ""); assert_ok!(r#""""foo""""#, literal_string, "", "foo"); assert_ok!(r#""""foo"""""#, literal_string, "\"", "foo"); assert_ok!(r#"""""foo""""#, literal_string, "", "\"foo"); assert_ok!(r#""""\n""""#, literal_string, "", "\\n"); { let raw = r#" This is a lengthy description! It contains line breaks. Escape sequences, like \n and \t, are parsed literally. "Quoted strings are fine", so are two sucessive quotes: "". "#; let input = format!(r#""""{}""""#, raw); assert_ok!(input.as_str(), literal_string, "", raw); } { let input = Span::from(r#"""""""#); assert_eq!( literal_string(input), Err(Err::Error(Error::new( unsafe { Span::new_from_raw_offset(3, 1, "\"\"", ()) }, ErrorKind::TakeUntil ))) ); } } #[test] fn parse_line_comment() { assert_ok!("// foo", line_comment, "", " foo"); assert_ok!("// foo\n", line_comment, "\n", " foo"); } #[test] fn parse_block_comment() { assert_ok!("/* foo */", block_comment, "", " foo "); assert_ok!("/*\n\tfoo\nbar\n*/", block_comment, "", "\n\tfoo\nbar\n"); } // Regression test for #1 (https://git.sclu1034.dev/lucas/serde_sjson/issues/1) #[test] fn parse_dtmt_config() { let sjson = r#" name = "test-mod" description = "A dummy project to test things with" version = "0.1.0" packages = [ "packages/test-mod" ] "#; check_parse_result( sjson, [ Token::String(String::from("name")), Token::Equals, Token::String(String::from("test-mod")), Token::String(String::from("description")), Token::Equals, Token::String(String::from("A dummy project to test things with")), Token::String(String::from("version")), Token::Equals, Token::String(String::from("0.1.0")), Token::String(String::from("packages")), Token::Equals, Token::ArrayStart, Token::String(String::from("packages/test-mod")), Token::ArrayEnd, Token::Eof, ], ); } // Regression test for #2 #[test] fn parse_windows_path() { let text = "C:\\Users\\public\\test.txt"; let sjson = format!(r#""{}""#, text); check_parse_result(sjson, [Token::String(String::from(text))]); } // Regression test for #10 #[test] fn parse_crlf_separator() { let sjson = "foo = 1\r\nbar = 2"; check_parse_result( sjson, [ Token::String(String::from("foo")), Token::Equals, Token::Integer(1), Token::String(String::from("bar")), Token::Equals, Token::Integer(2), ], ); } }