From 5ec97dab43a7d9b27c30f17621063df48a399a54 Mon Sep 17 00:00:00 2001 From: Lucas Schwiderski Date: Thu, 21 Mar 2024 10:03:15 +0100 Subject: [PATCH] Implement generic writer target Implements `to_writer` and `to_vec` functions. --- CHANGELOG.adoc | 2 + src/error.rs | 10 +- src/lib.rs | 2 +- src/ser.rs | 284 +++++++++++++++++++++++++-------------------- tests/serialize.rs | 9 +- 5 files changed, 176 insertions(+), 131 deletions(-) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 31d280c..9edebbe 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -8,6 +8,8 @@ === Added +- implement serializing into generic `io::Write` + === Fixed - fix parsing CRLF diff --git a/src/error.rs b/src/error.rs index 621ad60..feee003 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,4 @@ -use std::fmt; +use std::{fmt, io}; use crate::parser::Token; @@ -38,6 +38,7 @@ pub(crate) enum ErrorCode { ExpectedTopLevelObject, ExpectedValue, TrailingCharacters, + NonFiniteFloat, } impl fmt::Display for ErrorCode { @@ -64,6 +65,7 @@ impl fmt::Display for ErrorCode { ErrorCode::ExpectedTopLevelObject => f.write_str("expected object at the top level"), ErrorCode::ExpectedValue => f.write_str("expected a value"), ErrorCode::TrailingCharacters => f.write_str("unexpected trailing characters"), + ErrorCode::NonFiniteFloat => f.write_str("got infinite floating point number"), } } } @@ -166,3 +168,9 @@ impl Error { } } } + +impl From for Error { + fn from(err: io::Error) -> Self { + Self::new(ErrorCode::Message(format!("{}", err)), 0, 0, None) + } +} diff --git a/src/lib.rs b/src/lib.rs index 440118a..ea30715 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,4 +5,4 @@ mod ser; pub use de::{from_str, Deserializer}; pub use error::{Error, Result}; -pub use ser::{to_string, Serializer}; +pub use ser::{to_string, to_vec, to_writer, Serializer}; diff --git a/src/ser.rs b/src/ser.rs index 5674195..532f78f 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -1,36 +1,75 @@ +use std::io; + use serde::Serialize; use crate::error::{Error, ErrorCode, Result}; // TODO: Make configurable -const INDENT: &str = " "; +const INDENT: [u8; 2] = [0x20, 0x20]; -pub struct Serializer { +pub struct Serializer { // The current indentation level level: usize, - // The output string - output: String, + writer: W, } +#[inline] +pub fn to_writer(writer: &mut W, value: &T) -> Result<()> +where + W: io::Write, + T: Serialize, +{ + let mut serializer = Serializer::new(writer); + value.serialize(&mut serializer) +} + +#[inline] +pub fn to_vec(value: &T) -> Result> +where + T: Serialize, +{ + let mut vec = Vec::with_capacity(128); + to_writer(&mut vec, value)?; + Ok(vec) +} + +#[inline] pub fn to_string(value: &T) -> Result where T: Serialize, { - let mut serializer = Serializer { - level: 0, - output: String::new(), + let vec = to_vec(value)?; + let string = if cfg!(debug_assertions) { + String::from_utf8(vec).expect("We do not emit invalid UTF-8") + } else { + unsafe { String::from_utf8_unchecked(vec) } }; - value.serialize(&mut serializer)?; - Ok(serializer.output) + Ok(string) } -impl Serializer { - fn add_indent(&mut self) { - for _ in 0..self.level.saturating_sub(1) { - self.output += INDENT; - } +impl Serializer +where + W: io::Write, +{ + pub fn new(writer: W) -> Self { + Self { level: 0, writer } } + #[inline] + fn write(&mut self, bytes: impl AsRef<[u8]>) -> Result<()> { + self.writer.write_all(bytes.as_ref()).map_err(Error::from) + } + + #[inline] + fn add_indent(&mut self) -> Result<()> { + for _ in 0..self.level.saturating_sub(1) { + self.write(INDENT)?; + } + + Ok(()) + } + + #[inline] fn ensure_top_level_struct(&self) -> Result<()> { if self.level == 0 { return Err(Error::new(ErrorCode::ExpectedTopLevelObject, 0, 0, None)); @@ -40,7 +79,10 @@ impl Serializer { } } -impl<'a> serde::ser::Serializer for &'a mut Serializer { +impl<'a, W> serde::ser::Serializer for &'a mut Serializer +where + W: io::Write, +{ type Ok = (); type Error = Error; @@ -54,7 +96,7 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { fn serialize_bool(self, v: bool) -> Result { self.ensure_top_level_struct()?; - self.output += if v { "true" } else { "false" }; + self.write(if v { "true" } else { "false" })?; Ok(()) } @@ -72,8 +114,7 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { fn serialize_i64(self, v: i64) -> Result { self.ensure_top_level_struct()?; - self.output += &v.to_string(); - Ok(()) + self.serialize_str(&format!("{}", v)) } fn serialize_u8(self, v: u8) -> Result { @@ -90,33 +131,25 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { fn serialize_u64(self, v: u64) -> Result { self.ensure_top_level_struct()?; - self.output += &v.to_string(); - Ok(()) + self.serialize_str(&format!("{}", v)) } fn serialize_f32(self, v: f32) -> Result { - if v.is_finite() { - self.serialize_f64(v.into()) - } else { - self.ensure_top_level_struct()?; - self.output += "null"; - Ok(()) - } + self.serialize_f64(v.into()) } fn serialize_f64(self, v: f64) -> Result { self.ensure_top_level_struct()?; - if v.is_finite() { - self.output += &v.to_string(); - } else { - self.output += "null"; + if !v.is_finite() { + return Err(Error::new(ErrorCode::NonFiniteFloat, 0, 0, None)); } - Ok(()) + + self.serialize_str(&format!("{}", v)) } fn serialize_char(self, v: char) -> Result { let mut buf = [0; 4]; - self.serialize_str(v.encode_utf8(&mut buf)) + self.serialize_bytes(v.encode_utf8(&mut buf).as_bytes()) } fn serialize_str(self, v: &str) -> Result { @@ -126,45 +159,52 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { v.is_empty() || v.contains([' ', '\n', '\r', '\t', '=', '\'', '"', '\\', ':']); if needs_quotes { - self.output += "\""; + self.write(b"\"")?; + // Since we've added a layer of quotes, we now need to escape + // certain characters. for c in v.chars() { match c { '\t' => { - self.output.push('\\'); - self.output.push('t'); + self.write(b"\\")?; + self.write(b"t")?; } '\n' => { - self.output.push('\\'); - self.output.push('n'); + self.write(b"\\")?; + self.write(b"n")?; } '\r' => { - self.output.push('\\'); - self.output.push('r'); + self.write(b"\\")?; + self.write(b"r")?; } '"' => { - self.output.push('\\'); - self.output.push('"'); + self.write(b"\\")?; + self.write(b"\"")?; } '\\' => { - self.output.push('\\'); - self.output.push('\\'); + self.write(b"\\")?; + self.write(b"\\")?; } c => { - self.output.push(c); + self.serialize_char(c)?; } }; } - self.output += "\""; + self.write(b"\"")?; } else { - self.output += v; + self.write(v.as_bytes())?; } + Ok(()) } - fn serialize_bytes(self, _v: &[u8]) -> Result { - todo!() + fn serialize_bytes(self, v: &[u8]) -> Result { + self.ensure_top_level_struct()?; + // For now we assume that the byte array contains + // valid SJSON. + // TODO: Turn this into an actual array of encoded bytes. + self.write(v) } fn serialize_none(self) -> Result { @@ -184,8 +224,7 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { fn serialize_unit(self) -> Result { self.ensure_top_level_struct()?; - self.output += "null"; - Ok(()) + self.write(b"null") } fn serialize_unit_struct(self, _name: &'static str) -> Result { @@ -223,19 +262,18 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { { self.ensure_top_level_struct()?; - self.output += "{ "; + self.write(b"{ ")?; variant.serialize(&mut *self)?; - self.output += " = "; + self.write(b" = ")?; value.serialize(&mut *self)?; - self.output += " }\n"; - Ok(()) + self.write(b" }") } // Serialize the start of a sequence. fn serialize_seq(self, _len: Option) -> Result { self.ensure_top_level_struct()?; - self.output += "[\n"; + self.write(b"[\n")?; self.level += 1; Ok(self) } @@ -266,7 +304,7 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { variant.serialize(&mut *self)?; - self.output += " = [\n"; + self.write(b" = [\n")?; self.level += 1; Ok(self) @@ -274,7 +312,7 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { fn serialize_map(self, _len: Option) -> Result { if self.level > 0 { - self.output += "{\n"; + self.write(b"{\n")?; } self.level += 1; Ok(self) @@ -296,7 +334,7 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { variant.serialize(&mut *self)?; - self.output += " = {\n"; + self.write(b" = {\n")?; self.level += 1; Ok(self) @@ -310,7 +348,10 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer { } } -impl<'a> serde::ser::SerializeSeq for &'a mut Serializer { +impl<'a, W> serde::ser::SerializeSeq for &'a mut Serializer +where + W: io::Write, +{ type Ok = (); type Error = Error; @@ -318,23 +359,22 @@ impl<'a> serde::ser::SerializeSeq for &'a mut Serializer { where T: Serialize, { - self.add_indent(); + self.add_indent()?; value.serialize(&mut **self)?; - if !self.output.ends_with('\n') { - self.output += "\n"; - } - Ok(()) + self.write(b"\n") } fn end(self) -> Result { self.level -= 1; - self.add_indent(); - self.output += "]\n"; - Ok(()) + self.add_indent()?; + self.write(b"]") } } -impl<'a> serde::ser::SerializeTuple for &'a mut Serializer { +impl<'a, W> serde::ser::SerializeTuple for &'a mut Serializer +where + W: io::Write, +{ type Ok = (); type Error = Error; @@ -342,23 +382,22 @@ impl<'a> serde::ser::SerializeTuple for &'a mut Serializer { where T: Serialize, { - self.add_indent(); + self.add_indent()?; value.serialize(&mut **self)?; - if !self.output.ends_with('\n') { - self.output += "\n"; - } - Ok(()) + self.write(b"\n") } fn end(self) -> Result { self.level -= 1; - self.add_indent(); - self.output += "]\n"; - Ok(()) + self.add_indent()?; + self.write(b"]") } } -impl<'a> serde::ser::SerializeTupleStruct for &'a mut Serializer { +impl<'a, W> serde::ser::SerializeTupleStruct for &'a mut Serializer +where + W: io::Write, +{ type Ok = (); type Error = Error; @@ -366,23 +405,22 @@ impl<'a> serde::ser::SerializeTupleStruct for &'a mut Serializer { where T: Serialize, { - self.add_indent(); + self.add_indent()?; value.serialize(&mut **self)?; - if !self.output.ends_with('\n') { - self.output += "\n"; - } - Ok(()) + self.write(b"\n") } fn end(self) -> Result { self.level -= 1; - self.add_indent(); - self.output += "]\n"; - Ok(()) + self.add_indent()?; + self.write(b"]") } } -impl<'a> serde::ser::SerializeTupleVariant for &'a mut Serializer { +impl<'a, W> serde::ser::SerializeTupleVariant for &'a mut Serializer +where + W: io::Write, +{ type Ok = (); type Error = Error; @@ -390,28 +428,31 @@ impl<'a> serde::ser::SerializeTupleVariant for &'a mut Serializer { where T: Serialize, { - self.add_indent(); + self.add_indent()?; value.serialize(&mut **self)?; - if !self.output.ends_with('\n') { - self.output += "\n"; - } - Ok(()) + self.write(b"\n") } fn end(self) -> Result { self.level -= 1; - self.add_indent(); - self.output += "]\n"; + self.add_indent()?; + self.write(b"]\n")?; + self.level -= 1; + if self.level > 0 { - self.add_indent(); - self.output += "}\n"; + self.add_indent()?; + self.write(b"}")?; } + Ok(()) } } -impl<'a> serde::ser::SerializeMap for &'a mut Serializer { +impl<'a, W> serde::ser::SerializeMap for &'a mut Serializer +where + W: io::Write, +{ type Ok = (); type Error = Error; @@ -419,7 +460,7 @@ impl<'a> serde::ser::SerializeMap for &'a mut Serializer { where T: Serialize, { - self.add_indent(); + self.add_indent()?; key.serialize(&mut **self) } @@ -430,25 +471,25 @@ impl<'a> serde::ser::SerializeMap for &'a mut Serializer { // It doesn't make a difference where the `=` is added. But doing it here // means `serialize_key` is only a call to a different function, which should // have greater optimization potential for the compiler. - self.output += " = "; + self.write(b" = ")?; value.serialize(&mut **self)?; - if !self.output.ends_with('\n') { - self.output += "\n"; - } - Ok(()) + self.write(b"\n") } fn end(self) -> Result { if self.level > 1 { self.level -= 1; - self.add_indent(); - self.output += "}\n"; + self.add_indent()?; + self.write(b"}")?; } Ok(()) } } -impl<'a> serde::ser::SerializeStruct for &'a mut Serializer { +impl<'a, W> serde::ser::SerializeStruct for &'a mut Serializer +where + W: io::Write, +{ type Ok = (); type Error = Error; @@ -456,29 +497,29 @@ impl<'a> serde::ser::SerializeStruct for &'a mut Serializer { where T: Serialize, { - self.add_indent(); + self.add_indent()?; key.serialize(&mut **self)?; - self.output += " = "; + self.write(b" = ")?; value.serialize(&mut **self)?; - if !self.output.ends_with('\n') { - self.output += "\n"; - } - Ok(()) + self.write(b"\n") } fn end(self) -> Result { if self.level > 1 { self.level -= 1; - self.add_indent(); - self.output += "}\n"; + self.add_indent()?; + self.write(b"}")?; } Ok(()) } } -impl<'a> serde::ser::SerializeStructVariant for &'a mut Serializer { +impl<'a, W> serde::ser::SerializeStructVariant for &'a mut Serializer +where + W: std::io::Write, +{ type Ok = (); type Error = Error; @@ -486,21 +527,18 @@ impl<'a> serde::ser::SerializeStructVariant for &'a mut Serializer { where T: Serialize, { - self.add_indent(); + self.add_indent()?; key.serialize(&mut **self)?; - self.output += " = "; + self.write(b" = ")?; value.serialize(&mut **self)?; - if !self.output.ends_with('\n') { - self.output += "\n"; - } - Ok(()) + self.write(b"\n") } fn end(self) -> Result { if self.level > 0 { self.level -= 1; - self.add_indent(); - self.output += "}\n"; + self.add_indent()?; + self.write(b"}")?; } Ok(()) } diff --git a/tests/serialize.rs b/tests/serialize.rs index 20854d0..7afb926 100644 --- a/tests/serialize.rs +++ b/tests/serialize.rs @@ -1,4 +1,4 @@ -use serde_sjson::to_string; +use serde_sjson::{to_string, Error}; #[test] fn serialize_null() { @@ -81,17 +81,14 @@ fn serialize_non_representable_floats() { } let tests = [std::f64::NAN, std::f64::INFINITY, std::f64::NEG_INFINITY]; - let expected = String::from("value = null\n"); for value in tests { let value = Value64 { value }; - let actual = to_string(&value).unwrap(); - assert_eq!(actual, expected); + assert!(to_string(&value).is_err()); } let tests = [std::f32::NAN, std::f32::INFINITY, std::f32::NEG_INFINITY]; for value in tests { let value = Value32 { value }; - let actual = to_string(&value).unwrap(); - assert_eq!(actual, expected); + assert!(to_string(&value).is_err()); } }