1
Fork 0

fix: Fix serializing Unicode

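The string index operator works on byte offsets, not `char` positions,
but the previous loop sliced the input with indices taken from
`chars().enumerate()`. For non-ASCII input those indices do not match
byte offsets, so the slices were wrong or could fall inside a multi-byte
character. So I switched back to a simpler, but slightly less efficient
loop that simply adds individual characters to the output.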

It also no longer emits `\u` escapes for Unicode characters, as writing
them verbatim shouldn't be an issue in UTF-8 encoded output files.
This commit is contained in:
Lucas Schwiderski 2022-12-28 19:44:53 +01:00
parent ded56befb2
commit f76acf5407
Signed by: lucas
GPG key ID: AA12679AAA6DF4D8
2 changed files with 10 additions and 23 deletions

View file

@ -6,6 +6,10 @@
== [Unreleased]
=== Fixed
- fix serializing Unicode
== [v0.2.0] - 2022-11-25
=== Added

View file

@ -121,48 +121,31 @@ impl<'a> serde::ser::Serializer for &'a mut Serializer {
fn serialize_str(self, v: &str) -> Result<Self::Ok> {
self.ensure_top_level_struct()?;
let needs_escapes =
v.is_empty() || v.contains([' ', '\n', '\r', '\t', '=', '\'', '"', '\\', '/']);
if needs_escapes {
self.output += "\"";
let len = v.len();
let chars = v.chars();
let mut start = 0;
for (i, c) in chars.enumerate() {
if ('\x20'..='\x7e').contains(&c)
&& !['\t', '\n', '\r', '\"', '\\', '/'].contains(&c)
{
continue;
}
self.output += &v[start..i];
self.output.push('\\');
for c in v.chars() {
match c {
'\t' => {
self.output.push('\\');
self.output.push('t');
}
'\n' => {
self.output.push('\\');
self.output.push('n');
}
'\r' => {
self.output.push('\\');
self.output.push('r');
}
'\x7f'.. => {
self.output += &format!("u{:4x}", c as u32);
}
c => {
self.output.push(c);
}
};
start = i + 1;
}
if start < len {
self.output += &v[start..];
}
self.output += "\"";