A bencode-inspired, tight, self-describing serialization format
init
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | Cargo.toml | 15 | ||||
| -rw-r--r-- | fuzz/.gitignore | 5 | ||||
| -rw-r--r-- | fuzz/Cargo.toml | 20 | ||||
| -rw-r--r-- | fuzz/fuzz_targets/from_slice.rs | 13 | ||||
| -rw-r--r-- | src/de.rs | 545 | ||||
| -rw-r--r-- | src/de/tests.rs | 101 | ||||
| -rw-r--r-- | src/error.rs | 52 | ||||
| -rw-r--r-- | src/lib.rs | 8 | ||||
| -rw-r--r-- | src/ser.rs | 470 | ||||
| -rw-r--r-- | src/ser/tests.rs | 107 | ||||
| -rw-r--r-- | src/serde.rs | 26 |
12 files changed, 1364 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..4d4ff23 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "bendncode" +version = "0.1.0" +edition = "2024" + +[dependencies] +atools = "0.1.12" +raad = { version = "0.1.3" } +serde = { version = "1.0.228", features = ["derive"] } +thiserror = "2.0.18" + +serde_json = "1.0.150" +[dev-dependencies] +postcard = { version = "1.1.3", features = ["use-std"] } +serde_json = "1.0.150" diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..4bc31dc --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,5 @@ +/artifacts/ +/corpus/ +/coverage/ +/target/ +/Cargo.lock diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..08ae682 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "serde_json-fuzz" +version = "0.0.0" +authors = ["David Tolnay <[email protected]>"] +edition = "2021" +publish = false + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +bendncode = { path = ".." 
} +serde_json = "1.0.150" + +[[bin]] +name = "from_slice" +path = "fuzz_targets/from_slice.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/from_slice.rs b/fuzz/fuzz_targets/from_slice.rs new file mode 100644 index 0000000..26fc3e2 --- /dev/null +++ b/fuzz/fuzz_targets/from_slice.rs @@ -0,0 +1,13 @@ +#![no_main] + +use bendncode::from_bytes; +use libfuzzer_sys::fuzz_target; +use serde_json::{from_slice, Value}; + +fuzz_target!(|data: &[u8]| { + if let Ok(x_) = from_bytes::<Value>(data) { + let x = bendncode::to_bytes(&x_).unwrap(); + let y = from_bytes::<Value>(&x).unwrap(); + assert_eq!(x_, y, "{data:?}: {x:?}: {x_}: {y}"); + } +}); diff --git a/src/de.rs b/src/de.rs new file mode 100644 index 0000000..175c147 --- /dev/null +++ b/src/de.rs @@ -0,0 +1,545 @@ +use crate::serde::T; +use crate::{Error, Result}; +use raad::le::R; +use serde::{ + self, Deserialize, + de::{self, DeserializeSeed, Visitor}, +}; + +pub struct Deserializer<'de> { + r: &'de [u8], +} +pub fn from_bytes<'a, T: Deserialize<'a>>(x: &'a [u8]) -> Result<T> { + let mut d = Deserializer { r: x }; + T::deserialize(&mut d) +} +impl<'de> Deserializer<'de> { + pub fn leb128(&mut self) -> Result<u128> { + let mut res = 0u128; + let mut shift = 0; + let mut b = 128; + while b & 128 != 0 { + b = self.r.r::<u8>()?; + res |= ((b & 127) as u128) + .checked_shl(shift * 7) + .ok_or(Error::Overflow)?; + shift += 1; + } + Ok(res) + } + pub fn sleb128(&mut self) -> Result<i128> { + let mut res = 0u128; + let mut shift = 0; + let mut b = 128; + while b & 128 != 0 { + b = self.r.r::<u8>()?; + res |= ((b & 127) as u128) + .checked_shl(shift * 7) + .ok_or(Error::Overflow)?; + shift += 1; + } + if (shift < 128) && ((b & 64) != 0) { + res |= (!0u128).checked_shl(shift * 7).ok_or(Error::Overflow)?; + } + Ok(res.cast_signed()) + } + fn t(&mut self) -> Result<u8> { + Ok(self.r.r()?) 
+ } + + fn a(&self) -> Result<u8> { + self.r.first().ok_or(Error::OOB).copied() + } + #[track_caller] + fn tag(&mut self, expected: T) -> Result<()> { + let t = self.t()?; + if t != expected as u8 { + return Err(Error::Expected { expected, found: t }); + } + Ok(()) + } +} + +impl<'de> serde::Deserializer<'de> for &mut Deserializer<'de> { + type Error = Error; + + fn deserialize_any<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + let tag = self.a()?; + match tag { + x if T::True == x || T::False == x => self.deserialize_bool(visitor), + x if T::Int == x => { + self.tag(T::Int)?; + let v = self.sleb128()? as _; + if let Ok(v64) = i64::try_from(v) { + visitor.visit_i64(v64) + } else { + visitor.visit_i128(v) + } + } + x if T::Uint == x => { + self.tag(T::Uint)?; + let v = self.leb128()?; + if let Ok(v64) = u64::try_from(v) { + visitor.visit_u64(v64) + } else { + visitor.visit_u128(v) + } + } + x if T::Float == x => self.deserialize_f32(visitor), + x if T::Double == x => self.deserialize_f64(visitor), + x if T::String == x => self.deserialize_str(visitor), + x if T::List == x => self.deserialize_seq(visitor), + + x if T::Map == x => self.deserialize_map(visitor), + x if T::None == x || T::Some == x => self.deserialize_option(visitor), + x if T::NVariant == x => { + self.tag(T::NVariant)?; // index, followed by an any + visitor.visit_map(MapAccess { de: self, len: 1 }) + } + x if T::SVariant == x => { + self.tag(T::SVariant)?; + self.tag(T::Uint)?; + let _idx = self.leb128()?; + let len = self.leb128()?; + visitor.visit_map(MapAccess { + de: self, + len: len as usize, + }) + } + x if T::UVariant == x => { + self.tag(T::UVariant)?; + self.tag(T::Uint)?; + visitor.visit_u32(self.leb128()? 
as _) + } + x if T::TVariant == x => { + self.tag(T::TVariant)?; + + self.tag(T::Uint)?; + let _ix = self.leb128()?; + let len = self.leb128()?; + visitor.visit_seq(SeqAccess::new(self, len as usize)) + } + x => Err(Error::NotTag(x)), + } + } + #[inline] + fn is_human_readable(&self) -> bool { + false + } + fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + match self.t()? { + x if T::True == x => visitor.visit_bool(true), + x if T::False == x => visitor.visit_bool(false), + x => Err(Error::Expected { + expected: T::True, + found: x, + }), + } + } + + fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Int)?; + visitor.visit_i8(self.sleb128()? as _) + } + + fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Int)?; + visitor.visit_i16(self.sleb128()? as _) + } + + fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Int)?; + visitor.visit_i32(self.sleb128()? as _) + } + + fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Int)?; + visitor.visit_i64(self.sleb128()? as _) + } + fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Int)?; + visitor.visit_i128(self.sleb128()?) 
+ } + + fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Uint)?; + let v = self.leb128()?; + visitor.visit_u8(v as _) + } + + fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Uint)?; + let v = self.leb128()?; + visitor.visit_u16(v as _) + } + + fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Uint)?; + let v = self.leb128()?; + visitor.visit_u32(v as _) + } + + fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Uint)?; + let v = self.leb128()?; + visitor.visit_u64(v as _) + } + + fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Uint)?; + let v = self.leb128()?; + visitor.visit_u128(v) + } + + fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Float)?; + visitor.visit_f32(self.r.r()?) + } + + fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Double)?; + visitor.visit_f64(self.r.r()?) + } + + fn deserialize_char<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Uint)?; + let v = self.leb128()?; + visitor.visit_char(char::from_u32(v as _).ok_or(Error::NotChar(v as u32))?) + } + + fn deserialize_str<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + match self.a()? { + x if T::Uint == x => { + self.tag(T::Uint)?; + let v = self.leb128()?; + visitor.visit_str(&v.to_string()) + } + _ => { + self.tag(T::String)?; + let len = self.leb128()? 
as usize; + let v = visitor + .visit_borrowed_str(str::from_utf8(&self.r.get(..len).ok_or(Error::OOB)?)?); + self.r = self.r.get(len..).ok_or(Error::OOB)?; + v + } + } + } + + fn deserialize_string<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.deserialize_byte_buf(visitor) + } + + fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::String)?; + let len = self.leb128()? as usize; + let v = visitor.visit_borrowed_bytes(&self.r.get(..len).ok_or(Error::OOB)?); + self.r = &self.r.get(len..).ok_or(Error::OOB)?; + v + } + + fn deserialize_option<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + match self.t()? { + x if T::Some == x => visitor.visit_some(self), + x if T::None == x => visitor.visit_none(), + x => Err(Error::Expected { + expected: T::Some, + found: x, + }), + } + } + + fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::None)?; + visitor.visit_unit() + } + + fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::List)?; + let len = self.leb128()? 
as usize; + visitor.visit_seq(SeqAccess::new(self, len)) + } + + fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_tuple_struct<V>( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_map<V>(self, _visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.tag(T::Map)?; + let len = self.leb128()? as usize; + _visitor.visit_map(MapAccess { de: self, len }) + } + + fn deserialize_struct<V>( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result<V::Value> + where + V: Visitor<'de>, + { + // println!("hello"); + self.deserialize_map(visitor) + } + + fn deserialize_enum<V>( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result<V::Value> + where + V: Visitor<'de>, + { + let tag = self.t()?; + + // let variant_index = self.leb128()? as u32; + + visitor.visit_enum(EnumAccess::new(self, tag)) + } + + fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + match self.t()? { + x if T::String == x => { + let len = self.leb128()? as usize; + let v = str::from_utf8(&self.r.get(..len).ok_or(Error::OOB)?)?; + self.r = self.r.get(len..).ok_or(Error::OOB)?; + visitor.visit_borrowed_str(&v) + } + x if T::Uint == x => visitor.visit_u32(self.leb128()? 
as _), + x => Err(Error::Expected { + expected: T::String, + found: x, + }), + } + } + + fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + self.deserialize_any(visitor) + } +} + +struct SeqAccess<'a, 'de> { + de: &'a mut Deserializer<'de>, + len: usize, +} + +impl<'a, 'de> SeqAccess<'a, 'de> { + fn new(de: &'a mut Deserializer<'de>, len: usize) -> Self { + SeqAccess { de, len } + } +} + +impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { + type Error = Error; + + fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>> + where + T: DeserializeSeed<'de>, + { + if self.len == 0 { + Ok(None) + } else { + self.len -= 1; + seed.deserialize(&mut *self.de).map(Some) + } + } +} + +struct EnumAccess<'a, 'de> { + de: &'a mut Deserializer<'de>, + _tag: u8, +} + +impl<'a, 'de> EnumAccess<'a, 'de> { + fn new(de: &'a mut Deserializer<'de>, _tag: u8) -> Self { + EnumAccess { de, _tag } + } +} + +impl<'a, 'de> de::EnumAccess<'de> for EnumAccess<'a, 'de> { + type Error = Error; + type Variant = Self; + + fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self)> + where + V: DeserializeSeed<'de>, + { + let val = seed.deserialize(&mut *self.de)?; + Ok((val, self)) + } +} + +impl<'a, 'de> de::VariantAccess<'de> for EnumAccess<'a, 'de> { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + Ok(()) + } + + fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value> + where + T: DeserializeSeed<'de>, + { + seed.deserialize(self.de) + } + + fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + let x = self.de.leb128()?; + assert_eq!(x, len as u128); + visitor.visit_seq(SeqAccess::new(self.de, len)) + } + + fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value> + where + V: Visitor<'de>, + { + let x = self.de.leb128()?; + assert_eq!(x, _fields.len() as u128); + // T::SVariant data follows + 
visitor.visit_map(MapAccess { + de: self.de, + len: _fields.len(), + }) + } +} + +struct MapAccess<'a, 'de> { + de: &'a mut Deserializer<'de>, + len: usize, +} +impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { + type Error = Error; + + fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>> + where + K: DeserializeSeed<'de>, + { + if self.len == 0 { + Ok(None) + } else { + self.len -= 1; + seed.deserialize(&mut *self.de).map(Some) + } + } + + fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value> + where + V: DeserializeSeed<'de>, + { + seed.deserialize(&mut *self.de) + } +} +#[cfg(test)] +mod tests; diff --git a/src/de/tests.rs b/src/de/tests.rs new file mode 100644 index 0000000..746d5b3 --- /dev/null +++ b/src/de/tests.rs @@ -0,0 +1,101 @@ +use std::collections::HashMap; + +use lsp_types::{CompletionRegistrationOptions, CompletionResponse}; +use serde::{Deserialize, Serialize, de::DeserializeOwned}; +use serde_json::to_value; + +use crate::to_bytes; + +use super::*; +#[track_caller] +fn rtt<X: Serialize + PartialEq + std::fmt::Debug>(x: X) +where + X: for<'a> Deserialize<'a>, +{ + let b = to_bytes(&x).unwrap(); + for &ch in &b { + if ch.is_ascii_alphabetic() { + print!("{},", ch as char); + } else { + print!("{ch},"); + } + } + dbg!(from_bytes::<serde_json::Value>(&b).unwrap()); + // println!("{b:?}"); + let f = from_bytes::<X>(&b).unwrap_or_else(|e| { + println!("{e}"); + panic!() + }); + assert_eq!(f, x); +} + +#[test] +fn basic() { + rtt(4); + rtt([1, 2, 3, 4]); + rtt((1, 2, 3)); + rtt(("hello".to_string(), 5, vec![1, 2, 3])); + rtt(HashMap::<u8, u16>::from_iter([(1, 5), (4, 2)])); + rtt(Some(4)); + rtt(None::<u8>); + rtt(-1); + rtt(-487); + let x: &str = from_bytes(&to_bytes(&"hi").unwrap()).unwrap(); +} + +#[test] +fn structs() { + #[derive(Serialize, Deserialize, Debug, PartialEq)] + struct Y { + abra: u32, + bada: u16, + hocu: Vec<HashMap<u16, f32>>, + } + rtt(Y { + abra: 4, + bada: 5, + hocu: vec![ + HashMap::from_iter([(1, 
5.0), (41, 41.0)]), + HashMap::default(), + ], + }); + #[derive(Serialize, Deserialize, Debug, PartialEq)] + struct Z(u32); + rtt(Z(4515161)); + #[derive(Serialize, Deserialize, Debug, PartialEq)] + struct A; + rtt(A); +} +#[test] +fn enums() { + #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] + enum E { + Unit, + Newtype(u32), + Tuple(u32, u32), + Struct { a: u32 }, + } + + rtt(E::Unit); + rtt(E::Newtype(51)); + + rtt(E::Tuple(1, 24151561)); + rtt(E::Struct { a: 1 }); +} +#[test] +fn postcard_fails_these() { + #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] + struct A { + #[serde(skip_serializing_if = "Option::is_none")] + x: Option<u32>, + #[serde(skip_serializing_if = "Option::is_none")] + y: Option<u32>, + #[serde(skip_serializing_if = "Option::is_none")] + z: Option<u32>, + } + rtt(A { + x: Some(4), + y: None, + z: Some(5), + }); +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..6f70009 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,52 @@ +use std::fmt::Display; + +use thiserror::Error; + +use crate::serde::T; + +pub type Result<T> = std::result::Result<T, Error>; + +// This is a bare-bones implementation. A real library would provide additional +// information in its error type, for example the line and column at which the +// error occurred, the byte offset into the input, or the current key being +// processed. 
+#[derive(Debug, Error)] +pub enum Error { + #[error("{0}")] + Message(String), + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("expected {expected:?}, found {found}")] + Expected { expected: T, found: u8 }, + #[error(transparent)] + FromUtf8Error(#[from] std::string::FromUtf8Error), + #[error(transparent)] + Utf8Error(#[from] std::str::Utf8Error), + #[error("a length is required for most things")] + LenLess, + #[error("char {0} was not in the valid range of chars")] + NotChar(u32), + #[error("out of bounds index")] + OOB, + #[error("{0} is not a tag")] + NotTag(u8), + #[error("out of range")] + Overflow, +} + +impl serde::ser::Error for Error { + fn custom<T>(msg: T) -> Self + where + T: Display, + { + Self::Message(msg.to_string()) + } +} +impl serde::de::Error for Error { + fn custom<T>(msg: T) -> Self + where + T: Display, + { + Self::Message(msg.to_string()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..b7b304f --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,8 @@ +#![cfg_attr(test, feature(generic_const_exprs, min_adt_const_params))] +mod de; +mod error; +mod ser; +pub(crate) mod serde; +pub use de::{Deserializer, from_bytes}; +pub use error::{Error, Result}; +pub use ser::{Serializer, to_bytes}; diff --git a/src/ser.rs b/src/ser.rs new file mode 100644 index 0000000..82b3600 --- /dev/null +++ b/src/ser.rs @@ -0,0 +1,470 @@ +use crate::error::{Error, Result}; +use crate::serde::T; +use raad::le::*; +use serde::{Serialize, ser}; +use std::io::Write; + +pub fn to_bytes<T>(value: &T) -> Result<Vec<u8>> +where + T: Serialize, +{ + let mut v = vec![]; + let mut serializer = Serializer { w: &mut v }; + value.serialize(&mut serializer)?; + Ok(v) +} +pub struct Serializer<W: std::io::Write> { + w: W, +} +impl<W: std::io::Write> Serializer<W> { + fn wh(&mut self, h: T, w: impl raad::le::Writable) -> Result<()> { + self.t(h)?; + self.w.w(w)?; + Ok(()) + // self.w.w(e); + } + fn t(&mut self, h: T) -> Result<()> { + self.w.w(h 
as u8)?; + Ok(()) + } + fn leb128h(&mut self, h: T, w: impl Into<u128>) -> Result<()> { + self.t(h)?; + self.leb128(w) + } + fn leb128(&mut self, w: impl Into<u128>) -> Result<()> { + let mut w = w.into(); + loop { + let n = (w & 127) as u8; + w >>= 7; + if w == 0 { + self.w.w(n)?; + break; + } else { + self.w.w(n | 1 << 7)?; + } + } + Ok(()) + } + fn sleb128(&mut self, mut value: i128) -> Result<()> { + loop { + let n = (value & 127) as u8; + value >>= 7; + let sign_bit = n & 64; + if (value == 0 && sign_bit == 0) || (value == -1 && sign_bit != 0) { + self.w.w(n)?; + break; + } else { + self.w.w(n | 1 << 7)?; + } + } + Ok(()) + } +} + +impl<W: std::io::Write> ser::Serializer for &mut Serializer<W> { + // The output type produced by this `Serializer` during successful + // serialization. Most serializers that produce text or binary output should + // set `Ok = ()` and serialize into an `io::Write` or buffer contained + // within the `Serializer` instance, as happens here. Serializers that build + // in-memory data structures may be simplified by using `Ok` to propagate + // the data structure around. + type Ok = (); + + // The error type when some error occurs during serialization. + type Error = Error; + + // Associated types for keeping track of additional state while serializing + // compound data structures like sequences and maps. In this case no + // additional state is required beyond what is already stored in the + // Serializer struct. + type SerializeSeq = Self; + type SerializeTuple = Self; + type SerializeTupleStruct = Self; + type SerializeTupleVariant = Self; + type SerializeMap = Self; + type SerializeStruct = Self; + type SerializeStructVariant = Self; + + // Here we go with the simple methods. The following 12 methods receive one + // of the primitive types of the data model and map it to JSON by appending + // into the output string. 
+ fn serialize_bool(self, v: bool) -> Result<()> { + // println!("serialize bool {v}"); + self.w.w(v as u8)?; + Ok(()) + } + + // JSON does not distinguish between different sizes of integers, so all + // signed integers will be serialized the same and all unsigned integers + // will be serialized the same. Other formats, especially compact binary + // formats, may need independent logic for the different sizes. + fn serialize_i8(self, v: i8) -> Result<()> { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i16(self, v: i16) -> Result<()> { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i32(self, v: i32) -> Result<()> { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i64(self, v: i64) -> Result<()> { + // println!("serialize i64 {v}"); + self.t(T::Int)?; + self.sleb128(v as i128) + } + + fn serialize_u8(self, v: u8) -> Result<()> { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u16(self, v: u16) -> Result<()> { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u32(self, v: u32) -> Result<()> { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u64(self, v: u64) -> Result<()> { + // println!("serialize u64 {v}"); + self.leb128h(T::Uint, v) + } + + fn serialize_f32(self, v: f32) -> Result<()> { + // println!("serialize f32 {v}"); + self.wh(T::Float, v) + } + + fn serialize_f64(self, v: f64) -> Result<()> { + // println!("serialize f64 {v}"); + self.wh(T::Double, v) + } + + fn serialize_char(self, v: char) -> Result<()> { + self.serialize_u32(u32::from(v)) + } + + fn serialize_str(self, v: &str) -> Result<()> { + self.serialize_bytes(v.as_bytes()) + } + + fn serialize_bytes(self, v: &[u8]) -> Result<()> { + // println!("serialize bytes {v:?}"); + self.leb128h(T::String, v.len() as u128)?; + self.w.w(v)?; + Ok(()) + } + + // An absent optional is represented as the JSON `null`. 
+ fn serialize_none(self) -> Result<()> { + self.t(T::None) + } + + fn serialize_some<U>(self, value: &U) -> Result<()> + where + U: ?Sized + Serialize, + { + self.t(T::Some)?; + value.serialize(self) + } + + fn serialize_unit(self) -> Result<()> { + self.serialize_none() + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result<()> { + self.serialize_none() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + i: u32, + _variant: &'static str, + ) -> Result<()> { + // println!("uv"); + self.w.w(T::UVariant as u8)?; + self.serialize_u32(i) + } + + // As is done here, serializers are encouraged to treat newtype structs as + // insignificant wrappers around the data they contain. + fn serialize_newtype_struct<T>(self, _name: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + // Note that newtype variant (and all of the other variant serialization + // methods) refer exclusively to the "externally tagged" enum + // representation. + // + // Serialize this to JSON in externally tagged form as `{ NAME: VALUE }`. + fn serialize_newtype_variant<U>( + self, + _name: &'static str, + ix: u32, + _vname: &'static str, + value: &U, + ) -> Result<()> + where + U: ?Sized + Serialize, + { + // println!("serialize variant {_vname} of {_name} ix {ix}"); + self.w.w(T::NVariant as u8)?; + self.serialize_u32(ix)?; + value.serialize(self) + } + + // Now we get to the serialization of compound types. + // + // The start of the sequence, each value, and the end are three separate + // method calls. This one is responsible only for serializing the start, + // which in JSON is `[`. + // + // The length of the sequence may or may not be known ahead of time. This + // doesn't make a difference in JSON because the length is not represented + // explicitly in the serialized form. Some serializers may only be able to + // support sequences for which the length is known up front. 
+ fn serialize_seq(self, l: Option<usize>) -> Result<Self::SerializeSeq> { + // println!("serialize list of len {l:?}"); + self.leb128h(T::List, l.unwrap() as u128)?; + Ok(self) + } + + fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> { + self.serialize_seq(Some(len)) + } + + // Tuple structs look just like sequences in JSON. + fn serialize_tuple_struct( + self, + _name: &'static str, + len: usize, + ) -> Result<Self::SerializeTupleStruct> { + self.serialize_seq(Some(len)) + } + + // Tuple variants are represented in JSON as `{ NAME: [DATA...] }`. Again + // this method is only responsible for the externally tagged representation. + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleVariant> { + // println!("serialize tuple variant {_variant} of {_name} {_variant_index} {_variant}"); + self.w.w(T::TVariant as u8)?; + self.serialize_u32(_variant_index)?; + self.leb128(_len as u128)?; + // self.output += "{"; + // variant.serialize(&mut *self)?; + // self.output += ":["; + Ok(self) + } + + // Maps are represented in JSON as `{ K: V, K: V, ... }`. + fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> { + self.w.w(T::Map as u8)?; + // println!("{_len:?}"); + self.leb128(_len.ok_or(Error::LenLess)? as u128)?; + Ok(self) + + // Ok(self) + } + + // Structs look just like maps in JSON. In particular, JSON requires that we + // serialize the field names of the struct. Other formats may be able to + // omit the field names when serializing structs because the corresponding + // Deserialize implementation is required to know what the keys are without + // looking at the serialized data. + fn serialize_struct(self, _name: &'static str, len: usize) -> Result<Self::SerializeStruct> { + self.serialize_map(Some(len)) + } + + // Struct variants are represented in JSON as `{ NAME: { K: V, ... } }`. 
+ // This is the externally tagged representation. + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeStructVariant> { + // println!("ser struct v {_name} {_variant_index} {variant} {_len}"); + self.w.w(T::SVariant as u8)?; + self.serialize_u32(_variant_index)?; + self.leb128(_len as u128)?; + Ok(self) + } +} + +// The following 7 impls deal with the serialization of compound types like +// sequences and maps. Serialization of such types is begun by a Serializer +// method and followed by zero or more calls to serialize individual elements of +// the compound type and one call to end the compound type. +// +// This impl is SerializeSeq so these methods are called after `serialize_seq` +// is called on the Serializer. +impl<W: Write> ser::SerializeSeq for &mut Serializer<W> { + // Must match the `Ok` type of the serializer. + type Ok = (); + // Must match the `Error` type of the serializer. + type Error = Error; + + // Serialize a single element of the sequence. + fn serialize_element<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + // Close the sequence. + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Same thing but for tuples. +impl<W: Write> ser::SerializeTuple for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_element<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Same thing but for tuple structs. +impl<W: Write> ser::SerializeTupleStruct for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} +/// A Seq. 
+impl<W: Write> ser::SerializeTupleVariant for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Some `Serialize` types are not able to hold a key and value in memory at the +// same time so `SerializeMap` implementations are required to support +// `serialize_key` and `serialize_value` individually. +// +// There is a third optional method on the `SerializeMap` trait. The +// `serialize_entry` method allows serializers to optimize for the case where +// key and value are both available simultaneously. In JSON it doesn't make a +// difference so the default behavior for `serialize_entry` is fine. +impl<W: Write> ser::SerializeMap for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + // The Serde data model allows map keys to be any serializable type. JSON + // only allows string keys so the implementation below will produce invalid + // JSON if the key serializes as something other than a string. + // + // A real JSON serializer would need to validate that map keys are strings. + // This can be done by using a different Serializer to serialize the key + // (instead of `&mut **self`) and having that other serializer only + // implement `serialize_str` and return an error on any other data type. + fn serialize_key<T>(&mut self, key: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + key.serialize(&mut **self) + } + + // It doesn't make a difference whether the colon is printed at the end of + // `serialize_key` or at the beginning of `serialize_value`. In this case + // the code is a bit simpler having it here. 
+ fn serialize_value<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Structs are like maps in which the keys are constrained to be compile-time +// constant strings. +impl<W: Write> ser::SerializeStruct for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + // key.serialize(&mut **self)?; + key.serialize(&mut **self)?; + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } + + fn skip_field(&mut self, key: &'static str) -> std::prelude::v1::Result<(), Self::Error> { + let _ = key; + Ok(()) + } +} + +// Similar to `SerializeTupleVariant`, here the `end` method is responsible for +// closing both of the curly braces opened by `serialize_struct_variant`. +impl<W: Write> ser::SerializeStructVariant for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + key.serialize(&mut **self)?; + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} +#[cfg(test)] +mod tests; diff --git a/src/ser/tests.rs b/src/ser/tests.rs new file mode 100644 index 0000000..eb9e968 --- /dev/null +++ b/src/ser/tests.rs @@ -0,0 +1,107 @@ +use super::*; +use atools::prelude::*; +const fn lenleb128(x: u128) -> usize { + let mut n2 = 0; + let mut w = x; + loop { + w >>= 7; + if w == 0 { + n2 += 1; + break; + } else { + n2 += 1; + } + } + n2 +} +const fn leb128<const X: u128>() -> [u8; lenleb128(X)] { + let mut i = 0; + let mut into: [u8; _] = [0; _]; + let mut w = X; + loop { + let n = (w & 127) as u8; + w >>= 7; + if w == 0 { + into[i] = n; + break; + } else { + into[i] = n | 1 << 7; + } + i += 1; + } + into +} + +#[test] +fn test_struct() { + #[derive(Serialize)] + struct Test { + 
int: u32, + seq: Vec<&'static str>, + } + + let test = Test { + int: 1, + seq: vec!["hello", "bendncode"], + }; + let expected = (*b"m") + .couple(leb128::<2>()) + .couple(*b"s\x03intu") + .couple(leb128::<1>()) + .couple(*b"s\x03seq") + .join(b'l') + .couple(leb128::<2>()) + .join(b's') + .couple(leb128::<5>()) + .couple(*b"hello") + .join(b's') + .couple(leb128::<9>()) + .couple(*b"bendncode"); + + assert_eq!(to_bytes(&test).unwrap(), expected); +} + +#[test] +fn test_enum() { + #[derive(Serialize)] + enum E { + Unit, + Newtype(u32), + Tuple(u32, u32), + Struct { a: u32 }, + } + + let u = E::Unit; + let expected = [b'n', b'u', 0]; + assert_eq!(to_bytes(&u).unwrap(), expected); + + // println!("--"); + + let n = E::Newtype(51); + let expected = b"vu" + .couple(leb128::<1>()) + .join(b'u') // i rather dislike this byte + .couple(leb128::<51>()); + + assert_eq!(to_bytes(&n).unwrap(), expected); + // println!("--"); + let t = E::Tuple(1, 24151561); + let expected = b"xu" + .couple(leb128::<2>()) + .couple(leb128::<2>()) + .join(b'u') + .couple(leb128::<1>()) + .join(b'u') + .couple(leb128::<24151561>()); + assert_eq!(to_bytes(&t).unwrap(), expected); + // println!("--"); + let s = E::Struct { a: 1 }; + let expected = b"yu" + .couple(leb128::<3>()) + .couple(leb128::<1>()) + .join(b's') + .couple(leb128::<1>()) + .couple(*b"au") + .couple(leb128::<1>()); + assert_eq!(to_bytes(&s).unwrap(), expected); +} diff --git a/src/serde.rs b/src/serde.rs new file mode 100644 index 0000000..1948e69 --- /dev/null +++ b/src/serde.rs @@ -0,0 +1,26 @@ +#[repr(u8)] +#[derive(Copy, Clone, Debug)] +pub enum T { + False = 0, + True = 1, + + Int = b'i', + Uint = b'u', + Float = b'f', + Double = b'd', + String = b's', + NVariant = b'v', + TVariant = b'x', + SVariant = b'y', + UVariant = b'n', + List = b'l', + // Tuple = b't', + Map = b'm', + None = b'z', + Some = b'o', +} +impl PartialEq<u8> for T { + fn eq(&self, other: &u8) -> bool { + *self as u8 == *other + } +} |