bencode inspired tight self describing serialization format
bendn 13 days ago
commit acc4ed5
-rw-r--r--.gitignore2
-rw-r--r--Cargo.toml15
-rw-r--r--fuzz/.gitignore5
-rw-r--r--fuzz/Cargo.toml20
-rw-r--r--fuzz/fuzz_targets/from_slice.rs13
-rw-r--r--src/de.rs545
-rw-r--r--src/de/tests.rs101
-rw-r--r--src/error.rs52
-rw-r--r--src/lib.rs8
-rw-r--r--src/ser.rs470
-rw-r--r--src/ser/tests.rs107
-rw-r--r--src/serde.rs26
12 files changed, 1364 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96ef6c0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..4d4ff23
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "bendncode"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+atools = "0.1.12"
+raad = { version = "0.1.3" }
+serde = { version = "1.0.228", features = ["derive"] }
+thiserror = "2.0.18"
+
+serde_json = "1.0.150"
+[dev-dependencies]
+postcard = { version = "1.1.3", features = ["use-std"] }
+serde_json = "1.0.150"
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 0000000..4bc31dc
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,5 @@
+/artifacts/
+/corpus/
+/coverage/
+/target/
+/Cargo.lock
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 0000000..08ae682
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "serde_json-fuzz"
+version = "0.0.0"
+authors = ["David Tolnay <[email protected]>"]
+edition = "2021"
+publish = false
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+bendncode = { path = ".." }
+serde_json = "1.0.150"
+
+[[bin]]
+name = "from_slice"
+path = "fuzz_targets/from_slice.rs"
+test = false
+doc = false
diff --git a/fuzz/fuzz_targets/from_slice.rs b/fuzz/fuzz_targets/from_slice.rs
new file mode 100644
index 0000000..26fc3e2
--- /dev/null
+++ b/fuzz/fuzz_targets/from_slice.rs
@@ -0,0 +1,13 @@
+#![no_main]
+
+use bendncode::from_bytes;
+use libfuzzer_sys::fuzz_target;
+use serde_json::{from_slice, Value};
+
+// Round-trip property: whenever the input decodes as a `Value`, re-encoding
+// it and decoding the result must give back an equal `Value`.
+fuzz_target!(|data: &[u8]| {
+    match from_bytes::<Value>(data) {
+        Ok(decoded) => {
+            let encoded = bendncode::to_bytes(&decoded).unwrap();
+            let redecoded = from_bytes::<Value>(&encoded).unwrap();
+            assert_eq!(
+                decoded,
+                redecoded,
+                "{data:?}: {x:?}: {x_}: {y}",
+                x = encoded,
+                x_ = decoded,
+                y = redecoded
+            );
+        }
+        // Input that does not decode is simply not interesting.
+        Err(_) => {}
+    }
+});
diff --git a/src/de.rs b/src/de.rs
new file mode 100644
index 0000000..175c147
--- /dev/null
+++ b/src/de.rs
@@ -0,0 +1,545 @@
+use crate::serde::T;
+use crate::{Error, Result};
+use raad::le::R;
+use serde::{
+ self, Deserialize,
+ de::{self, DeserializeSeed, Visitor},
+};
+
+/// Decoder state: a cursor over the encoded input.
+pub struct Deserializer<'de> {
+ // Input that has not been consumed yet; every read shrinks this slice from the front.
+ r: &'de [u8],
+}
+/// Decodes a value of type `T` from the encoded bytes in `x`.
+///
+/// Borrowed strings and byte slices in the result point into `x`. Bytes that
+/// remain after the value has been read are not inspected.
+pub fn from_bytes<'a, T: Deserialize<'a>>(x: &'a [u8]) -> Result<T> {
+    let mut cursor = Deserializer { r: x };
+    <T as Deserialize<'a>>::deserialize(&mut cursor)
+}
+impl<'de> Deserializer<'de> {
+    /// Reads an unsigned LEB128 integer: little-endian groups of seven bits,
+    /// where a set high bit on a byte means another byte follows.
+    ///
+    /// # Errors
+    /// Fails when the input ends inside the number, and with
+    /// [`Error::Overflow`] when the encoded value does not fit in a `u128`.
+    pub fn leb128(&mut self) -> Result<u128> {
+        let mut res = 0u128;
+        let mut bits = 0u32;
+        loop {
+            let b = self.r.r::<u8>()?;
+            let group = (b & 127) as u128;
+            // `checked_shl` only rejects shift amounts >= 128. The 19th group
+            // lands at bit 126, so also make sure none of its bits were
+            // shifted off the top instead of silently dropping them.
+            let placed = group.checked_shl(bits).ok_or(Error::Overflow)?;
+            if placed >> bits != group {
+                return Err(Error::Overflow);
+            }
+            res |= placed;
+            bits += 7;
+            if b & 128 == 0 {
+                return Ok(res);
+            }
+        }
+    }
+
+    /// Reads a signed LEB128 integer (two's complement, sign-extended from
+    /// bit 6 of the final byte).
+    ///
+    /// # Errors
+    /// Fails when the input ends inside the number, and with
+    /// [`Error::Overflow`] when the encoded value does not fit in an `i128`.
+    pub fn sleb128(&mut self) -> Result<i128> {
+        let mut res = 0u128;
+        let mut bits = 0u32;
+        loop {
+            let b = self.r.r::<u8>()?;
+            let group = (b & 127) as u128;
+            let placed = group.checked_shl(bits).ok_or(Error::Overflow)?;
+            // Number of value bits still unfilled; at least 1 here because the
+            // shift above succeeded.
+            let room = 128 - bits;
+            if room < 7 {
+                // Only the low `room` bits of this group are stored. The bits
+                // above them must all repeat the top stored bit, otherwise the
+                // value is outside the `i128` range.
+                let high = group >> (room - 1);
+                if high != 0 && high != 127u128 >> (room - 1) {
+                    return Err(Error::Overflow);
+                }
+            }
+            res |= placed;
+            bits += 7;
+            if b & 128 == 0 {
+                // Sign-extend only while there are bits left to fill. The
+                // comparison is in bits, so a full-width 19-byte negative
+                // number is accepted rather than reported as an overflow.
+                if b & 64 != 0 && bits < 128 {
+                    res |= !0u128 << bits;
+                }
+                return Ok(res.cast_signed());
+            }
+        }
+    }
+
+    /// Consumes and returns the next byte.
+    fn t(&mut self) -> Result<u8> {
+        Ok(self.r.r()?)
+    }
+
+    /// Returns the next byte without consuming it, or [`Error::OOB`] at the
+    /// end of the input.
+    fn a(&self) -> Result<u8> {
+        self.r.first().copied().ok_or(Error::OOB)
+    }
+
+    /// Consumes one byte and checks that it is the tag `expected`; any other
+    /// byte yields [`Error::Expected`].
+    #[track_caller]
+    fn tag(&mut self, expected: T) -> Result<()> {
+        let t = self.t()?;
+        if t != expected as u8 {
+            return Err(Error::Expected { expected, found: t });
+        }
+        Ok(())
+    }
+}
+
+impl<'de> serde::Deserializer<'de> for &mut Deserializer<'de> {
+ type Error = Error;
+
+    /// Self-describing entry point: peeks at the next tag byte and decodes
+    /// whatever kind of value it announces.
+    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        let tag = self.a()?;
+        if tag == T::True as u8 || tag == T::False as u8 {
+            self.deserialize_bool(visitor)
+        } else if tag == T::Int as u8 {
+            self.tag(T::Int)?;
+            let wide: i128 = self.sleb128()?;
+            // Use the 64-bit visitor method whenever the value fits.
+            match i64::try_from(wide) {
+                Ok(narrow) => visitor.visit_i64(narrow),
+                Err(_) => visitor.visit_i128(wide),
+            }
+        } else if tag == T::Uint as u8 {
+            self.tag(T::Uint)?;
+            let wide = self.leb128()?;
+            match u64::try_from(wide) {
+                Ok(narrow) => visitor.visit_u64(narrow),
+                Err(_) => visitor.visit_u128(wide),
+            }
+        } else if tag == T::Float as u8 {
+            self.deserialize_f32(visitor)
+        } else if tag == T::Double as u8 {
+            self.deserialize_f64(visitor)
+        } else if tag == T::String as u8 {
+            self.deserialize_str(visitor)
+        } else if tag == T::List as u8 {
+            self.deserialize_seq(visitor)
+        } else if tag == T::Map as u8 {
+            self.deserialize_map(visitor)
+        } else if tag == T::None as u8 || tag == T::Some as u8 {
+            self.deserialize_option(visitor)
+        } else if tag == T::NVariant as u8 {
+            // Newtype variant: an index followed by one value, presented as a
+            // single-entry map.
+            self.tag(T::NVariant)?;
+            visitor.visit_map(MapAccess { de: self, len: 1 })
+        } else if tag == T::SVariant as u8 {
+            // Struct variant: the index is read and dropped, the fields are
+            // presented as a map.
+            self.tag(T::SVariant)?;
+            self.tag(T::Uint)?;
+            let _variant = self.leb128()?;
+            let fields = self.leb128()? as usize;
+            visitor.visit_map(MapAccess {
+                de: self,
+                len: fields,
+            })
+        } else if tag == T::UVariant as u8 {
+            // Unit variant: only the index is present.
+            self.tag(T::UVariant)?;
+            self.tag(T::Uint)?;
+            let variant = self.leb128()? as u32;
+            visitor.visit_u32(variant)
+        } else if tag == T::TVariant as u8 {
+            // Tuple variant: the index is read and dropped, the fields are
+            // presented as a sequence.
+            self.tag(T::TVariant)?;
+            self.tag(T::Uint)?;
+            let _variant = self.leb128()?;
+            let fields = self.leb128()? as usize;
+            visitor.visit_seq(SeqAccess::new(self, fields))
+        } else {
+            Err(Error::NotTag(tag))
+        }
+    }
+ /// Always `false`: this deserializer reports itself as a non-human-readable format.
+ #[inline]
+ fn is_human_readable(&self) -> bool {
+ false
+ }
+    /// Reads a boolean, which is stored as the bare `True` or `False` tag.
+    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        let found = self.t()?;
+        if found == T::True as u8 {
+            visitor.visit_bool(true)
+        } else if found == T::False as u8 {
+            visitor.visit_bool(false)
+        } else {
+            Err(Error::Expected {
+                expected: T::True,
+                found,
+            })
+        }
+    }
+
+    /// Reads an `Int`-tagged value as `i8`.
+    ///
+    /// Returns [`Error::Overflow`] when the stored number is outside the `i8`
+    /// range instead of silently wrapping it.
+    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Int)?;
+        let v = i8::try_from(self.sleb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_i8(v)
+    }
+
+    /// Reads an `Int`-tagged value as `i16`; out-of-range numbers yield
+    /// [`Error::Overflow`].
+    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Int)?;
+        let v = i16::try_from(self.sleb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_i16(v)
+    }
+
+    /// Reads an `Int`-tagged value as `i32`; out-of-range numbers yield
+    /// [`Error::Overflow`].
+    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Int)?;
+        let v = i32::try_from(self.sleb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_i32(v)
+    }
+
+    /// Reads an `Int`-tagged value as `i64`; out-of-range numbers yield
+    /// [`Error::Overflow`].
+    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Int)?;
+        let v = i64::try_from(self.sleb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_i64(v)
+    }
+
+    /// Reads an `Int`-tagged value at full `i128` width.
+    fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Int)?;
+        visitor.visit_i128(self.sleb128()?)
+    }
+
+    /// Reads a `Uint`-tagged value as `u8`.
+    ///
+    /// Returns [`Error::Overflow`] when the stored number is outside the `u8`
+    /// range instead of silently wrapping it.
+    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Uint)?;
+        let v = u8::try_from(self.leb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_u8(v)
+    }
+
+    /// Reads a `Uint`-tagged value as `u16`; out-of-range numbers yield
+    /// [`Error::Overflow`].
+    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Uint)?;
+        let v = u16::try_from(self.leb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_u16(v)
+    }
+
+    /// Reads a `Uint`-tagged value as `u32`; out-of-range numbers yield
+    /// [`Error::Overflow`].
+    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Uint)?;
+        let v = u32::try_from(self.leb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_u32(v)
+    }
+
+    /// Reads a `Uint`-tagged value as `u64`; out-of-range numbers yield
+    /// [`Error::Overflow`].
+    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Uint)?;
+        let v = u64::try_from(self.leb128()?).map_err(|_| Error::Overflow)?;
+        visitor.visit_u64(v)
+    }
+
+    /// Reads a `Uint`-tagged value at full `u128` width.
+    fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Uint)?;
+        let v = self.leb128()?;
+        visitor.visit_u128(v)
+    }
+
+    /// Reads a `Float`-tagged `f32` stored right after its tag.
+    fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Float)?;
+        let value: f32 = self.r.r()?;
+        visitor.visit_f32(value)
+    }
+
+    /// Reads a `Double`-tagged `f64` stored right after its tag.
+    fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Double)?;
+        let value: f64 = self.r.r()?;
+        visitor.visit_f64(value)
+    }
+
+    /// Reads a `char`, stored as a `Uint` holding its scalar value.
+    ///
+    /// Numbers that do not fit in `u32` yield [`Error::Overflow`] (they are no
+    /// longer truncated first); numbers that are not a Unicode scalar value
+    /// yield [`Error::NotChar`].
+    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Uint)?;
+        let v = self.leb128()?;
+        let code = u32::try_from(v).map_err(|_| Error::Overflow)?;
+        visitor.visit_char(char::from_u32(code).ok_or(Error::NotChar(code))?)
+    }
+
+    /// Reads a string.
+    ///
+    /// A `Uint` is also accepted and handed to the visitor as its decimal
+    /// text. Otherwise a `String` tag, a length and that many UTF-8 bytes are
+    /// expected; the text is borrowed straight from the input.
+    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        if self.a()? == T::Uint as u8 {
+            self.tag(T::Uint)?;
+            let number = self.leb128()?;
+            return visitor.visit_str(&number.to_string());
+        }
+        self.tag(T::String)?;
+        let len = self.leb128()? as usize;
+        let (head, tail) = self.r.split_at_checked(len).ok_or(Error::OOB)?;
+        let text = str::from_utf8(head)?;
+        let out = visitor.visit_borrowed_str(text);
+        // The cursor moves past the string whether or not the visitor accepted it.
+        self.r = tail;
+        out
+    }
+
+ /// Owned strings are read exactly like borrowed ones; see `deserialize_str`.
+ fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_str(visitor)
+ }
+
+ /// Byte slices are read exactly like byte buffers; see `deserialize_byte_buf`.
+ fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_byte_buf(visitor)
+ }
+
+    /// Reads a `String`-tagged, length-prefixed run of raw bytes, borrowed
+    /// straight from the input.
+    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::String)?;
+        let len = self.leb128()? as usize;
+        let (payload, rest) = self.r.split_at_checked(len).ok_or(Error::OOB)?;
+        let out = visitor.visit_borrowed_bytes(payload);
+        self.r = rest;
+        out
+    }
+
+    /// Reads an option: a `Some` tag followed by the value, or a lone `None` tag.
+    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        let found = self.t()?;
+        if found == T::Some as u8 {
+            visitor.visit_some(self)
+        } else if found == T::None as u8 {
+            visitor.visit_none()
+        } else {
+            Err(Error::Expected {
+                expected: T::Some,
+                found,
+            })
+        }
+    }
+
+ /// Reads `()`, which is stored as a lone `None` tag.
+ fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ self.tag(T::None)?;
+ visitor.visit_unit()
+ }
+
+ /// Unit structs are read the same way as `()`; the name is ignored.
+ fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_unit(visitor)
+ }
+
+ /// Newtype structs add nothing to the encoding: the visitor reads the inner
+ /// value from this same deserializer.
+ fn deserialize_newtype_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ visitor.visit_newtype_struct(self)
+ }
+
+    /// Reads a sequence: a `List` tag, the element count, then the elements.
+    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::List)?;
+        let count = self.leb128()? as usize;
+        let elements = SeqAccess {
+            de: self,
+            len: count,
+        };
+        visitor.visit_seq(elements)
+    }
+
+ /// Tuples are read as sequences; the element count comes from the input and
+ /// `_len` is not consulted.
+ fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_seq(visitor)
+ }
+
+ /// Tuple structs are read as sequences; the name and `_len` are not consulted.
+ fn deserialize_tuple_struct<V>(
+ self,
+ _name: &'static str,
+ _len: usize,
+ visitor: V,
+ ) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_seq(visitor)
+ }
+
+    /// Reads a map: a `Map` tag, the entry count, then alternating keys and values.
+    fn deserialize_map<V>(self, _visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        self.tag(T::Map)?;
+        let entries = self.leb128()? as usize;
+        let access = MapAccess {
+            de: self,
+            len: entries,
+        };
+        _visitor.visit_map(access)
+    }
+
+ /// Structs are read as maps; the struct name and the expected field list are
+ /// not consulted here.
+ fn deserialize_struct<V>(
+ self,
+ _name: &'static str,
+ _fields: &'static [&'static str],
+ visitor: V,
+ ) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ // Delegates entirely to the map path.
+ self.deserialize_map(visitor)
+ }
+
+    /// Reads an enum: consumes the leading tag byte and lets `EnumAccess`
+    /// read the variant identifier and payload that follow.
+    ///
+    /// The tag byte is stored in the accessor but not validated here.
+    fn deserialize_enum<V>(
+        self,
+        _name: &'static str,
+        _variants: &'static [&'static str],
+        visitor: V,
+    ) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        let kind = self.t()?;
+        let access = EnumAccess {
+            de: self,
+            _tag: kind,
+        };
+        visitor.visit_enum(access)
+    }
+
+    /// Reads a field or variant identifier: either a `String` (a name) or a
+    /// `Uint` (an index).
+    ///
+    /// An index that does not fit in `u32` yields [`Error::Overflow`] instead
+    /// of being truncated to a different, valid-looking index.
+    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        match self.t()? {
+            x if T::String == x => {
+                let len = self.leb128()? as usize;
+                let v = str::from_utf8(self.r.get(..len).ok_or(Error::OOB)?)?;
+                self.r = self.r.get(len..).ok_or(Error::OOB)?;
+                visitor.visit_borrowed_str(v)
+            }
+            x if T::Uint == x => {
+                let index = u32::try_from(self.leb128()?).map_err(|_| Error::Overflow)?;
+                visitor.visit_u32(index)
+            }
+            x => Err(Error::Expected {
+                expected: T::String,
+                found: x,
+            }),
+        }
+    }
+
+ /// Skipping a value goes through `deserialize_any`, i.e. the value is fully
+ /// decoded according to its tag.
+ fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_any(visitor)
+ }
+}
+
+/// Yields `len` elements read back to back from the deserializer.
+struct SeqAccess<'a, 'de> {
+    de: &'a mut Deserializer<'de>,
+    // Elements still to be handed out.
+    len: usize,
+}
+
+impl<'a, 'de> SeqAccess<'a, 'de> {
+    fn new(de: &'a mut Deserializer<'de>, len: usize) -> Self {
+        Self { de, len }
+    }
+}
+
+impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> {
+    type Error = Error;
+
+    /// Returns the next element, or `None` once the count is used up.
+    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
+    where
+        T: DeserializeSeed<'de>,
+    {
+        let Some(remaining) = self.len.checked_sub(1) else {
+            return Ok(None);
+        };
+        self.len = remaining;
+        let element = seed.deserialize(&mut *self.de)?;
+        Ok(Some(element))
+    }
+}
+
+/// Access to one encoded enum value; created after the leading tag byte has
+/// been consumed.
+struct EnumAccess<'a, 'de> {
+    de: &'a mut Deserializer<'de>,
+    // The consumed tag byte; kept but not read anywhere.
+    _tag: u8,
+}
+
+impl<'a, 'de> EnumAccess<'a, 'de> {
+    fn new(de: &'a mut Deserializer<'de>, _tag: u8) -> Self {
+        Self { de, _tag }
+    }
+}
+
+impl<'a, 'de> de::EnumAccess<'de> for EnumAccess<'a, 'de> {
+    type Error = Error;
+    type Variant = Self;
+
+    /// Reads the variant identifier and returns it together with `self`, which
+    /// then serves as the accessor for the payload.
+    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self)>
+    where
+        V: DeserializeSeed<'de>,
+    {
+        seed.deserialize(&mut *self.de)
+            .map(|variant| (variant, self))
+    }
+}
+
+/// Reads the payload that follows a variant identifier.
+///
+/// Malformed input is reported as an error; nothing here panics on bad data.
+impl<'a, 'de> de::VariantAccess<'de> for EnumAccess<'a, 'de> {
+    type Error = Error;
+
+    /// Unit variants have no payload.
+    fn unit_variant(self) -> Result<()> {
+        Ok(())
+    }
+
+    /// Newtype variants store their single value directly after the identifier.
+    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
+    where
+        T: DeserializeSeed<'de>,
+    {
+        seed.deserialize(self.de)
+    }
+
+    /// Tuple variants store a field count followed by the fields.
+    ///
+    /// # Errors
+    /// Returns [`Error::Message`] when the stored count differs from `len`.
+    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        let found = self.de.leb128()?;
+        if found != len as u128 {
+            return Err(Error::Message(format!(
+                "tuple variant has {found} fields, expected {len}"
+            )));
+        }
+        visitor.visit_seq(SeqAccess::new(self.de, len))
+    }
+
+    /// Struct variants store a field count followed by that many name/value
+    /// pairs.
+    ///
+    /// The stored count drives the map rather than `_fields.len()`, because
+    /// the serializer writes however many fields it was told about, which is
+    /// fewer than declared when fields are skipped.
+    fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
+    where
+        V: Visitor<'de>,
+    {
+        let stored = self.de.leb128()?;
+        let len = usize::try_from(stored).map_err(|_| Error::Overflow)?;
+        visitor.visit_map(MapAccess { de: self.de, len })
+    }
+}
+
+/// Yields `len` key/value pairs read back to back from the deserializer.
+struct MapAccess<'a, 'de> {
+    de: &'a mut Deserializer<'de>,
+    // Entries still to be handed out.
+    len: usize,
+}
+impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> {
+    type Error = Error;
+
+    /// Returns the next key, or `None` once the entry count is used up.
+    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
+    where
+        K: DeserializeSeed<'de>,
+    {
+        let Some(remaining) = self.len.checked_sub(1) else {
+            return Ok(None);
+        };
+        self.len = remaining;
+        let key = seed.deserialize(&mut *self.de)?;
+        Ok(Some(key))
+    }
+
+    /// Reads the value that belongs to the key returned last.
+    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
+    where
+        V: DeserializeSeed<'de>,
+    {
+        let de = &mut *self.de;
+        seed.deserialize(de)
+    }
+}
+#[cfg(test)]
+mod tests;
diff --git a/src/de/tests.rs b/src/de/tests.rs
new file mode 100644
index 0000000..746d5b3
--- /dev/null
+++ b/src/de/tests.rs
@@ -0,0 +1,101 @@
+use std::collections::HashMap;
+
+use lsp_types::{CompletionRegistrationOptions, CompletionResponse};
+use serde::{Deserialize, Serialize, de::DeserializeOwned};
+use serde_json::to_value;
+
+use crate::to_bytes;
+
+use super::*;
+/// Round-trip helper: encodes `x`, prints the bytes, checks they also decode
+/// as a generic `serde_json::Value`, then decodes them as `X` again and
+/// compares with the input.
+#[track_caller]
+fn rtt<X: Serialize + PartialEq + std::fmt::Debug>(x: X)
+where
+    X: for<'a> Deserialize<'a>,
+{
+    let b = to_bytes(&x).unwrap();
+    // Letters are shown as characters, everything else as a number.
+    b.iter().copied().for_each(|ch| match ch.is_ascii_alphabetic() {
+        true => print!("{},", ch as char),
+        false => print!("{ch},"),
+    });
+    dbg!(from_bytes::<serde_json::Value>(&b).unwrap());
+    let f = match from_bytes::<X>(&b) {
+        Ok(decoded) => decoded,
+        Err(e) => {
+            println!("{e}");
+            panic!()
+        }
+    };
+    assert_eq!(f, x);
+}
+
+/// Round-trips primitives, tuples, collections and options, plus a borrowed
+/// `&str`.
+#[test]
+fn basic() {
+    rtt(4);
+    rtt([1, 2, 3, 4]);
+    rtt((1, 2, 3));
+    rtt(("hello".to_string(), 5, vec![1, 2, 3]));
+    rtt(HashMap::<u8, u16>::from_iter([(1, 5), (4, 2)]));
+    rtt(Some(4));
+    rtt(None::<u8>);
+    rtt(-1);
+    rtt(-487);
+    // `rtt` needs an owned type, so the zero-copy `&str` path is checked by
+    // hand. The buffer has to outlive the borrowed result.
+    let bytes = to_bytes(&"hi").unwrap();
+    let x: &str = from_bytes(&bytes).unwrap();
+    assert_eq!(x, "hi");
+}
+
+/// Round-trips a named-field struct, a newtype struct and a unit struct.
+#[test]
+fn structs() {
+    #[derive(Serialize, Deserialize, Debug, PartialEq)]
+    struct Y {
+        abra: u32,
+        bada: u16,
+        hocu: Vec<HashMap<u16, f32>>,
+    }
+    #[derive(Serialize, Deserialize, Debug, PartialEq)]
+    struct Z(u32);
+    #[derive(Serialize, Deserialize, Debug, PartialEq)]
+    struct A;
+
+    let filled = HashMap::from_iter([(1, 5.0), (41, 41.0)]);
+    let empty = HashMap::default();
+    let named = Y {
+        abra: 4,
+        bada: 5,
+        hocu: vec![filled, empty],
+    };
+    rtt(named);
+    rtt(Z(4515161));
+    rtt(A);
+}
+/// Round-trips one value of every enum variant shape.
+#[test]
+fn enums() {
+    #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
+    enum E {
+        Unit,
+        Newtype(u32),
+        Tuple(u32, u32),
+        Struct { a: u32 },
+    }
+
+    let samples = [
+        E::Unit,
+        E::Newtype(51),
+        E::Tuple(1, 24151561),
+        E::Struct { a: 1 },
+    ];
+    for sample in samples {
+        rtt(sample);
+    }
+}
+/// Round-trips a struct whose `None` fields are left out of the encoding.
+#[test]
+fn postcard_fails_these() {
+    #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
+    struct A {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        x: Option<u32>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        y: Option<u32>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        z: Option<u32>,
+    }
+    // `y` is absent from the encoded form.
+    let sparse = A {
+        x: Some(4),
+        y: None,
+        z: Some(5),
+    };
+    rtt(sparse);
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..6f70009
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,52 @@
+use std::fmt::Display;
+
+use thiserror::Error;
+
+use crate::serde::T;
+
+/// Result type used throughout the crate, with [`Error`] as the error.
+pub type Result<T> = std::result::Result<T, Error>;
+
+// This is a bare-bones implementation. A real library would provide additional
+// information in its error type, for example the line and column at which the
+// error occurred, the byte offset into the input, or the current key being
+// processed.
+/// Everything that can go wrong while encoding or decoding.
+#[derive(Debug, Error)]
+pub enum Error {
+ /// Free-form message; this is what serde's `custom` constructors produce.
+ #[error("{0}")]
+ Message(String),
+ /// I/O failure, converted from `std::io::Error`.
+ #[error(transparent)]
+ Io(#[from] std::io::Error),
+ /// A tag byte other than the expected one was found.
+ #[error("expected {expected:?}, found {found}")]
+ Expected { expected: T, found: u8 },
+ /// Invalid UTF-8 while building an owned string.
+ #[error(transparent)]
+ FromUtf8Error(#[from] std::string::FromUtf8Error),
+ /// Invalid UTF-8 while reading a string slice.
+ #[error(transparent)]
+ Utf8Error(#[from] std::str::Utf8Error),
+ /// A collection was serialized without a known length.
+ #[error("a length is required for most things")]
+ LenLess,
+ /// The number is not a valid Unicode scalar value.
+ #[error("char {0} was not in the valid range of chars")]
+ NotChar(u32),
+ /// A read went past the end of the input.
+ #[error("out of bounds index")]
+ OOB,
+ /// The byte is not one of the known tags.
+ #[error("{0} is not a tag")]
+ NotTag(u8),
+ /// An encoded integer is too large to decode.
+ #[error("out of range")]
+ Overflow,
+}
+
+/// Lets `Serialize` implementations report their own failures.
+impl serde::ser::Error for Error {
+    fn custom<T>(msg: T) -> Self
+    where
+        T: Display,
+    {
+        let text = msg.to_string();
+        Error::Message(text)
+    }
+}
+/// Lets `Deserialize` implementations report their own failures.
+impl serde::de::Error for Error {
+    fn custom<T>(msg: T) -> Self
+    where
+        T: Display,
+    {
+        let text = msg.to_string();
+        Error::Message(text)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..b7b304f
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,8 @@
+#![cfg_attr(test, feature(generic_const_exprs, min_adt_const_params))]
+mod de;
+mod error;
+mod ser;
+pub(crate) mod serde;
+pub use de::{Deserializer, from_bytes};
+pub use error::{Error, Result};
+pub use ser::{Serializer, to_bytes};
diff --git a/src/ser.rs b/src/ser.rs
new file mode 100644
index 0000000..82b3600
--- /dev/null
+++ b/src/ser.rs
@@ -0,0 +1,470 @@
+use crate::error::{Error, Result};
+use crate::serde::T;
+use raad::le::*;
+use serde::{Serialize, ser};
+use std::io::Write;
+
+/// Encodes `value` into a freshly allocated byte vector.
+pub fn to_bytes<T>(value: &T) -> Result<Vec<u8>>
+where
+    T: Serialize,
+{
+    let mut out = Vec::new();
+    value.serialize(&mut Serializer { w: &mut out })?;
+    Ok(out)
+}
+/// Encoder that writes its output to any `std::io::Write` sink.
+pub struct Serializer<W: std::io::Write> {
+ // Destination of the encoded bytes.
+ w: W,
+}
+/// Low-level writing helpers shared by the serializer methods.
+impl<W: std::io::Write> Serializer<W> {
+    /// Writes the tag `h` followed by the raw value `w`.
+    fn wh(&mut self, h: T, w: impl raad::le::Writable) -> Result<()> {
+        self.t(h)?;
+        self.w.w(w)?;
+        Ok(())
+    }
+    /// Writes a single tag byte.
+    fn t(&mut self, h: T) -> Result<()> {
+        let byte = h as u8;
+        self.w.w(byte)?;
+        Ok(())
+    }
+    /// Writes the tag `h` followed by `w` as unsigned LEB128.
+    fn leb128h(&mut self, h: T, w: impl Into<u128>) -> Result<()> {
+        self.t(h)?;
+        self.leb128(w)
+    }
+    /// Writes `w` as unsigned LEB128: seven bits per byte, least significant
+    /// group first, high bit set on every byte except the last.
+    fn leb128(&mut self, w: impl Into<u128>) -> Result<()> {
+        let mut rest: u128 = w.into();
+        while rest > 127 {
+            self.w.w((rest & 127) as u8 | 0x80)?;
+            rest >>= 7;
+        }
+        self.w.w(rest as u8)?;
+        Ok(())
+    }
+    /// Writes `value` as signed LEB128. Output stops once the remaining bits
+    /// are pure sign extension of bit 6 of the byte just produced.
+    fn sleb128(&mut self, mut value: i128) -> Result<()> {
+        loop {
+            let low = (value & 127) as u8;
+            value >>= 7;
+            let negative = low & 64 != 0;
+            let done = if negative { value == -1 } else { value == 0 };
+            if done {
+                self.w.w(low)?;
+                return Ok(());
+            }
+            self.w.w(low | 0x80)?;
+        }
+    }
+}
+
+impl<W: std::io::Write> ser::Serializer for &mut Serializer<W> {
+ // The output type produced by this `Serializer` during successful
+ // serialization. Most serializers that produce text or binary output should
+ // set `Ok = ()` and serialize into an `io::Write` or buffer contained
+ // within the `Serializer` instance, as happens here. Serializers that build
+ // in-memory data structures may be simplified by using `Ok` to propagate
+ // the data structure around.
+ type Ok = ();
+
+ // The error type when some error occurs during serialization.
+ type Error = Error;
+
+ // Associated types for keeping track of additional state while serializing
+ // compound data structures like sequences and maps. In this case no
+ // additional state is required beyond what is already stored in the
+ // Serializer struct.
+ type SerializeSeq = Self;
+ type SerializeTuple = Self;
+ type SerializeTupleStruct = Self;
+ type SerializeTupleVariant = Self;
+ type SerializeMap = Self;
+ type SerializeStruct = Self;
+ type SerializeStructVariant = Self;
+
+    // Primitive values come first. A boolean has no payload: the value is the
+    // tag itself (`True` or `False`).
+    fn serialize_bool(self, v: bool) -> Result<()> {
+        let tag = if v { T::True } else { T::False };
+        self.t(tag)
+    }
+
+    // Integer width is not recorded. Every signed integer is widened and
+    // written as `Int` + signed LEB128, every unsigned integer as `Uint` +
+    // unsigned LEB128.
+    fn serialize_i8(self, v: i8) -> Result<()> {
+        self.serialize_i64(v.into())
+    }
+
+    fn serialize_i16(self, v: i16) -> Result<()> {
+        self.serialize_i64(v.into())
+    }
+
+    fn serialize_i32(self, v: i32) -> Result<()> {
+        self.serialize_i64(v.into())
+    }
+
+    fn serialize_i64(self, v: i64) -> Result<()> {
+        self.t(T::Int)?;
+        self.sleb128(i128::from(v))
+    }
+
+    fn serialize_u8(self, v: u8) -> Result<()> {
+        self.serialize_u64(v.into())
+    }
+
+    fn serialize_u16(self, v: u16) -> Result<()> {
+        self.serialize_u64(v.into())
+    }
+
+    fn serialize_u32(self, v: u32) -> Result<()> {
+        self.serialize_u64(v.into())
+    }
+
+    fn serialize_u64(self, v: u64) -> Result<()> {
+        self.t(T::Uint)?;
+        self.leb128(v)
+    }
+
+    // Floats are written at their own width, right after their tag.
+    fn serialize_f32(self, v: f32) -> Result<()> {
+        self.t(T::Float)?;
+        self.w.w(v)?;
+        Ok(())
+    }
+
+    fn serialize_f64(self, v: f64) -> Result<()> {
+        self.t(T::Double)?;
+        self.w.w(v)?;
+        Ok(())
+    }
+
+    // A char is written as the unsigned integer of its scalar value.
+    fn serialize_char(self, v: char) -> Result<()> {
+        self.serialize_u32(v as u32)
+    }
+
+    // Strings and byte slices share one encoding: `String` tag, length, bytes.
+    fn serialize_str(self, v: &str) -> Result<()> {
+        self.serialize_bytes(v.as_bytes())
+    }
+
+    fn serialize_bytes(self, v: &[u8]) -> Result<()> {
+        self.t(T::String)?;
+        self.leb128(v.len() as u128)?;
+        self.w.w(v)?;
+        Ok(())
+    }
+
+    // An absent optional is a lone `None` tag.
+    fn serialize_none(self) -> Result<()> {
+        self.t(T::None)
+    }
+
+    // A present optional is a `Some` tag followed by the value.
+    fn serialize_some<U>(self, value: &U) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        self.t(T::Some)?;
+        U::serialize(value, self)
+    }
+
+    // `()` and unit structs are written exactly like an absent optional.
+    fn serialize_unit(self) -> Result<()> {
+        self.t(T::None)
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
+        self.t(T::None)
+    }
+
+    // Unit variant: `UVariant` tag, then the variant index as a tagged
+    // unsigned integer. Names are not written.
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        i: u32,
+        _variant: &'static str,
+    ) -> Result<()> {
+        self.t(T::UVariant)?;
+        self.leb128h(T::Uint, i)
+    }
+
+    // Newtype structs are transparent: only the inner value is written.
+    fn serialize_newtype_struct<T>(self, _name: &'static str, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        Serialize::serialize(value, self)
+    }
+
+    // Newtype variant: `NVariant` tag, the variant index as a tagged unsigned
+    // integer, then the wrapped value.
+    fn serialize_newtype_variant<U>(
+        self,
+        _name: &'static str,
+        ix: u32,
+        _vname: &'static str,
+        value: &U,
+    ) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        self.t(T::NVariant)?;
+        self.leb128h(T::Uint, ix)?;
+        Serialize::serialize(value, self)
+    }
+
+    // Now we get to the serialization of compound types.
+    //
+    // A sequence starts with the `List` tag and its element count; the
+    // elements follow back to back and nothing marks the end.
+    //
+    // The count prefix is mandatory, so a sequence whose length is not known
+    // up front is rejected with `Error::LenLess` (the error `serialize_map`
+    // also returns) instead of panicking.
+    fn serialize_seq(self, l: Option<usize>) -> Result<Self::SerializeSeq> {
+        let len = l.ok_or(Error::LenLess)?;
+        self.leb128h(T::List, len as u128)?;
+        Ok(self)
+    }
+
+ // Tuples are written exactly like sequences of the same length.
+ fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> {
+ self.serialize_seq(Some(len))
+ }
+
+ // Tuple structs are written exactly like sequences; the name is not written.
+ fn serialize_tuple_struct(
+ self,
+ _name: &'static str,
+ len: usize,
+ ) -> Result<Self::SerializeTupleStruct> {
+ self.serialize_seq(Some(len))
+ }
+
+    // Tuple variant: `TVariant` tag, the variant index as a tagged unsigned
+    // integer, the field count, then the fields.
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleVariant> {
+        self.t(T::TVariant)?;
+        self.leb128h(T::Uint, _variant_index)?;
+        self.leb128(_len as u128)?;
+        Ok(self)
+    }
+
+    // Map: `Map` tag, the entry count, then alternating keys and values. The
+    // tag is written before the length is checked, so a map of unknown length
+    // fails with `Error::LenLess` after the tag has gone out.
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
+        self.t(T::Map)?;
+        let entries = _len.ok_or(Error::LenLess)?;
+        self.leb128(entries as u128)?;
+        Ok(self)
+    }
+
+    // Structs are written as maps keyed by field name; the struct name is not
+    // written.
+    fn serialize_struct(self, _name: &'static str, len: usize) -> Result<Self::SerializeStruct> {
+        let fields = Some(len);
+        self.serialize_map(fields)
+    }
+
+    // Struct variant: `SVariant` tag, the variant index as a tagged unsigned
+    // integer, the field count, then name/value pairs.
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeStructVariant> {
+        let _ = variant;
+        self.t(T::SVariant)?;
+        self.leb128h(T::Uint, _variant_index)?;
+        self.leb128(_len as u128)?;
+        Ok(self)
+    }
+}
+
+// The following 7 impls handle the contents of compound types. The matching
+// `Serializer` method has already written the header (tag, index, length), so
+// each element is simply written next and `end` has nothing left to do.
+//
+// Sequences, entered through `serialize_seq`.
+impl<W: Write> ser::SerializeSeq for &mut Serializer<W> {
+    // Both associated types must match those of the serializer.
+    type Ok = ();
+    type Error = Error;
+
+    // Writes one element.
+    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        Serialize::serialize(value, &mut **self)
+    }
+
+    // No terminator is written.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Tuples behave like sequences.
+impl<W: Write> ser::SerializeTuple for &mut Serializer<W> {
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        Serialize::serialize(value, &mut **self)
+    }
+
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Tuple structs behave like sequences.
+impl<W: Write> ser::SerializeTupleStruct for &mut Serializer<W> {
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_field<T>(&mut self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        Serialize::serialize(value, &mut **self)
+    }
+
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+/// Fields of a tuple variant, written like sequence elements.
+impl<W: Write> ser::SerializeTupleVariant for &mut Serializer<W> {
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_field<T>(&mut self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        Serialize::serialize(value, &mut **self)
+    }
+
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Map entries. Keys and values arrive through separate calls and are written
+// one after the other in call order; there is no separator between them and
+// no terminator at the end. The default `serialize_entry` is kept.
+impl<W: Write> ser::SerializeMap for &mut Serializer<W> {
+    type Ok = ();
+    type Error = Error;
+
+    // Keys may be any serializable value; they are written with the regular
+    // serializer, tag included.
+    fn serialize_key<T>(&mut self, key: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        Serialize::serialize(key, &mut **self)
+    }
+
+    // The value goes straight after its key.
+    fn serialize_value<T>(&mut self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        Serialize::serialize(value, &mut **self)
+    }
+
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Struct fields are map entries whose key is the field name, written as a
+// string.
+impl<W: Write> ser::SerializeStruct for &mut Serializer<W> {
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        ser::Serializer::serialize_str(&mut **self, key)?;
+        Serialize::serialize(value, &mut **self)
+    }
+
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+
+    // A skipped field writes nothing at all.
+    fn skip_field(&mut self, key: &'static str) -> std::prelude::v1::Result<(), Self::Error> {
+        let _ = key;
+        Ok(())
+    }
+}
+
+// Fields of a struct variant: name/value pairs written after the header that
+// `serialize_struct_variant` produced. `end` writes nothing.
+impl<W: Write> ser::SerializeStructVariant for &mut Serializer<W> {
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        ser::Serializer::serialize_str(&mut **self, key)?;
+        Serialize::serialize(value, &mut **self)
+    }
+
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+#[cfg(test)]
+mod tests;
diff --git a/src/ser/tests.rs b/src/ser/tests.rs
new file mode 100644
index 0000000..eb9e968
--- /dev/null
+++ b/src/ser/tests.rs
@@ -0,0 +1,107 @@
+use super::*;
+use atools::prelude::*;
+/// Number of bytes the unsigned LEB128 encoding of `x` occupies (at least one).
+const fn lenleb128(x: u128) -> usize {
+    let mut bytes = 1;
+    let mut rest = x >> 7;
+    while rest != 0 {
+        bytes += 1;
+        rest >>= 7;
+    }
+    bytes
+}
+/// Unsigned LEB128 encoding of the constant `X`, as an array of exactly the
+/// required length.
+const fn leb128<const X: u128>() -> [u8; lenleb128(X)] {
+    let mut into: [u8; _] = [0; _];
+    let mut rest = X;
+    let mut at = 0;
+    // Every byte but the last carries the continuation bit.
+    while rest >> 7 != 0 {
+        into[at] = (rest & 127) as u8 | 0x80;
+        rest >>= 7;
+        at += 1;
+    }
+    into[at] = (rest & 127) as u8;
+    into
+}
+
+// Checks the exact bytes produced for a struct: a map header, then each field
+// as a string key followed by its value.
+#[test]
+fn test_struct() {
+ #[derive(Serialize)]
+ struct Test {
+ int: u32,
+ seq: Vec<&'static str>,
+ }
+
+ let test = Test {
+ int: 1,
+ seq: vec!["hello", "bendncode"],
+ };
+ // 'm' + field count, then "int" -> uint 1, then "seq" -> list of two strings.
+ let expected = (*b"m")
+ .couple(leb128::<2>())
+ .couple(*b"s\x03intu")
+ .couple(leb128::<1>())
+ .couple(*b"s\x03seq")
+ .join(b'l')
+ .couple(leb128::<2>())
+ .join(b's')
+ .couple(leb128::<5>())
+ .couple(*b"hello")
+ .join(b's')
+ .couple(leb128::<9>())
+ .couple(*b"bendncode");
+
+ assert_eq!(to_bytes(&test).unwrap(), expected);
+}
+
+// Checks the exact bytes produced for each of the four enum variant shapes.
+#[test]
+fn test_enum() {
+ #[derive(Serialize)]
+ enum E {
+ Unit,
+ Newtype(u32),
+ Tuple(u32, u32),
+ Struct { a: u32 },
+ }
+
+ // Unit variant: 'n', then the index as a tagged uint.
+ let u = E::Unit;
+ let expected = [b'n', b'u', 0];
+ assert_eq!(to_bytes(&u).unwrap(), expected);
+
+ // Newtype variant: 'v', tagged index 1, then the tagged payload.
+
+ let n = E::Newtype(51);
+ let expected = b"vu"
+ .couple(leb128::<1>())
+ .join(b'u') // i rather dislike this byte
+ .couple(leb128::<51>());
+
+ assert_eq!(to_bytes(&n).unwrap(), expected);
+ // Tuple variant: 'x', tagged index 2, field count 2, then both tagged fields.
+ let t = E::Tuple(1, 24151561);
+ let expected = b"xu"
+ .couple(leb128::<2>())
+ .couple(leb128::<2>())
+ .join(b'u')
+ .couple(leb128::<1>())
+ .join(b'u')
+ .couple(leb128::<24151561>());
+ assert_eq!(to_bytes(&t).unwrap(), expected);
+ // Struct variant: 'y', tagged index 3, field count 1, then key "a" and its tagged value.
+ let s = E::Struct { a: 1 };
+ let expected = b"yu"
+ .couple(leb128::<3>())
+ .couple(leb128::<1>())
+ .join(b's')
+ .couple(leb128::<1>())
+ .couple(*b"au")
+ .couple(leb128::<1>());
+ assert_eq!(to_bytes(&s).unwrap(), expected);
+}
diff --git a/src/serde.rs b/src/serde.rs
new file mode 100644
index 0000000..1948e69
--- /dev/null
+++ b/src/serde.rs
@@ -0,0 +1,26 @@
+/// Tag bytes of the wire format. Each variant's discriminant is the byte
+/// written to the output; apart from the two booleans they are ASCII letters.
+#[repr(u8)]
+#[derive(Copy, Clone, Debug)]
+pub enum T {
+ // Booleans have no payload: the tag byte is the value.
+ False = 0,
+ True = 1,
+
+ // Signed integer, followed by signed LEB128.
+ Int = b'i',
+ // Unsigned integer, followed by unsigned LEB128.
+ Uint = b'u',
+ // f32 payload.
+ Float = b'f',
+ // f64 payload.
+ Double = b'd',
+ // Length-prefixed bytes; used for both strings and byte slices.
+ String = b's',
+ // Newtype enum variant.
+ NVariant = b'v',
+ // Tuple enum variant.
+ TVariant = b'x',
+ // Struct enum variant.
+ SVariant = b'y',
+ // Unit enum variant.
+ UVariant = b'n',
+ // Length-prefixed sequence.
+ List = b'l',
+ // Tuple = b't',
+ // Length-prefixed map.
+ Map = b'm',
+ // Absent optional; also used for `()` and unit structs.
+ None = b'z',
+ // Present optional, followed by the value.
+ Some = b'o',
+}
+/// Allows comparing a tag directly with a raw byte (`T::Int == byte`).
+impl PartialEq<u8> for T {
+ fn eq(&self, other: &u8) -> bool {
+ *self as u8 == *other
+ }
+}