Bencode-inspired, tight, self-describing serialization format
Diffstat (limited to 'src/ser.rs')
-rw-r--r--src/ser.rs470
1 files changed, 470 insertions, 0 deletions
diff --git a/src/ser.rs b/src/ser.rs
new file mode 100644
index 0000000..82b3600
--- /dev/null
+++ b/src/ser.rs
@@ -0,0 +1,470 @@
+use crate::error::{Error, Result};
+use crate::serde::T;
+use raad::le::*;
+use serde::{Serialize, ser};
+use std::io::Write;
+
+/// Serializes `value` into a freshly allocated byte vector.
+///
+/// # Errors
+///
+/// Propagates any error raised while `value` is being serialized.
+pub fn to_bytes<T>(value: &T) -> Result<Vec<u8>>
+where
+    T: Serialize,
+{
+    let mut out = Vec::new();
+    // The serializer only borrows `out` for the duration of this statement.
+    value.serialize(&mut Serializer { w: &mut out })?;
+    Ok(out)
+}
+/// Serializer that streams the encoded bytes into the wrapped writer `w`.
+pub struct Serializer<W>
+where
+    W: std::io::Write,
+{
+    // Destination of every byte this serializer produces.
+    w: W,
+}
+impl<W> Serializer<W>
+where
+    W: std::io::Write,
+{
+    /// Writes the tag byte for `tag`, then `payload` through the `raad::le`
+    /// writer.
+    fn wh(&mut self, tag: T, payload: impl raad::le::Writable) -> Result<()> {
+        self.t(tag)?;
+        self.w.w(payload)?;
+        Ok(())
+    }
+
+    /// Writes `tag` as a single byte.
+    fn t(&mut self, tag: T) -> Result<()> {
+        self.w.w(tag as u8)?;
+        Ok(())
+    }
+
+    /// Writes the tag byte for `tag`, then `value` as unsigned LEB128.
+    fn leb128h(&mut self, tag: T, value: impl Into<u128>) -> Result<()> {
+        self.t(tag)?;
+        self.leb128(value)
+    }
+
+    /// Writes `value` as unsigned LEB128: seven bits per byte, lowest group
+    /// first, with the high bit set on every byte except the last.
+    fn leb128(&mut self, value: impl Into<u128>) -> Result<()> {
+        let mut rest: u128 = value.into();
+        // Every group that still has bits above it gets the continuation bit.
+        while rest >= 0x80 {
+            self.w.w((rest & 0x7f) as u8 | 0x80)?;
+            rest >>= 7;
+        }
+        // Final group: fits in seven bits, continuation bit clear.
+        self.w.w(rest as u8)?;
+        Ok(())
+    }
+
+    /// Writes `value` as signed LEB128, stopping once the bits that remain
+    /// are nothing but the sign extension of the byte just produced.
+    fn sleb128(&mut self, mut value: i128) -> Result<()> {
+        loop {
+            let low = (value & 0x7f) as u8;
+            // Arithmetic shift: negative values converge on -1, others on 0.
+            value >>= 7;
+            let sign_set = low & 0x40 != 0;
+            let finished = match value {
+                0 => !sign_set,
+                -1 => sign_set,
+                _ => false,
+            };
+            if finished {
+                self.w.w(low)?;
+                return Ok(());
+            }
+            self.w.w(low | 0x80)?;
+        }
+    }
+}
+
+impl<W: std::io::Write> ser::Serializer for &mut Serializer<W> {
+    // Output is streamed into the wrapped writer, so success carries no value.
+    type Ok = ();
+
+    // The error type when some error occurs during serialization.
+    type Error = Error;
+
+    // Compound values write their element count up front and nothing at the
+    // end, so no state beyond the `Serializer` itself has to be tracked.
+    type SerializeSeq = Self;
+    type SerializeTuple = Self;
+    type SerializeTupleStruct = Self;
+    type SerializeTupleVariant = Self;
+    type SerializeMap = Self;
+    type SerializeStruct = Self;
+    type SerializeStructVariant = Self;
+
+    // Scalars. Unless noted otherwise, a value is one tag byte (a `T` variant)
+    // followed by its payload.
+
+    // A bool is a single byte, `0` or `1`, with no separate tag byte.
+    // NOTE(review): presumably those two byte values are reserved for
+    // false/true in `crate::serde::T` — confirm against the tag definitions.
+    fn serialize_bool(self, v: bool) -> Result<()> {
+        self.w.w(v as u8)?;
+        Ok(())
+    }
+
+    // All signed integers are widened to `i64` and share one encoding.
+    fn serialize_i8(self, v: i8) -> Result<()> {
+        self.serialize_i64(i64::from(v))
+    }
+
+    fn serialize_i16(self, v: i16) -> Result<()> {
+        self.serialize_i64(i64::from(v))
+    }
+
+    fn serialize_i32(self, v: i32) -> Result<()> {
+        self.serialize_i64(i64::from(v))
+    }
+
+    // `T::Int` tag followed by the value as signed LEB128.
+    fn serialize_i64(self, v: i64) -> Result<()> {
+        self.t(T::Int)?;
+        self.sleb128(i128::from(v))
+    }
+
+    // All unsigned integers are widened to `u64` and share one encoding.
+    fn serialize_u8(self, v: u8) -> Result<()> {
+        self.serialize_u64(u64::from(v))
+    }
+
+    fn serialize_u16(self, v: u16) -> Result<()> {
+        self.serialize_u64(u64::from(v))
+    }
+
+    fn serialize_u32(self, v: u32) -> Result<()> {
+        self.serialize_u64(u64::from(v))
+    }
+
+    // `T::Uint` tag followed by the value as unsigned LEB128.
+    fn serialize_u64(self, v: u64) -> Result<()> {
+        self.leb128h(T::Uint, v)
+    }
+
+    // `T::Float` tag followed by the value as written by the `raad::le` writer.
+    fn serialize_f32(self, v: f32) -> Result<()> {
+        self.wh(T::Float, v)
+    }
+
+    // `T::Double` tag followed by the value as written by the `raad::le` writer.
+    fn serialize_f64(self, v: f64) -> Result<()> {
+        self.wh(T::Double, v)
+    }
+
+    // A char is written as its `u32` scalar value, i.e. as an unsigned integer.
+    fn serialize_char(self, v: char) -> Result<()> {
+        self.serialize_u32(u32::from(v))
+    }
+
+    // Strings and byte slices share one encoding.
+    fn serialize_str(self, v: &str) -> Result<()> {
+        self.serialize_bytes(v.as_bytes())
+    }
+
+    // `T::String` tag, the byte length as unsigned LEB128, then the raw bytes.
+    fn serialize_bytes(self, v: &[u8]) -> Result<()> {
+        self.leb128h(T::String, v.len() as u128)?;
+        self.w.w(v)?;
+        Ok(())
+    }
+
+    // An absent optional is the lone `T::None` tag.
+    fn serialize_none(self) -> Result<()> {
+        self.t(T::None)
+    }
+
+    // A present optional is the `T::Some` tag followed by the inner value.
+    fn serialize_some<U>(self, value: &U) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        self.t(T::Some)?;
+        value.serialize(self)
+    }
+
+    // `()` and unit structs are written exactly like `None`.
+    fn serialize_unit(self) -> Result<()> {
+        self.serialize_none()
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
+        self.serialize_none()
+    }
+
+    // `T::UVariant` tag followed by the variant index, itself written as a
+    // tagged unsigned integer. Variant and enum names are not written.
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        i: u32,
+        _variant: &'static str,
+    ) -> Result<()> {
+        self.t(T::UVariant)?;
+        self.serialize_u32(i)
+    }
+
+    // Newtype structs are transparent: only the wrapped value is written.
+    fn serialize_newtype_struct<U>(self, _name: &'static str, value: &U) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        value.serialize(self)
+    }
+
+    // `T::NVariant` tag, the variant index as a tagged unsigned integer, then
+    // the wrapped value.
+    fn serialize_newtype_variant<U>(
+        self,
+        _name: &'static str,
+        ix: u32,
+        _vname: &'static str,
+        value: &U,
+    ) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        self.t(T::NVariant)?;
+        self.serialize_u32(ix)?;
+        value.serialize(self)
+    }
+
+    // Compound types. Each one starts with a header that carries the element
+    // count; the elements follow back to back and nothing marks the end.
+
+    // `T::List` tag followed by the element count as unsigned LEB128.
+    //
+    // The count is part of the header, so a sequence whose length is not
+    // known up front cannot be represented: it yields `Error::LenLess`
+    // instead of panicking, and nothing is written.
+    fn serialize_seq(self, l: Option<usize>) -> Result<Self::SerializeSeq> {
+        let len = l.ok_or(Error::LenLess)?;
+        self.leb128h(T::List, len as u128)?;
+        Ok(self)
+    }
+
+    // Tuples are written exactly like sequences.
+    fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> {
+        self.serialize_seq(Some(len))
+    }
+
+    // Tuple structs are written exactly like sequences.
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        len: usize,
+    ) -> Result<Self::SerializeTupleStruct> {
+        self.serialize_seq(Some(len))
+    }
+
+    // `T::TVariant` tag, the variant index as a tagged unsigned integer, then
+    // the field count as bare (untagged) unsigned LEB128.
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        variant_index: u32,
+        _variant: &'static str,
+        len: usize,
+    ) -> Result<Self::SerializeTupleVariant> {
+        self.t(T::TVariant)?;
+        self.serialize_u32(variant_index)?;
+        self.leb128(len as u128)?;
+        Ok(self)
+    }
+
+    // `T::Map` tag followed by the entry count as unsigned LEB128.
+    //
+    // The length is validated before anything is written so that a map of
+    // unknown length fails with `Error::LenLess` without leaving a dangling
+    // tag byte in the output.
+    fn serialize_map(self, len: Option<usize>) -> Result<Self::SerializeMap> {
+        let len = len.ok_or(Error::LenLess)?;
+        self.leb128h(T::Map, len as u128)?;
+        Ok(self)
+    }
+
+    // Structs are written exactly like maps; the field names become the keys.
+    fn serialize_struct(self, _name: &'static str, len: usize) -> Result<Self::SerializeStruct> {
+        self.serialize_map(Some(len))
+    }
+
+    // `T::SVariant` tag, the variant index as a tagged unsigned integer, then
+    // the field count as bare (untagged) unsigned LEB128.
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        variant_index: u32,
+        _variant: &'static str,
+        len: usize,
+    ) -> Result<Self::SerializeStructVariant> {
+        self.t(T::SVariant)?;
+        self.serialize_u32(variant_index)?;
+        self.leb128(len as u128)?;
+        Ok(self)
+    }
+}
+
+// The seven impls below handle the bodies of compound types. A `Serializer`
+// method writes the header first; afterwards each element goes through the
+// matching trait here, followed by a single call to `end`.
+//
+// `SerializeSeq` is driven after `serialize_seq`.
+impl<W> ser::SerializeSeq for &mut Serializer<W>
+where
+    W: Write,
+{
+    // Both associated types mirror the ones declared on the serializer.
+    type Ok = ();
+    type Error = Error;
+
+    // Each element is written as an ordinary self-describing value.
+    fn serialize_element<U>(&mut self, element: &U) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        element.serialize(&mut **self)
+    }
+
+    // Nothing is written when the sequence ends.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Tuple elements: each one is written as an ordinary value, nothing at the end.
+impl<W> ser::SerializeTuple for &mut Serializer<W>
+where
+    W: Write,
+{
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_element<U>(&mut self, element: &U) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        element.serialize(&mut **self)
+    }
+
+    // Nothing is written when the tuple ends.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Tuple-struct fields: each one is written as an ordinary value, nothing at
+// the end.
+impl<W> ser::SerializeTupleStruct for &mut Serializer<W>
+where
+    W: Write,
+{
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_field<U>(&mut self, field: &U) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        field.serialize(&mut **self)
+    }
+
+    // Nothing is written when the tuple struct ends.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+// Tuple-variant fields, written after the header produced by
+// `serialize_tuple_variant`: each one is an ordinary value, nothing at the end.
+impl<W> ser::SerializeTupleVariant for &mut Serializer<W>
+where
+    W: Write,
+{
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_field<U>(&mut self, field: &U) -> Result<()>
+    where
+        U: ?Sized + Serialize,
+    {
+        field.serialize(&mut **self)
+    }
+
+    // Nothing is written when the variant ends.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Map entries. Keys and values are serialized through separate calls because
+// some `Serialize` types cannot hold both in memory at once. The optional
+// `serialize_entry` method is left at its default.
+impl<W> ser::SerializeMap for &mut Serializer<W>
+where
+    W: Write,
+{
+    type Ok = ();
+    type Error = Error;
+
+    // A key may be any serializable type; it is written as an ordinary
+    // self-describing value.
+    fn serialize_key<K>(&mut self, key: &K) -> Result<()>
+    where
+        K: ?Sized + Serialize,
+    {
+        key.serialize(&mut **self)
+    }
+
+    // The value follows its key directly, with nothing written in between.
+    fn serialize_value<V>(&mut self, value: &V) -> Result<()>
+    where
+        V: ?Sized + Serialize,
+    {
+        value.serialize(&mut **self)
+    }
+
+    // Nothing is written when the map ends.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Struct fields are written like map entries whose keys are the compile-time
+// field names.
+impl<W> ser::SerializeStruct for &mut Serializer<W>
+where
+    W: Write,
+{
+    type Ok = ();
+    type Error = Error;
+
+    // The field name goes out first as a string, then the field's value.
+    fn serialize_field<V>(&mut self, key: &'static str, value: &V) -> Result<()>
+    where
+        V: ?Sized + Serialize,
+    {
+        key.serialize(&mut **self)?;
+        value.serialize(&mut **self)
+    }
+
+    // Nothing is written when the struct ends.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+
+    // A skipped field leaves no trace in the output.
+    fn skip_field(&mut self, _key: &'static str) -> Result<()> {
+        Ok(())
+    }
+}
+
+// Struct-variant fields, written after the header produced by
+// `serialize_struct_variant`. As with `SerializeTupleVariant`, `end` writes
+// nothing.
+impl<W> ser::SerializeStructVariant for &mut Serializer<W>
+where
+    W: Write,
+{
+    type Ok = ();
+    type Error = Error;
+
+    // The field name goes out first as a string, then the field's value.
+    fn serialize_field<V>(&mut self, key: &'static str, value: &V) -> Result<()>
+    where
+        V: ?Sized + Serialize,
+    {
+        key.serialize(&mut **self)?;
+        value.serialize(&mut **self)
+    }
+
+    // Nothing is written when the variant ends.
+    fn end(self) -> Result<()> {
+        Ok(())
+    }
+}
+#[cfg(test)]
+mod tests;