bencode inspired tight self describing serialization format
Diffstat (limited to 'src/ser.rs')
| -rw-r--r-- | src/ser.rs | 470 |
1 files changed, 470 insertions, 0 deletions
diff --git a/src/ser.rs b/src/ser.rs new file mode 100644 index 0000000..82b3600 --- /dev/null +++ b/src/ser.rs @@ -0,0 +1,470 @@ +use crate::error::{Error, Result}; +use crate::serde::T; +use raad::le::*; +use serde::{Serialize, ser}; +use std::io::Write; + +pub fn to_bytes<T>(value: &T) -> Result<Vec<u8>> +where + T: Serialize, +{ + let mut v = vec![]; + let mut serializer = Serializer { w: &mut v }; + value.serialize(&mut serializer)?; + Ok(v) +} +pub struct Serializer<W: std::io::Write> { + w: W, +} +impl<W: std::io::Write> Serializer<W> { + fn wh(&mut self, h: T, w: impl raad::le::Writable) -> Result<()> { + self.t(h)?; + self.w.w(w)?; + Ok(()) + // self.w.w(e); + } + fn t(&mut self, h: T) -> Result<()> { + self.w.w(h as u8)?; + Ok(()) + } + fn leb128h(&mut self, h: T, w: impl Into<u128>) -> Result<()> { + self.t(h)?; + self.leb128(w) + } + fn leb128(&mut self, w: impl Into<u128>) -> Result<()> { + let mut w = w.into(); + loop { + let n = (w & 127) as u8; + w >>= 7; + if w == 0 { + self.w.w(n)?; + break; + } else { + self.w.w(n | 1 << 7)?; + } + } + Ok(()) + } + fn sleb128(&mut self, mut value: i128) -> Result<()> { + loop { + let n = (value & 127) as u8; + value >>= 7; + let sign_bit = n & 64; + if (value == 0 && sign_bit == 0) || (value == -1 && sign_bit != 0) { + self.w.w(n)?; + break; + } else { + self.w.w(n | 1 << 7)?; + } + } + Ok(()) + } +} + +impl<W: std::io::Write> ser::Serializer for &mut Serializer<W> { + // The output type produced by this `Serializer` during successful + // serialization. Most serializers that produce text or binary output should + // set `Ok = ()` and serialize into an `io::Write` or buffer contained + // within the `Serializer` instance, as happens here. Serializers that build + // in-memory data structures may be simplified by using `Ok` to propagate + // the data structure around. + type Ok = (); + + // The error type when some error occurs during serialization. + type Error = Error; + + // Associated types for keeping track of additional state while serializing + // compound data structures like sequences and maps. In this case no + // additional state is required beyond what is already stored in the + // Serializer struct. + type SerializeSeq = Self; + type SerializeTuple = Self; + type SerializeTupleStruct = Self; + type SerializeTupleVariant = Self; + type SerializeMap = Self; + type SerializeStruct = Self; + type SerializeStructVariant = Self; + + // Here we go with the simple methods. The following 12 methods receive one + // of the primitive types of the data model and map it to JSON by appending + // into the output string. + fn serialize_bool(self, v: bool) -> Result<()> { + // println!("serialize bool {v}"); + self.w.w(v as u8)?; + Ok(()) + } + + // JSON does not distinguish between different sizes of integers, so all + // signed integers will be serialized the same and all unsigned integers + // will be serialized the same. Other formats, especially compact binary + // formats, may need independent logic for the different sizes. + fn serialize_i8(self, v: i8) -> Result<()> { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i16(self, v: i16) -> Result<()> { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i32(self, v: i32) -> Result<()> { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i64(self, v: i64) -> Result<()> { + // println!("serialize i64 {v}"); + self.t(T::Int)?; + self.sleb128(v as i128) + } + + fn serialize_u8(self, v: u8) -> Result<()> { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u16(self, v: u16) -> Result<()> { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u32(self, v: u32) -> Result<()> { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u64(self, v: u64) -> Result<()> { + // println!("serialize u64 {v}"); + self.leb128h(T::Uint, v) + } + + fn serialize_f32(self, v: f32) -> Result<()> { + // println!("serialize f32 {v}"); + self.wh(T::Float, v) + } + + fn serialize_f64(self, v: f64) -> Result<()> { + // println!("serialize f64 {v}"); + self.wh(T::Double, v) + } + + fn serialize_char(self, v: char) -> Result<()> { + self.serialize_u32(u32::from(v)) + } + + fn serialize_str(self, v: &str) -> Result<()> { + self.serialize_bytes(v.as_bytes()) + } + + fn serialize_bytes(self, v: &[u8]) -> Result<()> { + // println!("serialize bytes {v:?}"); + self.leb128h(T::String, v.len() as u128)?; + self.w.w(v)?; + Ok(()) + } + + // An absent optional is represented as the JSON `null`. + fn serialize_none(self) -> Result<()> { + self.t(T::None) + } + + fn serialize_some<U>(self, value: &U) -> Result<()> + where + U: ?Sized + Serialize, + { + self.t(T::Some)?; + value.serialize(self) + } + + fn serialize_unit(self) -> Result<()> { + self.serialize_none() + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result<()> { + self.serialize_none() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + i: u32, + _variant: &'static str, + ) -> Result<()> { + // println!("uv"); + self.w.w(T::UVariant as u8)?; + self.serialize_u32(i) + } + + // As is done here, serializers are encouraged to treat newtype structs as + // insignificant wrappers around the data they contain. + fn serialize_newtype_struct<T>(self, _name: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + // Note that newtype variant (and all of the other variant serialization + // methods) refer exclusively to the "externally tagged" enum + // representation. + // + // Serialize this to JSON in externally tagged form as `{ NAME: VALUE }`. + fn serialize_newtype_variant<U>( + self, + _name: &'static str, + ix: u32, + _vname: &'static str, + value: &U, + ) -> Result<()> + where + U: ?Sized + Serialize, + { + // println!("serialize variant {_vname} of {_name} ix {ix}"); + self.w.w(T::NVariant as u8)?; + self.serialize_u32(ix)?; + value.serialize(self) + } + + // Now we get to the serialization of compound types. + // + // The start of the sequence, each value, and the end are three separate + // method calls. This one is responsible only for serializing the start, + // which in JSON is `[`. + // + // The length of the sequence may or may not be known ahead of time. This + // doesn't make a difference in JSON because the length is not represented + // explicitly in the serialized form. Some serializers may only be able to + // support sequences for which the length is known up front. + fn serialize_seq(self, l: Option<usize>) -> Result<Self::SerializeSeq> { + // println!("serialize list of len {l:?}"); + self.leb128h(T::List, l.unwrap() as u128)?; + Ok(self) + } + + fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> { + self.serialize_seq(Some(len)) + } + + // Tuple structs look just like sequences in JSON. + fn serialize_tuple_struct( + self, + _name: &'static str, + len: usize, + ) -> Result<Self::SerializeTupleStruct> { + self.serialize_seq(Some(len)) + } + + // Tuple variants are represented in JSON as `{ NAME: [DATA...] }`. Again + // this method is only responsible for the externally tagged representation. + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleVariant> { + // println!("serialize tuple variant {_variant} of {_name} {_variant_index} {_variant}"); + self.w.w(T::TVariant as u8)?; + self.serialize_u32(_variant_index)?; + self.leb128(_len as u128)?; + // self.output += "{"; + // variant.serialize(&mut *self)?; + // self.output += ":["; + Ok(self) + } + + // Maps are represented in JSON as `{ K: V, K: V, ... }`. + fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> { + self.w.w(T::Map as u8)?; + // println!("{_len:?}"); + self.leb128(_len.ok_or(Error::LenLess)? as u128)?; + Ok(self) + + // Ok(self) + } + + // Structs look just like maps in JSON. In particular, JSON requires that we + // serialize the field names of the struct. Other formats may be able to + // omit the field names when serializing structs because the corresponding + // Deserialize implementation is required to know what the keys are without + // looking at the serialized data. + fn serialize_struct(self, _name: &'static str, len: usize) -> Result<Self::SerializeStruct> { + self.serialize_map(Some(len)) + } + + // Struct variants are represented in JSON as `{ NAME: { K: V, ... } }`. + // This is the externally tagged representation. + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeStructVariant> { + // println!("ser struct v {_name} {_variant_index} {variant} {_len}"); + self.w.w(T::SVariant as u8)?; + self.serialize_u32(_variant_index)?; + self.leb128(_len as u128)?; + Ok(self) + } +} + +// The following 7 impls deal with the serialization of compound types like +// sequences and maps. Serialization of such types is begun by a Serializer +// method and followed by zero or more calls to serialize individual elements of +// the compound type and one call to end the compound type. +// +// This impl is SerializeSeq so these methods are called after `serialize_seq` +// is called on the Serializer. +impl<W: Write> ser::SerializeSeq for &mut Serializer<W> { + // Must match the `Ok` type of the serializer. + type Ok = (); + // Must match the `Error` type of the serializer. + type Error = Error; + + // Serialize a single element of the sequence. + fn serialize_element<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + // Close the sequence. + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Same thing but for tuples. +impl<W: Write> ser::SerializeTuple for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_element<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Same thing but for tuple structs. +impl<W: Write> ser::SerializeTupleStruct for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} +/// A Seq. +impl<W: Write> ser::SerializeTupleVariant for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Some `Serialize` types are not able to hold a key and value in memory at the +// same time so `SerializeMap` implementations are required to support +// `serialize_key` and `serialize_value` individually. +// +// There is a third optional method on the `SerializeMap` trait. The +// `serialize_entry` method allows serializers to optimize for the case where +// key and value are both available simultaneously. In JSON it doesn't make a +// difference so the default behavior for `serialize_entry` is fine. +impl<W: Write> ser::SerializeMap for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + // The Serde data model allows map keys to be any serializable type. JSON + // only allows string keys so the implementation below will produce invalid + // JSON if the key serializes as something other than a string. + // + // A real JSON serializer would need to validate that map keys are strings. + // This can be done by using a different Serializer to serialize the key + // (instead of `&mut **self`) and having that other serializer only + // implement `serialize_str` and return an error on any other data type. + fn serialize_key<T>(&mut self, key: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + key.serialize(&mut **self) + } + + // It doesn't make a difference whether the colon is printed at the end of + // `serialize_key` or at the beginning of `serialize_value`. In this case + // the code is a bit simpler having it here. + fn serialize_value<T>(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +// Structs are like maps in which the keys are constrained to be compile-time +// constant strings. +impl<W: Write> ser::SerializeStruct for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + // key.serialize(&mut **self)?; + key.serialize(&mut **self)?; + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } + + fn skip_field(&mut self, key: &'static str) -> std::prelude::v1::Result<(), Self::Error> { + let _ = key; + Ok(()) + } +} + +// Similar to `SerializeTupleVariant`, here the `end` method is responsible for +// closing both of the curly braces opened by `serialize_struct_variant`. +impl<W: Write> ser::SerializeStructVariant for &mut Serializer<W> { + type Ok = (); + type Error = Error; + + fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + key.serialize(&mut **self)?; + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} +#[cfg(test)] +mod tests; |