Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,15 @@ where
// We shouldn't have both `$value` and `$text` fields in the same
// struct, so if we have a `$value` field, then we should deserialize
// text content to `$value`
DeEvent::Text(_) if self.has_value_field => {
DeEvent::Text(_) | DeEvent::Binary(_) if self.has_value_field => {
self.source = ValueSource::Content;
// Deserialize `key` from special attribute name which means
// that value should be taken from the text content of the
// XML node
let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
seed.deserialize(de).map(Some)
}
DeEvent::Text(_) => {
DeEvent::Text(_) | DeEvent::Binary(_) => {
self.source = ValueSource::Text;
// Deserialize `key` from special attribute name which means
// that value should be taken from the text content of the
Expand Down Expand Up @@ -943,6 +943,9 @@ where
// SAFETY: we just checked that the next event is Text
_ => unreachable!(),
},
DeEvent::Binary(_) => Err(Self::Error::Unsupported(
"undecodable binary data among a sequence of xml elements".into(),
)),
DeEvent::Start(_) => match self.map.de.next()? {
DeEvent::Start(start) => seed
.deserialize(ElementDeserializer {
Expand Down
74 changes: 71 additions & 3 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,31 @@ impl<'a> From<&'a str> for Text<'a> {
}
}

/// Content of a text node kept as raw, undecoded bytes.
///
/// The bytes are stored in a [`Cow`], so they may be either borrowed for `'a`
/// or owned. The type dereferences to `[u8]`, which gives direct access to
/// the stored bytes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Binary<'a> {
/// The raw bytes of the content.
///
/// NOTE(review): the field is named `text` although it holds bytes that
/// have not been decoded into a string.
pub text: Cow<'a, [u8]>,
}

impl<'a> Deref for Binary<'a> {
    type Target = [u8];

    /// Gives access to the stored bytes as a plain slice, regardless of
    /// whether they are borrowed or owned.
    #[inline]
    fn deref(&self) -> &Self::Target {
        // `&Cow<[u8]>` coerces to `&[u8]` through `Cow`'s own `Deref` impl.
        &self.text
    }
}

impl<'a> From<&'a [u8]> for Binary<'a> {
#[inline]
fn from(text: &'a [u8]) -> Self {
Self {
text: Cow::Borrowed(text),
}
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Simplified event which contains only these variants that used by deserializer
Expand All @@ -2074,6 +2099,8 @@ pub enum DeEvent<'a> {
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
Text(Text<'a>),
/// Content of a text node that could not be unescaped, kept as raw undecoded bytes.
Binary(Binary<'a>),
/// End of XML document.
Eof,
}
Expand Down Expand Up @@ -2217,7 +2244,16 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
// FIXME: Actually, we should trim after decoding text, but now we trim before
continue;
}
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
match e
.unescape_with(|entity| self.entity_resolver.resolve(entity))
.map(|res| self.drain_text(res))
{
Ok(x) => x,
// Failed to unescape the text: treat the raw content as a binary blob.
Err(_) => Ok(DeEvent::Binary(Binary {
text: e.into_inner(),
})),
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We definitely shouldn't rely on luck here. Binary should be explicitly requested for the field via a flag in the field name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the best way/mechanism to maintain context in the code to keep track of flags like that?

}
PayloadEvent::CData(e) => self.drain_text(e.decode()?),
PayloadEvent::DocType(e) => {
Expand Down Expand Up @@ -2687,6 +2723,8 @@ where
fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
match self.next()? {
DeEvent::Text(e) => Ok(e.text),
// SAFETY: Binary event should never be emitted for decoded strings.
DeEvent::Binary(e) => unreachable!("{:?}", e),
// allow one nested level
DeEvent::Start(e) if allow_start => self.read_text(e.name()),
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
Expand All @@ -2708,10 +2746,12 @@ where
// The matching tag name is guaranteed by the reader
DeEvent::End(_) => Ok(e.text),
// SAFETY: There cannot be two consecutive Text events, they would be merged into one
DeEvent::Text(_) => unreachable!(),
DeEvent::Text(_) | DeEvent::Binary(_) => unreachable!(),
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
},
// SAFETY: Binary event should never be emitted for decoded strings.
DeEvent::Binary(e) => unreachable!("{:?}", e),
// We can get End event in case of `<tag></tag>` or `<tag/>` input
// Return empty text in that case
// The matching tag name is guaranteed by the reader
Expand Down Expand Up @@ -2827,6 +2867,30 @@ where
}
}

impl<'de, R> Deserializer<'de, IoReader<R>>
where
    R: BufRead,
{
    /// Creates a deserializer on top of an already constructed [`Reader`].
    ///
    /// The given `reader` is used as-is. Data read from it is copied into an
    /// internal buffer, which starts out empty.
    ///
    /// If you already have a string, prefer [`Self::from_str`], because it
    /// borrows from the input instead of copying it. If you have a `&[u8]`
    /// that is known to be UTF-8, decode it first and then use
    /// [`Self::from_str`].
    ///
    /// The deserializer is created with [`PredefinedEntityResolver`], so it
    /// will not resolve custom entities.
    pub fn from_custom_reader(reader: Reader<R>) -> Self {
        let source = IoReader {
            reader,
            start_trimmer: StartTrimmer::default(),
            buf: Vec::new(),
        };
        Self::new(source, PredefinedEntityResolver)
    }
}

impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
where
R: BufRead,
Expand Down Expand Up @@ -2884,6 +2948,10 @@ where
Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
Cow::Owned(s) => visitor.visit_string(s),
},
DeEvent::Binary(e) => match e.text {
Cow::Borrowed(s) => visitor.visit_borrowed_bytes(s),
Cow::Owned(s) => visitor.visit_byte_buf(s),
},
DeEvent::Eof => Err(DeError::UnexpectedEof),
}
}
Expand Down Expand Up @@ -2914,7 +2982,7 @@ where
self.read_to_end(s.name())?;
visitor.visit_unit()
}
DeEvent::Text(_) => visitor.visit_unit(),
DeEvent::Text(_) | DeEvent::Binary(_) => visitor.visit_unit(),
// SAFETY: The reader is guaranteed that we don't have unmatched tags
// If we are here, then our deserializer has a bug
DeEvent::End(e) => unreachable!("{:?}", e),
Expand Down
2 changes: 1 addition & 1 deletion src/de/var.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ where
seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?,
false,
),
DeEvent::Text(_) => (
DeEvent::Text(_) | DeEvent::Binary(_) => (
seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
true,
),
Expand Down
6 changes: 6 additions & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,4 +464,10 @@ pub mod serialize {
Self::Custom(e.to_string())
}
}
impl From<std::io::Error> for DeError {
#[inline]
fn from(e: std::io::Error) -> Self {
Self::Custom(e.to_string())
}
}
}
Loading