Skip to content

Commit 1955b5b

Browse files
PingasmasterByron
authored andcommitted
fix!: expose raw commit/tag actor headers for round-tripping.
Note that this means you have to call `CommitRef::commiter|author()?` and `TagRef::tagger()?` instead of assuming pre-parsed fields. This PR makes signature handling truly lossless for "creative" emails and other info. We now stash the raw name <email> slice on IdentityRef/SignatureRef and fall back to it when rewriting, so even commits with embedded angle brackets round-trip cleanly (might want to expand to other malformed characters before merging? Parsing and serialization honor that flag but still keep strict validation for normal input. I also added regression coverage for these scenarios.
1 parent 3c2b422 commit 1955b5b

File tree

21 files changed

+243
-162
lines changed

21 files changed

+243
-162
lines changed

examples/log.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,16 @@ fn run(args: Args) -> anyhow::Result<()> {
142142
let info = info?;
143143
let commit = info.object()?;
144144
let commit_ref = commit.decode()?;
145+
let author = commit_ref.author()?;
145146
Ok(LogEntryInfo {
146147
commit_id: commit.id().to_hex().to_string(),
147148
parents: info.parent_ids().map(|id| id.shorten_or_id().to_string()).collect(),
148149
author: {
149150
let mut buf = Vec::new();
150-
commit_ref.author.actor().write_to(&mut buf)?;
151+
author.actor().write_to(&mut buf)?;
151152
buf.into()
152153
},
153-
time: commit_ref.author.time()?.format_or_unix(format::DEFAULT),
154+
time: author.time()?.format_or_unix(format::DEFAULT),
154155
message: commit_ref.message.to_owned(),
155156
})
156157
}),

gix-merge/src/commit/virtual_merge_base.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ pub enum Error {
1919
MergeTree(#[from] crate::tree::Error),
2020
#[error("Failed to write tree for merged merge-base or virtual commit")]
2121
WriteObject(gix_object::write::Error),
22+
#[error("Failed to decode a commit needed to build a virtual merge-base")]
23+
DecodeCommit(#[from] gix_object::decode::Error),
2224
#[error(
2325
"Conflicts occurred when trying to resolve multiple merge-bases by merging them. This is most certainly a bug."
2426
)]
@@ -28,6 +30,8 @@ pub enum Error {
2830
}
2931

3032
pub(super) mod function {
33+
use std::convert::TryFrom;
34+
3135
use gix_object::FindExt;
3236

3337
use super::Error;
@@ -128,7 +132,8 @@ pub(super) mod function {
128132
tree_id: gix_hash::ObjectId,
129133
) -> Result<gix_hash::ObjectId, Error> {
130134
let mut buf = Vec::new();
131-
let mut commit: gix_object::Commit = objects.find_commit(&parent_a, &mut buf)?.into();
135+
let commit_ref = objects.find_commit(&parent_a, &mut buf)?;
136+
let mut commit = gix_object::Commit::try_from(commit_ref)?;
132137
commit.parents = vec![parent_a, parent_b].into();
133138
commit.tree = tree_id;
134139
objects.write(&commit).map_err(Error::WriteObject)

gix-object/src/commit/decode.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,20 @@ pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
4040
.context(StrContext::Expected(
4141
"zero or more 'parent <40 lowercase hex char>'".into(),
4242
)),
43-
(|i: &mut _| parse::header_field(i, b"author", parse::signature))
44-
.context(StrContext::Expected("author <signature>".into())),
45-
(|i: &mut _| parse::header_field(i, b"committer", parse::signature))
46-
.context(StrContext::Expected("committer <signature>".into())),
43+
(|i: &mut _| {
44+
parse::header_field(i, b"author", parse::signature_with_raw).map(|(signature, raw)| {
45+
let _ = signature;
46+
raw
47+
})
48+
})
49+
.context(StrContext::Expected("author <signature>".into())),
50+
(|i: &mut _| {
51+
parse::header_field(i, b"committer", parse::signature_with_raw).map(|(signature, raw)| {
52+
let _ = signature;
53+
raw
54+
})
55+
})
56+
.context(StrContext::Expected("committer <signature>".into())),
4757
opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(0.., NL)))
4858
.context(StrContext::Expected("encoding <encoding>".into())),
4959
repeat(

gix-object/src/commit/mod.rs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ impl<'a> CommitRef<'a> {
7474

7575
/// Access
7676
impl<'a> CommitRef<'a> {
77+
fn parse_signature(raw: &'a BStr) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
78+
gix_actor::SignatureRef::from_bytes::<crate::decode::ParseError>(raw.as_ref())
79+
.map_err(|err| crate::decode::Error::with_err(err, raw.as_ref()))
80+
}
81+
7782
/// Return the `tree` fields hash digest.
7883
pub fn tree(&self) -> gix_hash::ObjectId {
7984
gix_hash::ObjectId::from_hex(self.tree).expect("prior validation of tree hash during parsing")
@@ -94,15 +99,15 @@ impl<'a> CommitRef<'a> {
9499
/// Return the author, with whitespace trimmed.
95100
///
96101
/// This is different from the `author` field which may contain whitespace.
97-
pub fn author(&self) -> gix_actor::SignatureRef<'a> {
98-
self.author.trim()
102+
pub fn author(&self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
103+
Self::parse_signature(self.author).map(|signature| signature.trim())
99104
}
100105

101106
/// Return the committer, with whitespace trimmed.
102107
///
103108
/// This is different from the `committer` field which may contain whitespace.
104-
pub fn committer(&self) -> gix_actor::SignatureRef<'a> {
105-
self.committer.trim()
109+
pub fn committer(&self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
110+
Self::parse_signature(self.committer).map(|signature| signature.trim())
106111
}
107112

108113
/// Returns a partially parsed message from which more information can be derived.
@@ -111,21 +116,21 @@ impl<'a> CommitRef<'a> {
111116
}
112117

113118
/// Returns the time at which this commit was created, or a default time if it could not be parsed.
114-
pub fn time(&self) -> gix_date::Time {
115-
self.committer.time.parse().unwrap_or_default()
119+
pub fn time(&self) -> Result<gix_date::Time, crate::decode::Error> {
120+
Self::parse_signature(self.committer).map(|signature| signature.time().unwrap_or_default())
116121
}
117122
}
118123

119124
/// Conversion
120125
impl CommitRef<'_> {
121126
/// Copy all fields of this instance into a fully owned commit, consuming this instance.
122-
pub fn into_owned(self) -> Commit {
123-
self.into()
127+
pub fn into_owned(self) -> Result<Commit, crate::decode::Error> {
128+
Commit::try_from(self)
124129
}
125130

126131
/// Copy all fields of this instance into a fully owned commit, internally cloning this instance.
127-
pub fn to_owned(self) -> Commit {
128-
self.clone().into()
132+
pub fn to_owned(self) -> Result<Commit, crate::decode::Error> {
133+
Commit::try_from(self.clone())
129134
}
130135
}
131136

gix-object/src/commit/write.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ impl crate::WriteTo for CommitRef<'_> {
5858
for parent in self.parents() {
5959
encode::trusted_header_id(b"parent", &parent, &mut out)?;
6060
}
61-
encode::trusted_header_signature(b"author", &self.author, &mut out)?;
62-
encode::trusted_header_signature(b"committer", &self.committer, &mut out)?;
61+
encode::trusted_header_field(b"author", self.author.as_ref(), &mut out)?;
62+
encode::trusted_header_field(b"committer", self.committer.as_ref(), &mut out)?;
6363
if let Some(encoding) = self.encoding.as_ref() {
6464
encode::header_field(b"encoding", encoding, &mut out)?;
6565
}
@@ -78,8 +78,8 @@ impl crate::WriteTo for CommitRef<'_> {
7878
let hash_in_hex = self.tree().kind().len_in_hex();
7979
(b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */
8080
+ self.parents.iter().count() * (b"parent".len() + 1 /* space */ + hash_in_hex + 1 /* nl */)
81-
+ b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */
82-
+ b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */
81+
+ b"author".len() + 1 /* space */ + self.author.len() + 1 /* nl */
82+
+ b"committer".len() + 1 /* space */ + self.committer.len() + 1 /* nl */
8383
+ self
8484
.encoding
8585
.as_ref()

gix-object/src/lib.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,16 @@ pub struct CommitRef<'a> {
8888
pub tree: &'a BStr,
8989
/// HEX hash of each parent commit. Empty for first commit in repository.
9090
pub parents: SmallVec<[&'a BStr; 1]>,
91-
/// Who wrote this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping.
91+
/// The raw author header value as encountered during parsing.
9292
///
93-
/// Use the [`author()`](CommitRef::author()) method to received a trimmed version of it.
94-
pub author: gix_actor::SignatureRef<'a>,
95-
/// Who committed this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping.
96-
///
97-
/// Use the [`committer()`](CommitRef::committer()) method to received a trimmed version of it.
93+
/// Use the [`author()`](CommitRef::author()) method to obtain a parsed version of it.
94+
#[cfg_attr(feature = "serde", serde(borrow))]
95+
pub author: &'a BStr,
96+
/// The raw committer header value as encountered during parsing.
9897
///
99-
/// This may be different from the `author` in case the author couldn't write to the repository themselves and
100-
/// is commonly encountered with contributed commits.
101-
pub committer: gix_actor::SignatureRef<'a>,
98+
/// Use the [`committer()`](CommitRef::committer()) method to obtain a parsed version of it.
99+
#[cfg_attr(feature = "serde", serde(borrow))]
100+
pub committer: &'a BStr,
102101
/// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
103102
pub encoding: Option<&'a BStr>,
104103
/// The commit message documenting the change.
@@ -150,8 +149,11 @@ pub struct TagRef<'a> {
150149
pub target_kind: Kind,
151150
/// The name of the tag, e.g. "v1.0".
152151
pub name: &'a BStr,
153-
/// The author of the tag.
154-
pub tagger: Option<gix_actor::SignatureRef<'a>>,
152+
/// The raw tagger header value as encountered during parsing.
153+
///
154+
/// Use the [`tagger()`](TagRef::tagger()) method to obtain a parsed version of it.
155+
#[cfg_attr(feature = "serde", serde(borrow))]
156+
pub tagger: Option<&'a BStr>,
155157
/// The message describing this release.
156158
pub message: &'a BStr,
157159
/// A cryptographic signature over the entire content of the serialized tag object thus far.

gix-object/src/object/convert.rs

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,55 @@
1+
use std::convert::TryFrom;
2+
13
use crate::{tree, Blob, BlobRef, Commit, CommitRef, Object, ObjectRef, Tag, TagRef, Tree, TreeRef};
24

3-
impl From<TagRef<'_>> for Tag {
4-
fn from(other: TagRef<'_>) -> Tag {
5+
impl TryFrom<TagRef<'_>> for Tag {
6+
type Error = crate::decode::Error;
7+
8+
fn try_from(other: TagRef<'_>) -> Result<Tag, Self::Error> {
59
let TagRef {
610
target,
711
name,
812
target_kind,
913
message,
10-
tagger: signature,
14+
tagger,
1115
pgp_signature,
1216
} = other;
13-
Tag {
17+
let tagger = tagger
18+
.map(|raw| {
19+
gix_actor::SignatureRef::from_bytes::<crate::decode::ParseError>(raw.as_ref())
20+
.map_err(|err| crate::decode::Error::with_err(err, raw.as_ref()))
21+
})
22+
.transpose()?
23+
.map(Into::into);
24+
Ok(Tag {
1425
target: gix_hash::ObjectId::from_hex(target).expect("prior parser validation"),
1526
name: name.to_owned(),
1627
target_kind,
1728
message: message.to_owned(),
18-
tagger: signature.map(Into::into),
29+
tagger,
1930
pgp_signature: pgp_signature.map(ToOwned::to_owned),
20-
}
31+
})
2132
}
2233
}
2334

24-
impl From<CommitRef<'_>> for Commit {
25-
fn from(other: CommitRef<'_>) -> Commit {
35+
impl TryFrom<CommitRef<'_>> for Commit {
36+
type Error = crate::decode::Error;
37+
38+
fn try_from(other: CommitRef<'_>) -> Result<Commit, Self::Error> {
2639
let CommitRef {
2740
tree,
2841
parents,
29-
author,
30-
committer,
42+
author: author_raw,
43+
committer: committer_raw,
3144
encoding,
3245
message,
3346
extra_headers,
3447
} = other;
35-
Commit {
48+
let author = gix_actor::SignatureRef::from_bytes::<crate::decode::ParseError>(author_raw.as_ref())
49+
.map_err(|err| crate::decode::Error::with_err(err, author_raw.as_ref()))?;
50+
let committer = gix_actor::SignatureRef::from_bytes::<crate::decode::ParseError>(committer_raw.as_ref())
51+
.map_err(|err| crate::decode::Error::with_err(err, committer_raw.as_ref()))?;
52+
Ok(Commit {
3653
tree: gix_hash::ObjectId::from_hex(tree).expect("prior parser validation"),
3754
parents: parents
3855
.iter()
@@ -46,7 +63,7 @@ impl From<CommitRef<'_>> for Commit {
4663
.into_iter()
4764
.map(|(k, v)| (k.into(), v.into_owned()))
4865
.collect(),
49-
}
66+
})
5067
}
5168
}
5269

@@ -89,14 +106,16 @@ impl<'a> From<&'a tree::Entry> for tree::EntryRef<'a> {
89106
}
90107
}
91108

92-
impl From<ObjectRef<'_>> for Object {
93-
fn from(v: ObjectRef<'_>) -> Self {
94-
match v {
109+
impl TryFrom<ObjectRef<'_>> for Object {
110+
type Error = crate::decode::Error;
111+
112+
fn try_from(v: ObjectRef<'_>) -> Result<Self, Self::Error> {
113+
Ok(match v {
95114
ObjectRef::Tree(v) => Object::Tree(v.into()),
96115
ObjectRef::Blob(v) => Object::Blob(v.into()),
97-
ObjectRef::Commit(v) => Object::Commit(v.into()),
98-
ObjectRef::Tag(v) => Object::Tag(v.into()),
99-
}
116+
ObjectRef::Commit(v) => Object::Commit(Commit::try_from(v)?),
117+
ObjectRef::Tag(v) => Object::Tag(Tag::try_from(v)?),
118+
})
100119
}
101120
}
102121

gix-object/src/object/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,15 +216,15 @@ impl<'a> ObjectRef<'a> {
216216
/// Convert the immutable object into a mutable version, consuming the source in the process.
217217
///
218218
/// Note that this is an expensive operation.
219-
pub fn into_owned(self) -> Object {
220-
self.into()
219+
pub fn into_owned(self) -> Result<Object, crate::decode::Error> {
220+
Object::try_from(self)
221221
}
222222

223223
/// Convert this immutable object into its mutable counterpart.
224224
///
225225
/// Note that this is an expensive operation.
226-
pub fn to_owned(&self) -> Object {
227-
self.clone().into()
226+
pub fn to_owned(&self) -> Result<Object, crate::decode::Error> {
227+
Object::try_from(self.clone())
228228
}
229229
}
230230

gix-object/src/parse.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,13 @@ pub(crate) fn signature<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrC
7171
) -> ModalResult<gix_actor::SignatureRef<'a>, E> {
7272
gix_actor::signature::decode(i)
7373
}
74+
75+
pub(crate) fn signature_with_raw<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
76+
i: &mut &'a [u8],
77+
) -> ModalResult<(gix_actor::SignatureRef<'a>, &'a BStr), E> {
78+
let original = *i;
79+
gix_actor::signature::decode(i).map(|signature| {
80+
let consumed = original.len() - i.len();
81+
(signature, original[..consumed].as_bstr())
82+
})
83+
}

gix-object/src/tag/decode.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,23 @@ pub fn git_tag<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
1919
.context(StrContext::Expected("type <object kind>".into())),
2020
(|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0])))
2121
.context(StrContext::Expected("tag <version>".into())),
22-
opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature))
23-
.context(StrContext::Expected("tagger <signature>".into())),
22+
opt(|i: &mut _| {
23+
parse::header_field(i, b"tagger", parse::signature_with_raw).map(|(signature, raw)| {
24+
let _ = signature;
25+
raw
26+
})
27+
})
28+
.context(StrContext::Expected("tagger <signature>".into())),
2429
terminated(message, eof),
2530
)
26-
.map(
27-
|(target, kind, tag_version, signature, (message, pgp_signature))| TagRef {
28-
target,
29-
name: tag_version.as_bstr(),
30-
target_kind: kind,
31-
message,
32-
tagger: signature,
33-
pgp_signature,
34-
},
35-
)
31+
.map(|(target, kind, tag_version, tagger, (message, pgp_signature))| TagRef {
32+
target,
33+
name: tag_version.as_bstr(),
34+
target_kind: kind,
35+
message,
36+
tagger,
37+
pgp_signature,
38+
})
3639
.parse_next(i)
3740
}
3841

0 commit comments

Comments
 (0)