Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add binding for git_message_trailers #749

Merged
merged 20 commits into from Oct 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 21 additions & 0 deletions libgit2-sys/lib.rs
Expand Up @@ -1953,6 +1953,20 @@ git_enum! {
}
}

#[repr(C)]
pub struct git_message_trailer {
pub key: *const c_char,
pub value: *const c_char,
}

#[repr(C)]
#[derive(Copy, Clone)]
pub struct git_message_trailer_array {
pub trailers: *mut git_message_trailer,
pub count: size_t,
pub _trailer_block: *mut c_char,
}

extern "C" {
// threads
pub fn git_libgit2_init() -> c_int;
Expand Down Expand Up @@ -3678,6 +3692,13 @@ extern "C" {
comment_char: c_char,
) -> c_int;

pub fn git_message_trailers(
out: *mut git_message_trailer_array,
message: *const c_char,
) -> c_int;

pub fn git_message_trailer_array_free(trailer: *mut git_message_trailer_array);

// packbuilder
pub fn git_packbuilder_new(out: *mut *mut git_packbuilder, repo: *mut git_repository) -> c_int;
pub fn git_packbuilder_set_threads(pb: *mut git_packbuilder, n: c_uint) -> c_uint;
Expand Down
6 changes: 5 additions & 1 deletion src/lib.rs
Expand Up @@ -101,7 +101,11 @@ pub use crate::indexer::{IndexerProgress, Progress};
pub use crate::mailmap::Mailmap;
pub use crate::mempack::Mempack;
pub use crate::merge::{AnnotatedCommit, MergeOptions};
pub use crate::message::{message_prettify, DEFAULT_COMMENT_CHAR};
pub use crate::message::{
message_prettify, message_trailers_bytes, message_trailers_strs, MessageTrailersBytes,
MessageTrailersBytesIterator, MessageTrailersStrs, MessageTrailersStrsIterator,
DEFAULT_COMMENT_CHAR,
};
pub use crate::note::{Note, Notes};
pub use crate::object::Object;
pub use crate::odb::{Odb, OdbObject, OdbPackwriter, OdbReader, OdbWriter};
Expand Down
285 changes: 284 additions & 1 deletion src/message.rs
@@ -1,4 +1,7 @@
use core::ops::Range;
use std::ffi::CStr;
use std::ffi::CString;
use std::ptr;

use libc::{c_char, c_int};

Expand Down Expand Up @@ -31,12 +34,216 @@ fn _message_prettify(message: CString, comment_char: Option<u8>) -> Result<Strin
/// The default comment character for `message_prettify` ('#')
pub const DEFAULT_COMMENT_CHAR: Option<u8> = Some(b'#');

/// Get the trailers for the given message.
///
/// Use this function when you are dealing with a UTF-8-encoded message.
pub fn message_trailers_strs(message: &str) -> Result<MessageTrailersStrs, Error> {
_message_trailers(message.into_c_string()?).map(|res| MessageTrailersStrs(res))
}

/// Get the trailers for the given message.
///
/// Use this function when the message might not be UTF-8-encoded,
/// or if you want to handle the returned trailer key–value pairs
/// as bytes.
pub fn message_trailers_bytes<S: IntoCString>(message: S) -> Result<MessageTrailersBytes, Error> {
_message_trailers(message.into_c_string()?).map(|res| MessageTrailersBytes(res))
}

fn _message_trailers(message: CString) -> Result<MessageTrailers, Error> {
let ret = MessageTrailers::new();
unsafe {
try_call!(raw::git_message_trailers(ret.raw(), message));
}
Ok(ret)
}

/// Collection of UTF-8-encoded trailers.
///
/// Use `iter()` to get access to the values.
pub struct MessageTrailersStrs(MessageTrailers);

impl MessageTrailersStrs {
/// Create a borrowed iterator.
pub fn iter(&self) -> MessageTrailersStrsIterator<'_> {
MessageTrailersStrsIterator(self.0.iter())
}
/// The number of trailer key–value pairs.
pub fn len(&self) -> usize {
self.0.len()
}
/// Convert to the “bytes” variant.
pub fn to_bytes(self) -> MessageTrailersBytes {
MessageTrailersBytes(self.0)
}
}

/// Collection of unencoded (bytes) trailers.
///
/// Use `iter()` to get access to the values.
pub struct MessageTrailersBytes(MessageTrailers);

impl MessageTrailersBytes {
/// Create a borrowed iterator.
pub fn iter(&self) -> MessageTrailersBytesIterator<'_> {
MessageTrailersBytesIterator(self.0.iter())
}
/// The number of trailer key–value pairs.
pub fn len(&self) -> usize {
self.0.len()
}
}

struct MessageTrailers {
raw: raw::git_message_trailer_array,
}

impl MessageTrailers {
fn new() -> MessageTrailers {
crate::init();
unsafe {
Binding::from_raw(&mut raw::git_message_trailer_array {
trailers: ptr::null_mut(),
count: 0,
_trailer_block: ptr::null_mut(),
} as *mut _)
}
}
fn iter(&self) -> MessageTrailersIterator<'_> {
MessageTrailersIterator {
trailers: self,
range: Range {
start: 0,
end: self.raw.count,
},
}
}
fn len(&self) -> usize {
self.raw.count
}
}

impl Drop for MessageTrailers {
fn drop(&mut self) {
unsafe {
raw::git_message_trailer_array_free(&mut self.raw);
}
}
}

impl Binding for MessageTrailers {
type Raw = *mut raw::git_message_trailer_array;
unsafe fn from_raw(raw: *mut raw::git_message_trailer_array) -> MessageTrailers {
MessageTrailers { raw: *raw }
}
fn raw(&self) -> *mut raw::git_message_trailer_array {
&self.raw as *const _ as *mut _
}
}

struct MessageTrailersIterator<'a> {
trailers: &'a MessageTrailers,
range: Range<usize>,
}

fn to_raw_tuple(trailers: &MessageTrailers, index: usize) -> (*const c_char, *const c_char) {
unsafe {
let addr = trailers.raw.trailers.wrapping_add(index);
((*addr).key, (*addr).value)
}
}

/// Borrowed iterator over the UTF-8-encoded trailers.
pub struct MessageTrailersStrsIterator<'a>(MessageTrailersIterator<'a>);

impl<'pair> Iterator for MessageTrailersStrsIterator<'pair> {
type Item = (&'pair str, &'pair str);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this perhaps be an iterator of &[u8] instead of &str to allow for non-utf8 messages?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’ve gone back and forth on the UTF-8 issue. I think I ended up trying to do what message_prettify(…) does, and that function seems to assume that it gets a UTF-8 string back. It seems though that it can send in a non-UTF-8 string since it passes in a IntoCString (as does message_trailers(…)), in which case it would error out when it gets non-UTF-8 strings back (if I’ve read the code correctly).

Supporting &[u8] seems good, but what about both types? It seems inconvenient to send in a UTF-8 string to C, get back &[u8] values that you know are UTF-8 and then having to convert or assert that they are also valid as &str.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eh I could also go either way on this. I don't think the bindings are super principled so far, some of the time it tries to give you bytes and other times it gives you strings. I don't really mind either way on this, we can always add a separate iterator in the future specifically for bytes if strings becomes a problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See rev 70b7ebd


fn next(&mut self) -> Option<Self::Item> {
self.0
.range
.next()
.map(|index| to_str_tuple(&self.0.trailers, index))
}

fn size_hint(&self) -> (usize, Option<usize>) {
self.0.range.size_hint()
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add size_hint here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay I think I understand now. I confused myself thought that size_hint() was part of ExactSizeIterator.

See rev 16321f3

}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add implementations for size_hint and ExactSizeIterator here as well? (which would also necessitate DoubleEndedIterator, and the implementations should be relatively easy if the counter field is changed to a Range)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I implemented len() for ExactSizeIterator. The doc says that size_hint() can be derived from it. I couldn’t implement size_hint() myself.

The doc says that len() has a default implementation, but I guess just returning the value directly wouldn’t be less efficient than whatever the default impl. is.

See rev. e7b0306


impl ExactSizeIterator for MessageTrailersStrsIterator<'_> {
fn len(&self) -> usize {
self.0.range.len()
}
}

impl DoubleEndedIterator for MessageTrailersStrsIterator<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
self.0
.range
.next_back()
.map(|index| to_str_tuple(&self.0.trailers, index))
}
}

fn to_str_tuple(trailers: &MessageTrailers, index: usize) -> (&str, &str) {
unsafe {
let (rkey, rvalue) = to_raw_tuple(&trailers, index);
let key = CStr::from_ptr(rkey).to_str().unwrap();
let value = CStr::from_ptr(rvalue).to_str().unwrap();
(key, value)
}
}

/// Borrowed iterator over the raw (bytes) trailers.
pub struct MessageTrailersBytesIterator<'a>(MessageTrailersIterator<'a>);

impl<'pair> Iterator for MessageTrailersBytesIterator<'pair> {
type Item = (&'pair [u8], &'pair [u8]);

fn next(&mut self) -> Option<Self::Item> {
self.0
.range
.next()
.map(|index| to_bytes_tuple(&self.0.trailers, index))
}

fn size_hint(&self) -> (usize, Option<usize>) {
self.0.range.size_hint()
}
}

impl ExactSizeIterator for MessageTrailersBytesIterator<'_> {
fn len(&self) -> usize {
self.0.range.len()
}
}

impl DoubleEndedIterator for MessageTrailersBytesIterator<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
self.0
.range
.next_back()
.map(|index| to_bytes_tuple(&self.0.trailers, index))
}
}

fn to_bytes_tuple(trailers: &MessageTrailers, index: usize) -> (&[u8], &[u8]) {
unsafe {
let (rkey, rvalue) = to_raw_tuple(&trailers, index);
let key = CStr::from_ptr(rkey).to_bytes();
let value = CStr::from_ptr(rvalue).to_bytes();
(key, value)
}
}

#[cfg(test)]
mod tests {
use crate::{message_prettify, DEFAULT_COMMENT_CHAR};

#[test]
fn prettify() {
use crate::{message_prettify, DEFAULT_COMMENT_CHAR};

// This does not attempt to duplicate the extensive tests for
// git_message_prettify in libgit2, just a few representative values to
// make sure the interface works as expected.
Expand All @@ -58,4 +265,80 @@ mod tests {
"1\n"
);
}

#[test]
fn trailers() {
use crate::{message_trailers_bytes, message_trailers_strs, MessageTrailersStrs};
use std::collections::HashMap;

// no trailers
let message1 = "
WHAT ARE WE HERE FOR

What are we here for?

Just to be eaten?
";
let expected: HashMap<&str, &str> = HashMap::new();
assert_eq!(expected, to_map(&message_trailers_strs(message1).unwrap()));

// standard PSA
let message2 = "
Attention all

We are out of tomatoes.

Spoken-by: Major Turnips
Transcribed-by: Seargant Persimmons
Signed-off-by: Colonel Kale
";
let expected: HashMap<&str, &str> = vec![
("Spoken-by", "Major Turnips"),
("Transcribed-by", "Seargant Persimmons"),
("Signed-off-by", "Colonel Kale"),
]
.into_iter()
.collect();
assert_eq!(expected, to_map(&message_trailers_strs(message2).unwrap()));

// ignore everything after `---`
let message3 = "
The fate of Seargant Green-Peppers

Seargant Green-Peppers was killed by Caterpillar Battalion 44.

Signed-off-by: Colonel Kale
---
I never liked that guy, anyway.

Opined-by: Corporal Garlic
";
let expected: HashMap<&str, &str> = vec![("Signed-off-by", "Colonel Kale")]
.into_iter()
.collect();
assert_eq!(expected, to_map(&message_trailers_strs(message3).unwrap()));

// Raw bytes message; not valid UTF-8
// Source: https://stackoverflow.com/a/3886015/1725151
let message4 = b"
Be honest guys

Am I a malformed brussels sprout?

Signed-off-by: Lieutenant \xe2\x28\xa1prout
";

let trailer = message_trailers_bytes(&message4[..]).unwrap();
let expected = (&b"Signed-off-by"[..], &b"Lieutenant \xe2\x28\xa1prout"[..]);
let actual = trailer.iter().next().unwrap();
assert_eq!(expected, actual);

fn to_map(trailers: &MessageTrailersStrs) -> HashMap<&str, &str> {
let mut map = HashMap::with_capacity(trailers.len());
for (key, value) in trailers.iter() {
map.insert(key, value);
}
map
}
}
}