Skip to content

Commit

Permalink
fix(SourceCode): Fix and improve default SourceCode implementation
Browse files Browse the repository at this point in the history
- Fix a panic when a 0 length span covers the end of a document
- Fix incorrect `line_count`
- Add new unit tests and complete existing ones
- Improve readability
  • Loading branch information
Nahor committed Mar 20, 2024
1 parent a37328a commit f7fa50d
Showing 1 changed file with 185 additions and 72 deletions.
257 changes: 185 additions & 72 deletions src/source_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,91 +5,103 @@ use std::{borrow::Cow, collections::VecDeque, fmt::Debug, sync::Arc};

use crate::{MietteError, MietteSpanContents, SourceCode, SourceSpan, SpanContents};

#[derive(Clone, Copy)]
struct LineInfo {
line_no: usize,
start: usize,
end: usize,
}

fn context_info<'a>(
input: &'a [u8],
span: &SourceSpan,
context_lines_before: usize,
context_lines_after: usize,
) -> Result<MietteSpanContents<'a>, MietteError> {
let mut offset = 0usize;
let mut line_count = 0usize;
let mut start_line = 0usize;
let mut start_column = 0usize;
let mut before_lines_starts = VecDeque::new();
let mut current_line_start = 0usize;
let mut end_lines = 0usize;
let mut post_span = false;
let mut post_span_got_newline = false;
let mut iter = input.iter().copied().peekable();
while let Some(char) = iter.next() {
if matches!(char, b'\r' | b'\n') {
line_count += 1;
if char == b'\r' && iter.next_if_eq(&b'\n').is_some() {
offset += 1;
let mut iter = input
.split_inclusive(|b| *b == b'\n')
.enumerate()
.map(|(line_no, line)| {
// SAFETY:
// - it is safe to use `offset_from` on slices of an array per Rust design (max array size)
// (https://doc.rust-lang.org/stable/reference/types/numeric.html#machine-dependent-integer-types)
// - since `line` is a slice of `input, the offset cannot be negative either
let offset = unsafe { line.as_ptr().offset_from(input.as_ptr()) } as usize;
LineInfo {
line_no,
start: offset,
end: offset + line.len(),
}
if offset < span.offset() {
// We're before the start of the span.
start_column = 0;
before_lines_starts.push_back(current_line_start);
if before_lines_starts.len() > context_lines_before {
start_line += 1;
before_lines_starts.pop_front();
}
} else if offset >= span.offset() + span.len().saturating_sub(1) {
// We're after the end of the span, but haven't necessarily
// started collecting end lines yet (we might still be
// collecting context lines).
if post_span {
start_column = 0;
if post_span_got_newline {
end_lines += 1;
} else {
post_span_got_newline = true;
}
if end_lines >= context_lines_after {
offset += 1;
break;
}
}
}
current_line_start = offset + 1;
} else if offset < span.offset() {
start_column += 1;
}
});

if offset >= (span.offset() + span.len()).saturating_sub(1) {
post_span = true;
if end_lines >= context_lines_after {
offset += 1;
break;
}
// First line handled separately because otherwise, we can't distinguish
// between `None` from an empty document (which we still want to handle as
// a "single empty line"), and `None` from the end of the document.
let mut line_starts = VecDeque::new();
let mut line_info = match iter.next() {
None => LineInfo {
line_no: 0,
start: 0,
end: 0,
},
Some(info) => info,
};
line_starts.push_back(line_info);

// Get the "before" lines (including the line containing the start
// of the span)
while span.offset() >= line_info.end {
line_info = match iter.next() {
None => break,
Some(info) => info,
};

if line_starts.len() > context_lines_before {
line_starts.pop_front();
}
line_starts.push_back(line_info);
}
let (start_lineno, start_offset, start_column) = {
let start_info = line_starts.pop_front().unwrap();
if context_lines_before > 0 {
(start_info.line_no, start_info.start, 0)
} else {
(
start_info.line_no,
span.offset(),
span.offset() - start_info.start,
)
}
};

offset += 1;
// Find the end of the span
while span.offset() + span.len() > line_info.end {
line_info = match iter.next() {
None => break,
Some(info) => info,
};
}

if offset >= (span.offset() + span.len()).saturating_sub(1) {
let starting_offset = before_lines_starts.front().copied().unwrap_or_else(|| {
if context_lines_before == 0 {
span.offset()
} else {
0
}
});
Ok(MietteSpanContents::new(
&input[starting_offset..offset],
(starting_offset, offset - starting_offset).into(),
start_line,
if context_lines_before == 0 {
start_column
} else {
0
},
line_count,
))
} else {
Err(MietteError::OutOfBounds)
// Get the "after" lines
if let Some(last) = iter.take(context_lines_after).last() {
line_info = last;
}
if span.offset() + span.len() > line_info.end {
return Err(MietteError::OutOfBounds);
}
let (end_lineno, end_offset) = if context_lines_after > 0 {
(line_info.line_no, line_info.end)
} else {
(line_info.line_no, span.offset() + span.len())
};

Ok(MietteSpanContents::new(
&input[start_offset..end_offset],
(start_offset..end_offset).into(),
start_lineno,
start_column,
end_lineno - start_lineno + 1,
))
}

impl SourceCode for [u8] {
Expand Down Expand Up @@ -206,8 +218,10 @@ mod tests {
let src = String::from("foo\n");
let contents = src.read_span(&(0, 4).into(), 0, 0)?;
assert_eq!("foo\n", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((0, 4)), *contents.span());
assert_eq!(0, contents.line());
assert_eq!(0, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

Expand All @@ -216,8 +230,10 @@ mod tests {
let src = String::from("foobar");
let contents = src.read_span(&(3, 3).into(), 1, 1)?;
assert_eq!("foobar", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((0, 6)), *contents.span());
assert_eq!(0, contents.line());
assert_eq!(0, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

Expand All @@ -226,8 +242,10 @@ mod tests {
let src = String::from("foo\nbar\nbaz\n");
let contents = src.read_span(&(4, 4).into(), 0, 0)?;
assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((4, 4)), *contents.span());
assert_eq!(1, contents.line());
assert_eq!(0, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

Expand All @@ -236,8 +254,58 @@ mod tests {
let src = String::from("foo\nbarbar\nbaz\n");
let contents = src.read_span(&(7, 4).into(), 0, 0)?;
assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((7, 4)), *contents.span());
assert_eq!(1, contents.line());
assert_eq!(3, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

#[test]
fn end_of_line_before_newline() -> Result<(), MietteError> {
let src = String::from("foo\nbar\nbaz\n");
let contents = src.read_span(&(7, 0).into(), 0, 0)?;
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((7, 0)), *contents.span());
assert_eq!(1, contents.line());
assert_eq!(3, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

#[test]
fn end_of_line_after_newline() -> Result<(), MietteError> {
let src = String::from("foo\nbar\nbaz\n");
let contents = src.read_span(&(8, 0).into(), 0, 0)?;
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((8, 0)), *contents.span());
assert_eq!(2, contents.line());
assert_eq!(0, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

#[test]
fn end_of_file_with_newline() -> Result<(), MietteError> {
let src = String::from("foo\nbar\nbaz\n");
let contents = src.read_span(&(12, 0).into(), 0, 0)?;
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((12, 0)), *contents.span());
assert_eq!(2, contents.line());
assert_eq!(4, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

#[test]
fn end_of_file_without_newline() -> Result<(), MietteError> {
let src = String::from("foo\nbar\nbaz");
let contents = src.read_span(&(11, 0).into(), 0, 0)?;
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((11, 0)), *contents.span());
assert_eq!(2, contents.line());
assert_eq!(3, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

Expand All @@ -246,8 +314,10 @@ mod tests {
let src = String::from("foo\r\nbar\r\nbaz\r\n");
let contents = src.read_span(&(5, 5).into(), 0, 0)?;
assert_eq!("bar\r\n", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((5, 5)), *contents.span());
assert_eq!(1, contents.line());
assert_eq!(0, contents.column());
assert_eq!(1, contents.line_count());
Ok(())
}

Expand All @@ -259,8 +329,10 @@ mod tests {
"foo\nbar\nbaz\n",
std::str::from_utf8(contents.data()).unwrap()
);
assert_eq!(SourceSpan::from((4, 12)), *contents.span());
assert_eq!(1, contents.line());
assert_eq!(0, contents.column());
assert_eq!(3, contents.line_count());
Ok(())
}

Expand All @@ -272,8 +344,10 @@ mod tests {
"\nfoo\nbar\nbaz\n\n",
std::str::from_utf8(contents.data()).unwrap()
);
assert_eq!(SourceSpan::from((8, 14)), *contents.span());
assert_eq!(2, contents.line());
assert_eq!(0, contents.column());
assert_eq!(5, contents.line_count());
let span: SourceSpan = (8, 14).into();
assert_eq!(&span, contents.span());
Ok(())
Expand All @@ -287,10 +361,49 @@ mod tests {
"one\ntwo\n\n",
std::str::from_utf8(contents.data()).unwrap()
);
assert_eq!(SourceSpan::from((0, 9)), *contents.span());
assert_eq!(0, contents.line());
assert_eq!(0, contents.column());
let span: SourceSpan = (0, 9).into();
assert_eq!(&span, contents.span());
assert_eq!(3, contents.line_count());
Ok(())
}

#[test]
fn empty_source() -> Result<(), MietteError> {
let src = String::from("");

let contents = src.read_span(&(0, 0).into(), 0, 0)?;
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
assert_eq!(SourceSpan::from((0, 0)), *contents.span());
assert_eq!(0, contents.line());
assert_eq!(0, contents.column());
assert_eq!(1, contents.line_count());

Ok(())
}

#[test]
fn empty_source_out_of_bounds() {
let src = String::from("");

let contents = src.read_span(&(0, 1).into(), 0, 0);
assert!(matches!(contents, Err(MietteError::OutOfBounds)));

let contents = src.read_span(&(0, 2).into(), 0, 0);
assert!(matches!(contents, Err(MietteError::OutOfBounds)));

let contents = src.read_span(&(1, 0).into(), 0, 0);
assert!(matches!(contents, Err(MietteError::OutOfBounds)));

let contents = src.read_span(&(1, 1).into(), 0, 0);
assert!(matches!(contents, Err(MietteError::OutOfBounds)));

let contents = src.read_span(&(2, 0).into(), 0, 0);
assert!(matches!(contents, Err(MietteError::OutOfBounds)));

let contents = src.read_span(&(2, 1).into(), 0, 0);
assert!(matches!(contents, Err(MietteError::OutOfBounds)));
}
}

0 comments on commit f7fa50d

Please sign in to comment.