Skip to content

Commit

Permalink
Add some safety docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Manishearth committed Apr 26, 2024
1 parent 2ca373d commit c3d0870
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 0 deletions.
1 change: 1 addition & 0 deletions crates/toml_edit/src/parser/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ pub(crate) fn unsigned_digits<'i, const MIN: usize, const MAX: usize>(
input: &mut Input<'i>,
) -> PResult<&'i str> {
take_while(MIN..=MAX, DIGIT)
// Safety: `digit` only produces ASCII
.map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") })
.parse_next(input)
}
Expand Down
2 changes: 2 additions & 0 deletions crates/toml_edit/src/parser/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ pub(crate) fn simple_key(input: &mut Input<'_>) -> PResult<(RawString, InternalS
fn unquoted_key<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
trace(
"unquoted-key",
// Safety: UNQUOTED_CHAR is only ASCII ranges
take_while(1.., UNQUOTED_CHAR)
.map(|b| unsafe { from_utf8_unchecked(b, "`is_unquoted_char` filters out on-ASCII") }),
)
Expand All @@ -101,6 +102,7 @@ pub(crate) fn is_unquoted_char(c: u8) -> bool {
UNQUOTED_CHAR.contains_token(c)
}

// Safety-usable invariant: UNQUOTED_CHAR is only ASCII ranges
const UNQUOTED_CHAR: (
RangeInclusive<u8>,
RangeInclusive<u8>,
Expand Down
20 changes: 20 additions & 0 deletions crates/toml_edit/src/parser/numbers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,15 @@ pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
)),
)
.recognize()
// Safety: DIGIT1_9, digit(), and `_` only covers ASCII ranges
.map(|b: &[u8]| unsafe {
from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII")
})
.context(StrContext::Label("integer")),
)
.parse_next(input)
}
/// Safety-usable invariant: DIGIT1_9 is only ASCII ranges
const DIGIT1_9: RangeInclusive<u8> = b'1'..=b'9';

// hex-prefix = %x30.78 ; 0x
Expand Down Expand Up @@ -114,11 +116,13 @@ pub(crate) fn hex_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
))
.recognize(),
)
// Safety: HEX_PREFIX, hexdig(), and `_` only covers ASCII ranges
.map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII") })
.context(StrContext::Label("hexadecimal integer")),
)
.parse_next(input)
}
/// Safety-usable invariant: HEX_PREFIX is ASCII only
const HEX_PREFIX: &[u8] = b"0x";

// oct-prefix = %x30.6F ; 0o
Expand Down Expand Up @@ -147,12 +151,15 @@ pub(crate) fn oct_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
))
.recognize(),
)
// Safety: DIGIT0_7, OCT_PREFIX, and `_` only covers ASCII ranges
.map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII") })
.context(StrContext::Label("octal integer")),
)
.parse_next(input)
}
/// Safety-usable invariant: OCT_PREFIX is ASCII only
const OCT_PREFIX: &[u8] = b"0o";
/// Safety-usable invariant: DIGIT0_7 is ASCII only
const DIGIT0_7: RangeInclusive<u8> = b'0'..=b'7';

// bin-prefix = %x30.62 ; 0b
Expand Down Expand Up @@ -181,12 +188,15 @@ pub(crate) fn bin_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
))
.recognize(),
)
// Safety: DIGIT0_1, BIN_PREFIX, and `_` only covers ASCII ranges
.map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII") })
.context(StrContext::Label("binary integer")),
)
.parse_next(input)
}
/// Safety-usable invariant: BIN_PREFIX is ASCII only
const BIN_PREFIX: &[u8] = b"0b";
/// Safety-usable invariant: DIGIT0_1 is ASCII only
const DIGIT0_1: RangeInclusive<u8> = b'0'..=b'1';

// ;; Float
Expand Down Expand Up @@ -234,6 +244,7 @@ pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
)
.recognize()
.map(|b: &[u8]| unsafe {
// Safety: `.` and `zero_prefixable_int` only handle ASCII
from_utf8_unchecked(
b,
"`.` and `parse_zero_prefixable_int` filter out non-ASCII",
Expand All @@ -243,6 +254,7 @@ pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
}

// zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT )
/// Safety-usable invariant: only produces ASCII
pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
(
digit,
Expand All @@ -261,8 +273,10 @@ pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str>
.map(|()| ()),
)
.recognize()
// Safety: `digit()` and `_` are all ASCII
.map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") })
.parse_next(input)
/// Safety-usable invariant upheld by only using `digit` and `_` in the parser

Check failure

Code scanning / clippy

expected one of ., ;, ?, }, or an operator, found doc comment /// Safety-usable invariant upheld by only using digitand_ in the parser Error

expected one of ., ;, ?, }, or an operator, found doc comment /// Safety-usable invariant upheld by only using digitand_ in the parser

Check failure

Code scanning / clippy

expected one of ., ;, ?, }, or an operator, found doc comment /// Safety-usable invariant upheld by only using digitand_ in the parser Error

expected one of ., ;, ?, }, or an operator, found doc comment /// Safety-usable invariant upheld by only using digitand_ in the parser
}

// exp = "e" float-exp-part
Expand All @@ -275,6 +289,7 @@ pub(crate) fn exp<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
)
.recognize()
.map(|b: &[u8]| unsafe {
// Safety: `e`, `E`, `+`, `-`, and `zero_prefixable_int` are all ASCII
from_utf8_unchecked(
b,
"`one_of` and `parse_zero_prefixable_int` filter out non-ASCII",
Expand Down Expand Up @@ -305,15 +320,20 @@ pub(crate) fn nan(input: &mut Input<'_>) -> PResult<f64> {
const NAN: &[u8] = b"nan";

// DIGIT = %x30-39 ; 0-9
/// Safety-usable invariant: only parses ASCII
pub(crate) fn digit(input: &mut Input<'_>) -> PResult<u8> {
// Safety: DIGIT is all ASCII
one_of(DIGIT).parse_next(input)
}
const DIGIT: RangeInclusive<u8> = b'0'..=b'9';

// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
/// Safety-usable invariant: only parses ASCII
pub(crate) fn hexdig(input: &mut Input<'_>) -> PResult<u8> {
// Safety: HEXDIG is all ASCII
one_of(HEXDIG).parse_next(input)
}
/// Safety-usable invariant: only ASCII ranges
pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) =
(DIGIT, b'A'..=b'F', b'a'..=b'f');

Expand Down
5 changes: 5 additions & 0 deletions crates/toml_edit/src/parser/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ fn escape_seq_char(input: &mut Input<'_>) -> PResult<char> {
pub(crate) fn hexescape<const N: usize>(input: &mut Input<'_>) -> PResult<char> {
take_while(0..=N, HEXDIG)
.verify(|b: &[u8]| b.len() == N)
// Safety: HEXDIG is ASCII-only
.map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") })
.verify_map(|s| u32::from_str_radix(s, 16).ok())
.try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange))
Expand Down Expand Up @@ -217,13 +218,15 @@ fn mlb_quotes<'i>(
move |input: &mut Input<'i>| {
let start = input.checkpoint();
let res = terminated(b"\"\"", peek(term.by_ref()))
// Safety: ???
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
.parse_next(input);

match res {
Err(winnow::error::ErrMode::Backtrack(_)) => {
input.reset(&start);
terminated(b"\"", peek(term.by_ref()))
// Safety: ???
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
.parse_next(input)
}
Expand Down Expand Up @@ -346,13 +349,15 @@ fn mll_quotes<'i>(
move |input: &mut Input<'i>| {
let start = input.checkpoint();
let res = terminated(b"''", peek(term.by_ref()))
// Safety: ???
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
.parse_next(input);

match res {
Err(winnow::error::ErrMode::Backtrack(_)) => {
input.reset(&start);
terminated(b"'", peek(term.by_ref()))
// Safety: ???
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
.parse_next(input)
}
Expand Down
7 changes: 7 additions & 0 deletions crates/toml_edit/src/parser/trivia.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use winnow::token::take_while;

use crate::parser::prelude::*;

/// Safety invariant: must be called with valid UTF-8 in `bytes`
pub(crate) unsafe fn from_utf8_unchecked<'b>(
bytes: &'b [u8],
safety_justification: &'static str,
Expand All @@ -27,10 +28,12 @@ pub(crate) unsafe fn from_utf8_unchecked<'b>(

// wschar = ( %x20 / ; Space
// %x09 ) ; Horizontal tab
/// Safety-usable invariant: WSCHAR is only ASCII values
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');

// ws = *wschar
pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
// Safety: WSCHAR only contains ASCII
take_while(0.., WSCHAR)
.map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
.parse_next(input)
Expand Down Expand Up @@ -58,8 +61,10 @@ pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {

// newline = ( %x0A / ; LF
// %x0D.0A ) ; CRLF
/// Safety-usable invariant: Only returns ASCII bytes
pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> {
alt((
// Safety: CR and LF are ASCII
one_of(LF).value(b'\n'),
(one_of(CR), one_of(LF)).value(b'\n'),
))
Expand All @@ -76,6 +81,7 @@ pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
)
.map(|()| ())
.recognize()
// Safety: `newline` and `WSCHAR` are all ASCII
.map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") })
.parse_next(input)
}
Expand All @@ -85,6 +91,7 @@ pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
(newline, ws_newline)
.recognize()
.map(|b| unsafe {
// Safety: `newline` and `WSCHAR` are all ASCII
from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
})
.parse_next(input)
Expand Down

0 comments on commit c3d0870

Please sign in to comment.