Skip to content

Commit

Permalink
Update the spec test suite submodule (#763)
Browse files Browse the repository at this point in the history
* Update the spec test suite submodule

This commit updates the `tests/testsuite` submodule which is a copy of
the spec tests from upstream and the various proposals repositories. The
major change included here is the update to the lexing of the text
format to allow "reserved" tokens to have string tokens embedded within
them (in a sense). This doesn't affect any parsing in the crate, only
the tokenization of the input.

Additionally there are a few minor updates to error messages plus a
new exceptional case where the spec interpreter prints "unknown
global" for defined globals; I'm not really sure what's going on there.

* Fix tests

* Fix more tests

* Add support for the relaxed-simd spec test suite
  • Loading branch information
alexcrichton committed Sep 21, 2022
1 parent 3dfb643 commit f9a5709
Show file tree
Hide file tree
Showing 13 changed files with 149 additions and 64 deletions.
10 changes: 10 additions & 0 deletions crates/wast/src/core/wast.rs
Expand Up @@ -76,6 +76,8 @@ pub enum WastRetCore<'a> {
RefExtern(u32),
/// A non-null funcref is expected.
RefFunc(Option<Index<'a>>),

Either(Vec<WastRetCore<'a>>),
}

static RETS: &[(&str, fn(Parser<'_>) -> Result<WastRetCore<'_>>)] = {
Expand All @@ -89,6 +91,14 @@ static RETS: &[(&str, fn(Parser<'_>) -> Result<WastRetCore<'_>>)] = {
("ref.null", |p| Ok(RefNull(p.parse()?))),
("ref.extern", |p| Ok(RefExtern(p.parse()?))),
("ref.func", |p| Ok(RefFunc(p.parse()?))),
("either", |p| {
p.depth_check()?;
let mut cases = Vec::new();
while !p.is_empty() {
cases.push(p.parens(|p| p.parse())?);
}
Ok(Either(cases))
}),
]
};

Expand Down
161 changes: 112 additions & 49 deletions crates/wast/src/lexer.rs
Expand Up @@ -90,6 +90,12 @@ pub enum Token<'a> {
Float(Float<'a>),
}

/// Classification of the text consumed by `Lexer::split_reserved`, i.e. of a
/// run matching `(idchar | string)+`.
///
/// The lexer uses this to decide whether the consumed text is a standalone
/// string token, a candidate for a number/id/keyword token, or a generic
/// `Reserved` token.
enum ReservedKind<'a> {
/// The run was exactly one string literal with no adjacent idchars. Carries
/// the string value produced by `Lexer::parse_str` for that literal.
String(Cow<'a, [u8]>),
/// The run consisted only of idchars and contained no string literals.
Idchars,
/// Anything else: idchars mixed with one or more strings, or more than one
/// adjacent string.
Reserved,
}

/// Errors that can be generated while lexing.
///
/// All lexing errors have line/column/position information as well as a
Expand Down Expand Up @@ -354,37 +360,51 @@ impl<'a> Lexer<'a> {

b')' => Ok(Some(Token::RParen(self.split_first_byte()))),

b'"' => {
let val = self.string()?;
let src = &self.input[pos..self.cur()];
return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner {
val,
src,
})))));
}

// https://webassembly.github.io/spec/core/text/lexical.html#white-space
b' ' | b'\n' | b'\r' | b'\t' => Ok(Some(Token::Whitespace(self.split_ws()))),

c @ idchars!() => {
let reserved = self.split_while(|b| matches!(b, idchars!()));

// https://webassembly.github.io/spec/core/text/values.html#integers
if let Some(number) = self.number(reserved) {
Ok(Some(number))
// https://webassembly.github.io/spec/core/text/values.html#text-id
} else if *c == b'$' && reserved.len() > 1 {
Ok(Some(Token::Id(reserved)))
// https://webassembly.github.io/spec/core/text/lexical.html#text-keyword
} else if b'a' <= *c && *c <= b'z' {
Ok(Some(Token::Keyword(reserved)))
} else {
Ok(Some(Token::Reserved(reserved)))
c @ (idchars!() | b'"') => {
let (kind, src) = self.split_reserved()?;
match kind {
// If the reserved token was simply a single string then
// that is converted to a standalone string token
ReservedKind::String(val) => {
return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner {
val,
src,
})))));
}

// If only idchars were consumed then this could be a
// specific kind of standalone token we're interested in.
ReservedKind::Idchars => {
// https://webassembly.github.io/spec/core/text/values.html#integers
if let Some(number) = self.number(src) {
return Ok(Some(number));
// https://webassembly.github.io/spec/core/text/values.html#text-id
} else if *c == b'$' && src.len() > 1 {
return Ok(Some(Token::Id(src)));
// https://webassembly.github.io/spec/core/text/lexical.html#text-keyword
} else if b'a' <= *c && *c <= b'z' {
return Ok(Some(Token::Keyword(src)));
}
}

// ... otherwise this was a conglomeration of idchars,
// strings, or just idchars that don't match a prior rule,
// meaning this falls through to the fallback `Reserved`
// token.
ReservedKind::Reserved => {}
}

Ok(Some(Token::Reserved(src)))
}

// This could be a line comment, otherwise `;` is a reserved token.
// The second byte is checked to see if it's a `;;` line comment
//
// Note that this character being considered as part of a
// `reserved` token is part of the annotations proposal.
b';' => match self.remaining.as_bytes().get(1) {
Some(b';') => {
let comment = self.split_until(b'\n');
Expand All @@ -395,6 +415,9 @@ impl<'a> Lexer<'a> {
},

// Other known reserved tokens other than `;`
//
// Note that these characters being considered as part of a
// `reserved` token is part of the annotations proposal.
b',' | b'[' | b']' | b'{' | b'}' => Ok(Some(Token::Reserved(self.split_first_byte()))),

_ => {
Expand Down Expand Up @@ -466,16 +489,74 @@ impl<'a> Lexer<'a> {
ret
}

fn split_while(&mut self, f: impl Fn(u8) -> bool) -> &'a str {
let pos = self
.remaining
.as_bytes()
.iter()
.position(|b| !f(*b))
.unwrap_or(self.remaining.len());
/// Splits off a "reserved" token which is then further processed later on
/// to figure out which kind of token it is depending on `ReservedKind`.
///
/// For more information on this method see the clarification at
/// https://github.com/WebAssembly/spec/pull/1499 but the general gist is
/// that this is parsing the grammar:
///
/// ```text
/// reserved := (idchar | string)+
/// ```
///
/// which means that it is eating any number of adjacent string/idchar
/// tokens (e.g. `a"b"c`) and returning the classification of what was
/// eaten. The classification assists in determining what the actual token
/// eaten here looks like.
fn split_reserved(&mut self) -> Result<(ReservedKind<'a>, &'a str), Error> {
let mut idchars = false;
let mut strings = 0u32;
let mut last_string_val = None;
let mut pos = 0;
while let Some(byte) = self.remaining.as_bytes().get(pos) {
match byte {
// Normal `idchars` production which appends to the reserved
// token that's being produced.
idchars!() => {
idchars = true;
pos += 1;
}

// https://webassembly.github.io/spec/core/text/values.html#text-string
b'"' => {
strings += 1;
pos += 1;
let mut it = self.remaining[pos..].chars();
let result = Lexer::parse_str(&mut it, self.allow_confusing_unicode);
pos = self.remaining.len() - it.as_str().len();
match result {
Ok(s) => last_string_val = Some(s),
Err(e) => {
let start = self.input.len() - self.remaining.len();
self.remaining = &self.remaining[pos..];
let err_pos = match &e {
LexError::UnexpectedEof => self.input.len(),
_ => {
self.input[..start + pos]
.char_indices()
.next_back()
.unwrap()
.0
}
};
return Err(self.error(err_pos, e));
}
}
}

// Nothing else is considered part of a reserved token
_ => break,
}
}
let (ret, remaining) = self.remaining.split_at(pos);
self.remaining = remaining;
ret
Ok(match (idchars, strings) {
(false, 0) => unreachable!(),
(false, 1) => (ReservedKind::String(last_string_val.unwrap()), ret),
(true, 0) => (ReservedKind::Idchars, ret),
_ => (ReservedKind::Reserved, ret),
})
}

fn number(&self, src: &'a str) -> Option<Token<'a>> {
Expand Down Expand Up @@ -688,24 +769,6 @@ impl<'a> Lexer<'a> {
Ok(())
}

/// Reads everything for a literal string except the leading `"`. Returns
/// the string value that has been read.
///
/// `self.remaining` is advanced past whatever `Lexer::parse_str` consumed,
/// regardless of whether parsing succeeded. On failure the `LexError` is
/// converted into an `Error` via `self.error` with a byte position computed
/// below.
///
/// https://webassembly.github.io/spec/core/text/values.html#text-string
fn string(&mut self) -> Result<Cow<'a, [u8]>, Error> {
// Skip the first byte of the remaining input (the opening `"`) and let
// `parse_str` consume characters from there.
let mut it = self.remaining[1..].chars();
let result = Lexer::parse_str(&mut it, self.allow_confusing_unicode);
// Whatever is left in the iterator is unconsumed input, so `end` is the
// absolute offset into `self.input` where `parse_str` stopped.
let end = self.input.len() - it.as_str().len();
self.remaining = &self.input[end..];
result.map_err(|e| {
// An unexpected EOF is reported at the very end of the input; any other
// error is reported at the start of the last character before `end`.
// The `unwrap` cannot fail because at least the leading quote precedes
// `end`.
let err_pos = match &e {
LexError::UnexpectedEof => self.input.len(),
_ => self.input[..end].char_indices().next_back().unwrap().0,
};
self.error(err_pos, e)
})
}

fn parse_str(
it: &mut str::Chars<'a>,
allow_confusing_unicode: bool,
Expand Down
2 changes: 1 addition & 1 deletion crates/wast/src/names.rs
Expand Up @@ -81,6 +81,6 @@ pub fn resolve_error(id: Id<'_>, ns: &str) -> Error {
);
Error::new(
id.span(),
format!("failed to find {} named `${}`", ns, id.name()),
format!("unknown {ns}: failed to find name `${}`", id.name()),
)
}
2 changes: 1 addition & 1 deletion crates/wast/tests/parse-fail/bad-index.wat.err
@@ -1,4 +1,4 @@
failed to find label named `$s`
unknown label: failed to find name `$s`
--> tests/parse-fail/bad-index.wat:1:18
|
1 | (func br_on_null $s)
Expand Down
4 changes: 2 additions & 2 deletions tests/local/component-model/adapt.wast
Expand Up @@ -252,7 +252,7 @@
(core instance $i (instantiate $m))
(core func (canon lower (func $i "")))
)
"failed to find instance named `$i`")
"unknown instance: failed to find name `$i`")

(assert_invalid
(component
Expand Down Expand Up @@ -284,4 +284,4 @@
(import "" (func $f))
(func (export "foo") (canon lift (core func $f)))
)
"failed to find core func named `$f`")
"unknown core func: failed to find name `$f`")
5 changes: 3 additions & 2 deletions tests/local/component-model/definedtypes.wast
Expand Up @@ -57,17 +57,18 @@
(component
(type $t (variant (case "x" (refines $y)) (case $y "y" string)))
)
"failed to find variant case named `$y`"
"unknown variant case"
)

(assert_invalid
(component
(type $t string)
(type $v (variant (case "x" $t (refines $z))))
)
"failed to find variant case named `$z`"
"unknown variant case"
)


(assert_invalid
(component
(type $t string)
Expand Down
2 changes: 1 addition & 1 deletion tests/local/component-model/instance-type.wast
Expand Up @@ -204,7 +204,7 @@
(type (instance
(export "" (core module (type $t)))
)))
"failed to find core type named `$t`")
"unknown core type")

(assert_invalid
(component
Expand Down
2 changes: 1 addition & 1 deletion tests/local/component-model/invalid.wast
Expand Up @@ -11,7 +11,7 @@
(component quote
"(export \"\" (func $foo))"
)
"failed to find func named")
"unknown func")

(assert_malformed
(component quote
Expand Down
8 changes: 4 additions & 4 deletions tests/local/component-model/types.wast
Expand Up @@ -124,7 +124,7 @@
(alias outer $c $t (type))
))
)
"failed to find core type named `$t`")
"unknown core type")

(assert_invalid
(component $c
Expand All @@ -149,7 +149,7 @@
(alias outer $c $t (type))
))
)
"failed to find type named `$t`")
"unknown type")

(assert_invalid
(component $c
Expand Down Expand Up @@ -203,7 +203,7 @@
(alias outer $c $t (type))
))
)
"failed to find type named `$t`")
"unknown type")

(assert_invalid
(component $c
Expand Down Expand Up @@ -288,4 +288,4 @@
(import "" (type (eq 0)))
(export "" (type (eq 0)))
))
)
)
2 changes: 1 addition & 1 deletion tests/local/multi-memory.wast
Expand Up @@ -216,7 +216,7 @@
(module quote
"(func i32.load $a)"
)
"failed to find memory")
"unknown memory")

(module
(memory 1)
Expand Down
2 changes: 1 addition & 1 deletion tests/local/try.wast
Expand Up @@ -52,4 +52,4 @@
(module quote
"(func (try $l (do) (delegate $l)))"
)
"failed to find label")
"unknown label")
11 changes: 11 additions & 0 deletions tests/roundtrip.rs
Expand Up @@ -132,6 +132,12 @@ fn skip_test(test: &Path, contents: &[u8]) -> bool {
"function-references/func_bind.wast",
"function-references/ref_as_non_null.wast",
"function-references/return_call_ref.wast",
// TODO: new syntax for table types has been added with an optional
// initializer which needs parsing in the text format.
"function-references/table.wast",
// TODO: This references an instruction which has since been removed
// from the proposal so the test needs an update.
"relaxed-simd/relaxed_fma_fms.wast",
];
if broken.iter().any(|x| test.ends_with(x)) {
return true;
Expand Down Expand Up @@ -460,6 +466,7 @@ impl TestState {
"component-model" => features.component_model = true,
"multi-memory" => features.multi_memory = true,
"extended-const" => features.extended_const = true,
"relaxed-simd" => features.relaxed_simd = true,
_ => {}
}
}
Expand Down Expand Up @@ -602,5 +609,9 @@ fn error_matches(error: &str, message: &str) -> bool {
return error.contains("invalid u32 number: constant out of range");
}

if message == "unknown global" {
return error.contains("global.get of locally defined global");
}

return false;
}
2 changes: 1 addition & 1 deletion tests/testsuite
Submodule testsuite updated 52 files
+51 −6 binary.wast
+17 −8 data.wast
+18 −5 elem.wast
+9 −0 global.wast
+51 −6 proposals/exception-handling/binary.wast
+17 −8 proposals/extended-const/data.wast
+18 −5 proposals/extended-const/elem.wast
+9 −0 proposals/extended-const/global.wast
+0 −964 proposals/function-references/binary-leb128.wast
+51 −6 proposals/function-references/binary.wast
+7 −6 proposals/function-references/br_on_non_null.wast
+4 −4 proposals/function-references/br_on_null.wast
+0 −351 proposals/function-references/bulk.wast
+0 −1,017 proposals/function-references/call_indirect.wast
+21 −15 proposals/function-references/call_ref.wast
+0 −345 proposals/function-references/data.wast
+0 −468 proposals/function-references/elem.wast
+0 −196 proposals/function-references/exports.wast
+30 −30 proposals/function-references/func.wast
+0 −552 proposals/function-references/func_bind.wast
+145 −19 proposals/function-references/global.wast
+0 −605 proposals/function-references/imports.wast
+0 −302 proposals/function-references/let.wast
+265 −0 proposals/function-references/local_get.wast
+0 −5,578 proposals/function-references/memory_copy.wast
+0 −686 proposals/function-references/memory_fill.wast
+0 −379 proposals/function-references/memory_grow.wast
+0 −967 proposals/function-references/memory_init.wast
+2 −2 proposals/function-references/ref_as_non_null.wast
+60 −44 proposals/function-references/return_call_ref.wast
+65 −15 proposals/function-references/table.wast
+0 −3,082 proposals/function-references/table_copy.wast
+0 −153 proposals/function-references/table_fill.wast
+0 −88 proposals/function-references/table_get.wast
+0 −173 proposals/function-references/table_grow.wast
+0 −2,143 proposals/function-references/table_init.wast
+0 −119 proposals/function-references/table_set.wast
+0 −86 proposals/function-references/table_size.wast
+22 −0 proposals/function-references/unreached-invalid.wast
+77 −0 proposals/function-references/unreached-valid.wast
+76 −0 proposals/gc/type-subtyping.wast
+51 −6 proposals/multi-memory/binary.wast
+17 −8 proposals/multi-memory/data.wast
+13 −0 proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast
+56 −0 proposals/relaxed-simd/i32x4_relaxed_trunc.wast
+25 −0 proposals/relaxed-simd/i8x16_relaxed_swizzle.wast
+78 −0 proposals/relaxed-simd/relaxed_fma_fms.wast
+43 −0 proposals/relaxed-simd/relaxed_laneselect.wast
+104 −0 proposals/relaxed-simd/relaxed_min_max.wast
+274 −0 tokens.wast
+1 −0 unreached-valid.wast
+1 −0 update-testsuite.sh

0 comments on commit f9a5709

Please sign in to comment.