diff --git a/crates/wast/src/core/wast.rs b/crates/wast/src/core/wast.rs index 5780b9de4f..41437e02d8 100644 --- a/crates/wast/src/core/wast.rs +++ b/crates/wast/src/core/wast.rs @@ -76,6 +76,8 @@ pub enum WastRetCore<'a> { RefExtern(u32), /// A non-null funcref is expected. RefFunc(Option>), + + Either(Vec>), } static RETS: &[(&str, fn(Parser<'_>) -> Result>)] = { @@ -89,6 +91,14 @@ static RETS: &[(&str, fn(Parser<'_>) -> Result>)] = { ("ref.null", |p| Ok(RefNull(p.parse()?))), ("ref.extern", |p| Ok(RefExtern(p.parse()?))), ("ref.func", |p| Ok(RefFunc(p.parse()?))), + ("either", |p| { + p.depth_check()?; + let mut cases = Vec::new(); + while !p.is_empty() { + cases.push(p.parens(|p| p.parse())?); + } + Ok(Either(cases)) + }), ] }; diff --git a/crates/wast/src/lexer.rs b/crates/wast/src/lexer.rs index ee40807f8d..a4f8f128c7 100644 --- a/crates/wast/src/lexer.rs +++ b/crates/wast/src/lexer.rs @@ -90,6 +90,12 @@ pub enum Token<'a> { Float(Float<'a>), } +enum ReservedKind<'a> { + String(Cow<'a, [u8]>), + Idchars, + Reserved, +} + /// Errors that can be generated while lexing. 
/// /// All lexing errors have line/colum/position information as well as a @@ -354,37 +360,51 @@ impl<'a> Lexer<'a> { b')' => Ok(Some(Token::RParen(self.split_first_byte()))), - b'"' => { - let val = self.string()?; - let src = &self.input[pos..self.cur()]; - return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner { - val, - src, - }))))); - } - // https://webassembly.github.io/spec/core/text/lexical.html#white-space b' ' | b'\n' | b'\r' | b'\t' => Ok(Some(Token::Whitespace(self.split_ws()))), - c @ idchars!() => { - let reserved = self.split_while(|b| matches!(b, idchars!())); - - // https://webassembly.github.io/spec/core/text/values.html#integers - if let Some(number) = self.number(reserved) { - Ok(Some(number)) - // https://webassembly.github.io/spec/core/text/values.html#text-id - } else if *c == b'$' && reserved.len() > 1 { - Ok(Some(Token::Id(reserved))) - // https://webassembly.github.io/spec/core/text/lexical.html#text-keyword - } else if b'a' <= *c && *c <= b'z' { - Ok(Some(Token::Keyword(reserved))) - } else { - Ok(Some(Token::Reserved(reserved))) + c @ (idchars!() | b'"') => { + let (kind, src) = self.split_reserved()?; + match kind { + // If the reserved token was simply a single string then + // that is converted to a standalone string token + ReservedKind::String(val) => { + return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner { + val, + src, + }))))); + } + + // If only idchars were consumed then this could be a + // specific kind of standalone token we're interested in. 
+ ReservedKind::Idchars => { + // https://webassembly.github.io/spec/core/text/values.html#integers + if let Some(number) = self.number(src) { + return Ok(Some(number)); + // https://webassembly.github.io/spec/core/text/values.html#text-id + } else if *c == b'$' && src.len() > 1 { + return Ok(Some(Token::Id(src))); + // https://webassembly.github.io/spec/core/text/lexical.html#text-keyword + } else if b'a' <= *c && *c <= b'z' { + return Ok(Some(Token::Keyword(src))); + } + } + + // ... otherwise this was a conglomeration of idchars, + // strings, or just idchars that don't match a prior rule, + // meaning this falls through to the fallback `Reserved` + // token. + ReservedKind::Reserved => {} } + + Ok(Some(Token::Reserved(src))) } // This could be a line comment, otherwise `;` is a reserved token. // The second byte is checked to see if it's a `;;` line comment + // + // Note that this character being considered as part of a + // `reserved` token is part of the annotations proposal. b';' => match self.remaining.as_bytes().get(1) { Some(b';') => { let comment = self.split_until(b'\n'); @@ -395,6 +415,9 @@ impl<'a> Lexer<'a> { }, // Other known reserved tokens other than `;` + // + // Note that these characters being considered as part of a + // `reserved` token is part of the annotations proposal. b',' | b'[' | b']' | b'{' | b'}' => Ok(Some(Token::Reserved(self.split_first_byte()))), _ => { @@ -466,16 +489,74 @@ impl<'a> Lexer<'a> { ret } - fn split_while(&mut self, f: impl Fn(u8) -> bool) -> &'a str { - let pos = self - .remaining - .as_bytes() - .iter() - .position(|b| !f(*b)) - .unwrap_or(self.remaining.len()); + /// Splits off a "reserved" token which is then further processed later on + /// to figure out which kind of token it is depending on `ReservedKind`. 
+ /// + /// For more information on this method see the clarification at + /// https://github.com/WebAssembly/spec/pull/1499 but the general gist is + /// that this is parsing the grammar: + /// + /// ```text + /// reserved := (idchar | string)+ + /// ``` + /// + /// which means that it is eating any number of adjacent string/idchar + /// tokens (e.g. `a"b"c`) and returning the classification of what was + /// eaten. The classification assists in determining what kind of token + /// was actually eaten. + fn split_reserved(&mut self) -> Result<(ReservedKind<'a>, &'a str), Error> { + let mut idchars = false; + let mut strings = 0u32; + let mut last_string_val = None; + let mut pos = 0; + while let Some(byte) = self.remaining.as_bytes().get(pos) { + match byte { + // Normal `idchars` production which appends to the reserved + // token that's being produced. + idchars!() => { + idchars = true; + pos += 1; + } + + // https://webassembly.github.io/spec/core/text/values.html#text-string + b'"' => { + strings += 1; + pos += 1; + let mut it = self.remaining[pos..].chars(); + let result = Lexer::parse_str(&mut it, self.allow_confusing_unicode); + pos = self.remaining.len() - it.as_str().len(); + match result { + Ok(s) => last_string_val = Some(s), + Err(e) => { + let start = self.input.len() - self.remaining.len(); + self.remaining = &self.remaining[pos..]; + let err_pos = match &e { + LexError::UnexpectedEof => self.input.len(), + _ => { + self.input[..start + pos] + .char_indices() + .next_back() + .unwrap() + .0 + } + }; + return Err(self.error(err_pos, e)); + } + } + } + + // Nothing else is considered part of a reserved token + _ => break, + } + } let (ret, remaining) = self.remaining.split_at(pos); self.remaining = remaining; - ret + Ok(match (idchars, strings) { + (false, 0) => unreachable!(), + (false, 1) => (ReservedKind::String(last_string_val.unwrap()), ret), + (true, 0) => (ReservedKind::Idchars, ret), + _ => (ReservedKind::Reserved, ret), + }) } fn 
number(&self, src: &'a str) -> Option> { @@ -688,24 +769,6 @@ impl<'a> Lexer<'a> { Ok(()) } - /// Reads everything for a literal string except the leading `"`. Returns - /// the string value that has been read. - /// - /// https://webassembly.github.io/spec/core/text/values.html#text-string - fn string(&mut self) -> Result, Error> { - let mut it = self.remaining[1..].chars(); - let result = Lexer::parse_str(&mut it, self.allow_confusing_unicode); - let end = self.input.len() - it.as_str().len(); - self.remaining = &self.input[end..]; - result.map_err(|e| { - let err_pos = match &e { - LexError::UnexpectedEof => self.input.len(), - _ => self.input[..end].char_indices().next_back().unwrap().0, - }; - self.error(err_pos, e) - }) - } - fn parse_str( it: &mut str::Chars<'a>, allow_confusing_unicode: bool, diff --git a/crates/wast/src/names.rs b/crates/wast/src/names.rs index b6cf06f443..7cbfc5d9ca 100644 --- a/crates/wast/src/names.rs +++ b/crates/wast/src/names.rs @@ -81,6 +81,6 @@ pub fn resolve_error(id: Id<'_>, ns: &str) -> Error { ); Error::new( id.span(), - format!("failed to find {} named `${}`", ns, id.name()), + format!("unknown {ns}: failed to find name `${}`", id.name()), ) } diff --git a/crates/wast/tests/parse-fail/bad-index.wat.err b/crates/wast/tests/parse-fail/bad-index.wat.err index 717d46017b..f380bddec1 100644 --- a/crates/wast/tests/parse-fail/bad-index.wat.err +++ b/crates/wast/tests/parse-fail/bad-index.wat.err @@ -1,4 +1,4 @@ -failed to find label named `$s` +unknown label: failed to find name `$s` --> tests/parse-fail/bad-index.wat:1:18 | 1 | (func br_on_null $s) diff --git a/tests/local/component-model/adapt.wast b/tests/local/component-model/adapt.wast index b570323d0b..562a2cd9ac 100644 --- a/tests/local/component-model/adapt.wast +++ b/tests/local/component-model/adapt.wast @@ -252,7 +252,7 @@ (core instance $i (instantiate $m)) (core func (canon lower (func $i ""))) ) - "failed to find instance named `$i`") + "unknown instance: failed to 
find name `$i`") (assert_invalid (component @@ -284,4 +284,4 @@ (import "" (func $f)) (func (export "foo") (canon lift (core func $f))) ) - "failed to find core func named `$f`") + "unknown core func: failed to find name `$f`") diff --git a/tests/local/component-model/definedtypes.wast b/tests/local/component-model/definedtypes.wast index b124730a88..d79a15205a 100644 --- a/tests/local/component-model/definedtypes.wast +++ b/tests/local/component-model/definedtypes.wast @@ -57,7 +57,7 @@ (component (type $t (variant (case "x" (refines $y)) (case $y "y" string))) ) - "failed to find variant case named `$y`" + "unknown variant case" ) (assert_invalid @@ -65,9 +65,10 @@ (type $t string) (type $v (variant (case "x" $t (refines $z)))) ) - "failed to find variant case named `$z`" + "unknown variant case" ) + (assert_invalid (component (type $t string) diff --git a/tests/local/component-model/instance-type.wast b/tests/local/component-model/instance-type.wast index 249b4fd592..c1acc813bd 100644 --- a/tests/local/component-model/instance-type.wast +++ b/tests/local/component-model/instance-type.wast @@ -204,7 +204,7 @@ (type (instance (export "" (core module (type $t))) ))) - "failed to find core type named `$t`") + "unknown core type") (assert_invalid (component diff --git a/tests/local/component-model/invalid.wast b/tests/local/component-model/invalid.wast index 0c5a6f46ed..3b57e3579b 100644 --- a/tests/local/component-model/invalid.wast +++ b/tests/local/component-model/invalid.wast @@ -11,7 +11,7 @@ (component quote "(export \"\" (func $foo))" ) - "failed to find func named") + "unknown func") (assert_malformed (component quote diff --git a/tests/local/component-model/types.wast b/tests/local/component-model/types.wast index 855ec64cd6..74d5355ba2 100644 --- a/tests/local/component-model/types.wast +++ b/tests/local/component-model/types.wast @@ -124,7 +124,7 @@ (alias outer $c $t (type)) )) ) - "failed to find core type named `$t`") + "unknown core type") 
(assert_invalid (component $c @@ -149,7 +149,7 @@ (alias outer $c $t (type)) )) ) - "failed to find type named `$t`") + "unknown type") (assert_invalid (component $c @@ -203,7 +203,7 @@ (alias outer $c $t (type)) )) ) - "failed to find type named `$t`") + "unknown type") (assert_invalid (component $c @@ -288,4 +288,4 @@ (import "" (type (eq 0))) (export "" (type (eq 0))) )) -) \ No newline at end of file +) diff --git a/tests/local/multi-memory.wast b/tests/local/multi-memory.wast index dc4b4755ac..89bc1a92ca 100644 --- a/tests/local/multi-memory.wast +++ b/tests/local/multi-memory.wast @@ -216,7 +216,7 @@ (module quote "(func i32.load $a)" ) - "failed to find memory") + "unknown memory") (module (memory 1) diff --git a/tests/local/try.wast b/tests/local/try.wast index bc4a93d650..e4ffa23d48 100644 --- a/tests/local/try.wast +++ b/tests/local/try.wast @@ -52,4 +52,4 @@ (module quote "(func (try $l (do) (delegate $l)))" ) - "failed to find label") + "unknown label") diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs index 49998d0631..ec5978e18e 100644 --- a/tests/roundtrip.rs +++ b/tests/roundtrip.rs @@ -132,6 +132,12 @@ fn skip_test(test: &Path, contents: &[u8]) -> bool { "function-references/func_bind.wast", "function-references/ref_as_non_null.wast", "function-references/return_call_ref.wast", + // TODO: new syntax for table types has been added with an optional + // initializer which needs parsing in the text format. + "function-references/table.wast", + // TODO: This references an instruction which has since been removed + // from the proposal so the test needs an update. 
+ "relaxed-simd/relaxed_fma_fms.wast", ]; if broken.iter().any(|x| test.ends_with(x)) { return true; @@ -460,6 +466,7 @@ impl TestState { "component-model" => features.component_model = true, "multi-memory" => features.multi_memory = true, "extended-const" => features.extended_const = true, + "relaxed-simd" => features.relaxed_simd = true, _ => {} } } @@ -602,5 +609,9 @@ fn error_matches(error: &str, message: &str) -> bool { return error.contains("invalid u32 number: constant out of range"); } + if message == "unknown global" { + return error.contains("global.get of locally defined global"); + } + return false; } diff --git a/tests/testsuite b/tests/testsuite index d42da0117f..4f77306bb6 160000 --- a/tests/testsuite +++ b/tests/testsuite @@ -1 +1 @@ -Subproject commit d42da0117f7a93c6a9127e2b9eec64749152c4c1 +Subproject commit 4f77306bb63151631d84f58dedf67958eb9911b9