From f9a5709068b211006a643576c0cf963fe09bd977 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Wed, 21 Sep 2022 11:23:15 -0500
Subject: [PATCH] Update the spec test suite submodule (#763)

* Update the spec test suite submodule

This commit updates the `tests/testsuite` submodule which is a copy of
the spec tests from upstream and the various proposals repositories. The
major change included here is the update to the lexing of the text
format to allow "reserved" tokens to have string tokens embedded within
them (in a sense). This doesn't affect any parsing in the crate, only
the tokenization of the input.

Additionally there are a few minor updates to error messages plus a
new exceptional case where the spec interpreter is printing "unknown
global" for defined globals; I'm not really sure what's going on there.

* Fix tests

* Fix more tests

* Add support for the relaxed-simd spec test suite
---
 crates/wast/src/core/wast.rs                  |  10 ++
 crates/wast/src/lexer.rs                      | 161 ++++++++++++------
 crates/wast/src/names.rs                      |   2 +-
 .../wast/tests/parse-fail/bad-index.wat.err   |   2 +-
 tests/local/component-model/adapt.wast        |   4 +-
 tests/local/component-model/definedtypes.wast |   5 +-
 .../local/component-model/instance-type.wast  |   2 +-
 tests/local/component-model/invalid.wast      |   2 +-
 tests/local/component-model/types.wast        |   8 +-
 tests/local/multi-memory.wast                 |   2 +-
 tests/local/try.wast                          |   2 +-
 tests/roundtrip.rs                            |  11 ++
 tests/testsuite                               |   2 +-
 13 files changed, 149 insertions(+), 64 deletions(-)

diff --git a/crates/wast/src/core/wast.rs b/crates/wast/src/core/wast.rs
index 5780b9de4f..41437e02d8 100644
--- a/crates/wast/src/core/wast.rs
+++ b/crates/wast/src/core/wast.rs
@@ -76,6 +76,8 @@ pub enum WastRetCore<'a> {
     RefExtern(u32),
     /// A non-null funcref is expected.
     RefFunc(Option<Index<'a>>),
+
+    Either(Vec<WastRetCore<'a>>),
 }
 
 static RETS: &[(&str, fn(Parser<'_>) -> Result<WastRetCore<'_>>)] = {
@@ -89,6 +91,14 @@ static RETS: &[(&str, fn(Parser<'_>) -> Result<WastRetCore<'_>>)] = {
         ("ref.null", |p| Ok(RefNull(p.parse()?))),
         ("ref.extern", |p| Ok(RefExtern(p.parse()?))),
         ("ref.func", |p| Ok(RefFunc(p.parse()?))),
+        ("either", |p| {
+            p.depth_check()?;
+            let mut cases = Vec::new();
+            while !p.is_empty() {
+                cases.push(p.parens(|p| p.parse())?);
+            }
+            Ok(Either(cases))
+        }),
     ]
 };
 
diff --git a/crates/wast/src/lexer.rs b/crates/wast/src/lexer.rs
index ee40807f8d..a4f8f128c7 100644
--- a/crates/wast/src/lexer.rs
+++ b/crates/wast/src/lexer.rs
@@ -90,6 +90,12 @@ pub enum Token<'a> {
     Float(Float<'a>),
 }
 
+enum ReservedKind<'a> {
+    String(Cow<'a, [u8]>),
+    Idchars,
+    Reserved,
+}
+
 /// Errors that can be generated while lexing.
 ///
 /// All lexing errors have line/colum/position information as well as a
@@ -354,37 +360,51 @@ impl<'a> Lexer<'a> {
 
             b')' => Ok(Some(Token::RParen(self.split_first_byte()))),
 
-            b'"' => {
-                let val = self.string()?;
-                let src = &self.input[pos..self.cur()];
-                return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner {
-                    val,
-                    src,
-                })))));
-            }
-
             // https://webassembly.github.io/spec/core/text/lexical.html#white-space
             b' ' | b'\n' | b'\r' | b'\t' => Ok(Some(Token::Whitespace(self.split_ws()))),
 
-            c @ idchars!() => {
-                let reserved = self.split_while(|b| matches!(b, idchars!()));
-
-                // https://webassembly.github.io/spec/core/text/values.html#integers
-                if let Some(number) = self.number(reserved) {
-                    Ok(Some(number))
-                // https://webassembly.github.io/spec/core/text/values.html#text-id
-                } else if *c == b'$' && reserved.len() > 1 {
-                    Ok(Some(Token::Id(reserved)))
-                // https://webassembly.github.io/spec/core/text/lexical.html#text-keyword
-                } else if b'a' <= *c && *c <= b'z' {
-                    Ok(Some(Token::Keyword(reserved)))
-                } else {
-                    Ok(Some(Token::Reserved(reserved)))
+            c @ (idchars!() | b'"') => {
+                let (kind, src) = self.split_reserved()?;
+                match kind {
+                    // If the reserved token was simply a single string then
+                    // that is converted to a standalone string token
+                    ReservedKind::String(val) => {
+                        return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner {
+                            val,
+                            src,
+                        })))));
+                    }
+
+                    // If only idchars were consumed then this could be a
+                    // specific kind of standalone token we're interested in.
+                    ReservedKind::Idchars => {
+                        // https://webassembly.github.io/spec/core/text/values.html#integers
+                        if let Some(number) = self.number(src) {
+                            return Ok(Some(number));
+                        // https://webassembly.github.io/spec/core/text/values.html#text-id
+                        } else if *c == b'$' && src.len() > 1 {
+                            return Ok(Some(Token::Id(src)));
+                        // https://webassembly.github.io/spec/core/text/lexical.html#text-keyword
+                        } else if b'a' <= *c && *c <= b'z' {
+                            return Ok(Some(Token::Keyword(src)));
+                        }
+                    }
+
+                    // ... otherwise this was a conglomeration of idchars,
+                    // strings, or just idchars that don't match a prior rule,
+                    // meaning this falls through to the fallback `Reserved`
+                    // token.
+                    ReservedKind::Reserved => {}
                 }
+
+                Ok(Some(Token::Reserved(src)))
             }
 
             // This could be a line comment, otherwise `;` is a reserved token.
             // The second byte is checked to see if it's a `;;` line comment
+            //
+            // Note that this character being considered as part of a
+            // `reserved` token is part of the annotations proposal.
             b';' => match self.remaining.as_bytes().get(1) {
                 Some(b';') => {
                     let comment = self.split_until(b'\n');
@@ -395,6 +415,9 @@ impl<'a> Lexer<'a> {
             },
 
             // Other known reserved tokens other than `;`
+            //
+            // Note that these characters being considered as part of a
+            // `reserved` token is part of the annotations proposal.
             b',' | b'[' | b']' | b'{' | b'}' => Ok(Some(Token::Reserved(self.split_first_byte()))),
 
             _ => {
@@ -466,16 +489,74 @@ impl<'a> Lexer<'a> {
         ret
     }
 
-    fn split_while(&mut self, f: impl Fn(u8) -> bool) -> &'a str {
-        let pos = self
-            .remaining
-            .as_bytes()
-            .iter()
-            .position(|b| !f(*b))
-            .unwrap_or(self.remaining.len());
+    /// Splits off a "reserved" token which is then further processed later on
+    /// to figure out which kind of token it is depending on `ReservedKind`.
+    ///
+    /// For more information on this method see the clarification at
+    /// https://github.com/WebAssembly/spec/pull/1499 but the general gist is
+    /// that this is parsing the grammar:
+    ///
+    /// ```text
+    /// reserved := (idchar | string)+
+    /// ```
+    ///
+    /// which means that it is eating any number of adjacent string/idchar
+    /// tokens (e.g. `a"b"c`) and returning the classification of what was
+    /// eaten. The classification assists in determining what the token that
+    /// was actually eaten looks like.
+    fn split_reserved(&mut self) -> Result<(ReservedKind<'a>, &'a str), Error> {
+        let mut idchars = false;
+        let mut strings = 0u32;
+        let mut last_string_val = None;
+        let mut pos = 0;
+        while let Some(byte) = self.remaining.as_bytes().get(pos) {
+            match byte {
+                // Normal `idchars` production which appends to the reserved
+                // token that's being produced.
+                idchars!() => {
+                    idchars = true;
+                    pos += 1;
+                }
+
+                // https://webassembly.github.io/spec/core/text/values.html#text-string
+                b'"' => {
+                    strings += 1;
+                    pos += 1;
+                    let mut it = self.remaining[pos..].chars();
+                    let result = Lexer::parse_str(&mut it, self.allow_confusing_unicode);
+                    pos = self.remaining.len() - it.as_str().len();
+                    match result {
+                        Ok(s) => last_string_val = Some(s),
+                        Err(e) => {
+                            let start = self.input.len() - self.remaining.len();
+                            self.remaining = &self.remaining[pos..];
+                            let err_pos = match &e {
+                                LexError::UnexpectedEof => self.input.len(),
+                                _ => {
+                                    self.input[..start + pos]
+                                        .char_indices()
+                                        .next_back()
+                                        .unwrap()
+                                        .0
+                                }
+                            };
+                            return Err(self.error(err_pos, e));
+                        }
+                    }
+                }
+
+                // Nothing else is considered part of a reserved token
+                _ => break,
+            }
+        }
         let (ret, remaining) = self.remaining.split_at(pos);
         self.remaining = remaining;
-        ret
+        Ok(match (idchars, strings) {
+            (false, 0) => unreachable!(),
+            (false, 1) => (ReservedKind::String(last_string_val.unwrap()), ret),
+            (true, 0) => (ReservedKind::Idchars, ret),
+            _ => (ReservedKind::Reserved, ret),
+        })
     }
 
     fn number(&self, src: &'a str) -> Option<Token<'a>> {
@@ -688,24 +769,6 @@ impl<'a> Lexer<'a> {
         Ok(())
     }
 
-    /// Reads everything for a literal string except the leading `"`. Returns
-    /// the string value that has been read.
-    ///
-    /// https://webassembly.github.io/spec/core/text/values.html#text-string
-    fn string(&mut self) -> Result<Cow<'a, [u8]>, Error> {
-        let mut it = self.remaining[1..].chars();
-        let result = Lexer::parse_str(&mut it, self.allow_confusing_unicode);
-        let end = self.input.len() - it.as_str().len();
-        self.remaining = &self.input[end..];
-        result.map_err(|e| {
-            let err_pos = match &e {
-                LexError::UnexpectedEof => self.input.len(),
-                _ => self.input[..end].char_indices().next_back().unwrap().0,
-            };
-            self.error(err_pos, e)
-        })
-    }
-
     fn parse_str(
         it: &mut str::Chars<'a>,
         allow_confusing_unicode: bool,
diff --git a/crates/wast/src/names.rs b/crates/wast/src/names.rs
index b6cf06f443..7cbfc5d9ca 100644
--- a/crates/wast/src/names.rs
+++ b/crates/wast/src/names.rs
@@ -81,6 +81,6 @@ pub fn resolve_error(id: Id<'_>, ns: &str) -> Error {
     );
     Error::new(
         id.span(),
-        format!("failed to find {} named `${}`", ns, id.name()),
+        format!("unknown {ns}: failed to find name `${}`", id.name()),
     )
 }
diff --git a/crates/wast/tests/parse-fail/bad-index.wat.err b/crates/wast/tests/parse-fail/bad-index.wat.err
index 717d46017b..f380bddec1 100644
--- a/crates/wast/tests/parse-fail/bad-index.wat.err
+++ b/crates/wast/tests/parse-fail/bad-index.wat.err
@@ -1,4 +1,4 @@
-failed to find label named `$s`
+unknown label: failed to find name `$s`
      --> tests/parse-fail/bad-index.wat:1:18
       |
     1 | (func br_on_null $s)
diff --git a/tests/local/component-model/adapt.wast b/tests/local/component-model/adapt.wast
index b570323d0b..562a2cd9ac 100644
--- a/tests/local/component-model/adapt.wast
+++ b/tests/local/component-model/adapt.wast
@@ -252,7 +252,7 @@
     (core instance $i (instantiate $m))
     (core func (canon lower (func $i "")))
   )
-  "failed to find instance named `$i`")
+  "unknown instance: failed to find name `$i`")
 
 (assert_invalid
   (component
@@ -284,4 +284,4 @@
     (import "" (func $f))
     (func (export "foo") (canon lift (core func $f)))
   )
-  "failed to find core func named `$f`")
+  "unknown core func: failed to find name `$f`")
diff --git a/tests/local/component-model/definedtypes.wast b/tests/local/component-model/definedtypes.wast
index b124730a88..d79a15205a 100644
--- a/tests/local/component-model/definedtypes.wast
+++ b/tests/local/component-model/definedtypes.wast
@@ -57,7 +57,7 @@
   (component
     (type $t (variant (case "x" (refines $y)) (case $y "y" string)))
   )
-  "failed to find variant case named `$y`"
+  "unknown variant case"
 )
 
 (assert_invalid
@@ -65,9 +65,10 @@
     (type $t string)
     (type $v (variant (case "x" $t (refines $z))))
   )
-  "failed to find variant case named `$z`"
+  "unknown variant case"
 )
 
+
 (assert_invalid
   (component
     (type $t string)
diff --git a/tests/local/component-model/instance-type.wast b/tests/local/component-model/instance-type.wast
index 249b4fd592..c1acc813bd 100644
--- a/tests/local/component-model/instance-type.wast
+++ b/tests/local/component-model/instance-type.wast
@@ -204,7 +204,7 @@
     (type (instance
       (export "" (core module (type $t)))
     )))
-  "failed to find core type named `$t`")
+  "unknown core type")
 
 (assert_invalid
   (component
diff --git a/tests/local/component-model/invalid.wast b/tests/local/component-model/invalid.wast
index 0c5a6f46ed..3b57e3579b 100644
--- a/tests/local/component-model/invalid.wast
+++ b/tests/local/component-model/invalid.wast
@@ -11,7 +11,7 @@
   (component quote
     "(export \"\" (func $foo))"
   )
-  "failed to find func named")
+  "unknown func")
 
 (assert_malformed
   (component quote
diff --git a/tests/local/component-model/types.wast b/tests/local/component-model/types.wast
index 855ec64cd6..74d5355ba2 100644
--- a/tests/local/component-model/types.wast
+++ b/tests/local/component-model/types.wast
@@ -124,7 +124,7 @@
       (alias outer $c $t (type))
     ))
   )
-  "failed to find core type named `$t`")
+  "unknown core type")
 
 (assert_invalid
   (component $c
@@ -149,7 +149,7 @@
       (alias outer $c $t (type))
     ))
   )
-  "failed to find type named `$t`")
+  "unknown type")
 
 (assert_invalid
   (component $c
@@ -203,7 +203,7 @@
       (alias outer $c $t (type))
     ))
   )
-  "failed to find type named `$t`")
+  "unknown type")
 
 (assert_invalid
   (component $c
@@ -288,4 +288,4 @@
     (import "" (type (eq 0)))
     (export "" (type (eq 0)))
   ))
-)
\ No newline at end of file
+)
diff --git a/tests/local/multi-memory.wast b/tests/local/multi-memory.wast
index dc4b4755ac..89bc1a92ca 100644
--- a/tests/local/multi-memory.wast
+++ b/tests/local/multi-memory.wast
@@ -216,7 +216,7 @@
   (module quote
     "(func i32.load $a)"
   )
-  "failed to find memory")
+  "unknown memory")
 
 (module
   (memory 1)
diff --git a/tests/local/try.wast b/tests/local/try.wast
index bc4a93d650..e4ffa23d48 100644
--- a/tests/local/try.wast
+++ b/tests/local/try.wast
@@ -52,4 +52,4 @@
   (module quote
     "(func (try $l (do) (delegate $l)))"
   )
-  "failed to find label")
+  "unknown label")
diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs
index 49998d0631..ec5978e18e 100644
--- a/tests/roundtrip.rs
+++ b/tests/roundtrip.rs
@@ -132,6 +132,12 @@ fn skip_test(test: &Path, contents: &[u8]) -> bool {
         "function-references/func_bind.wast",
         "function-references/ref_as_non_null.wast",
         "function-references/return_call_ref.wast",
+        // TODO: new syntax for table types has been added with an optional
+        // initializer which needs parsing in the text format.
+        "function-references/table.wast",
+        // TODO: This references an instruction which has since been removed
+        // from the proposal so the test needs an update.
+        "relaxed-simd/relaxed_fma_fms.wast",
     ];
     if broken.iter().any(|x| test.ends_with(x)) {
         return true;
@@ -460,6 +466,7 @@ impl TestState {
                 "component-model" => features.component_model = true,
                 "multi-memory" => features.multi_memory = true,
                 "extended-const" => features.extended_const = true,
+                "relaxed-simd" => features.relaxed_simd = true,
                 _ => {}
             }
         }
@@ -602,5 +609,9 @@ fn error_matches(error: &str, message: &str) -> bool {
         return error.contains("invalid u32 number: constant out of range");
     }
 
+    if message == "unknown global" {
+        return error.contains("global.get of locally defined global");
+    }
+
     return false;
 }
diff --git a/tests/testsuite b/tests/testsuite
index d42da0117f..4f77306bb6 160000
--- a/tests/testsuite
+++ b/tests/testsuite
@@ -1 +1 @@
-Subproject commit d42da0117f7a93c6a9127e2b9eec64749152c4c1
+Subproject commit 4f77306bb63151631d84f58dedf67958eb9911b9