diff --git a/bleach/html5lib_shim.py b/bleach/html5lib_shim.py
index aa5189b1..ca1cc8c8 100644
--- a/bleach/html5lib_shim.py
+++ b/bleach/html5lib_shim.py
@@ -395,10 +395,17 @@ def __iter__(self):
# followed by a series of characters. It's treated as a tag
# name that abruptly ends, but we should treat that like
# character data
- yield {
- "type": TAG_TOKEN_TYPE_CHARACTERS,
- "data": "<" + self.currentToken["name"],
- }
+ yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
+ elif last_error_token["data"] in (
+ "eof-in-attribute-name",
+ "eof-in-attribute-value-no-quotes",
+ ):
+ # Handle the case where the text being parsed ends with <
+ # followed by a series of characters and then space and then
+ # more characters. It's treated as a tag name followed by an
+ # attribute that abruptly ends, but we should treat that like
+ # character data.
+ yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
else:
yield last_error_token
diff --git a/tests/test_clean.py b/tests/test_clean.py
index 73946a1f..dc129d0e 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -163,6 +163,10 @@ def test_bare_entities_get_escaped_correctly(text, expected):
("", "<y>"),
+ # this is an eof-in-attribute-name parser error
+ ("