Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: support wildcard namespaces in xpath queries
This is almost as fast as a standard child-axis search, and much faster than using either the nokogiri-builtin:local-name-is() function or local-name():

    //span                                      18.923k (± 8.9%) i/s - 93.906k in 5.010792s
    //*[local-name()='span']                     1.849k (± 2.8%) i/s -  9.261k in 5.011560s
    //*[nokogiri-builtin:local-name-is('span')]  3.191k (± 2.4%) i/s - 16.150k in 5.064798s
    //*:span                                    18.016k (± 4.6%) i/s - 89.900k in 5.003444s

Comparison:

    //span: 18922.5 i/s
    //*:span: 18016.5 i/s - same-ish: difference falls within error
    //*[nokogiri-builtin:local-name-is('span')]: 3190.6 i/s - 5.93x (± 0.00) slower
    //*[local-name()='span']: 1849.4 i/s - 10.23x (± 0.00) slower
- Loading branch information
1 parent
1397529
commit 15fb707
Showing
4 changed files
with
144 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
From 96e31fa0a94daa42aae174f2eb3f42e1ba7d5720 Mon Sep 17 00:00:00 2001 | ||
From: Mike Dalessio <mike.dalessio@gmail.com> | ||
Date: Fri, 24 Dec 2021 19:08:01 -0500 | ||
Subject: [PATCH] attempt to hack in wildcard namespaces to xpath | ||
|
||
I'm not confident this is a bulletproof patch. | ||
--- | ||
xpath.c | 24 ++++++++++++++++++------ | ||
1 file changed, 18 insertions(+), 6 deletions(-) | ||
|
||
diff --git a/xpath.c b/xpath.c | ||
index 1aa2f1a..dd28d4d 100644 | ||
--- a/xpath.c | ||
+++ b/xpath.c | ||
@@ -146,6 +146,9 @@ | ||
#define XPATH_MAX_RECURSION_DEPTH 5000 | ||
#endif | ||
|
||
+#define WILDCARD_PREFIX "*" /* pseudo-prefix: name test matches in any namespace */ | ||
+#define IS_WILDCARD_PREFIX(p) (xmlStrEqual((const xmlChar *) WILDCARD_PREFIX, (p))) /* tests the argument p, not a caller-scope variable */ | ||
+ | ||
/* | ||
* TODO: | ||
* There are a few spots where some tests are done which depend upon ascii | ||
@@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test, | ||
SKIP_BLANKS; | ||
|
||
if ((name == NULL) && (CUR == '*')) { | ||
- /* | ||
- * All elements | ||
- */ | ||
NEXT; | ||
- *test = NODE_TEST_ALL; | ||
- return(NULL); | ||
+ if (CUR != ':') { | ||
+ /* | ||
+ * All elements | ||
+ */ | ||
+ *test = NODE_TEST_ALL; | ||
+ return(NULL); | ||
+ } | ||
+ name = xmlCharStrdup(WILDCARD_PREFIX); | ||
} | ||
|
||
if (name == NULL) | ||
@@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) { | ||
} | ||
#endif | ||
if (CUR == '*') { | ||
+ if (NXT(1) == ':') { | ||
+ NEXT; | ||
+ name = xmlCharStrdup(WILDCARD_PREFIX); | ||
+ } | ||
axis = AXIS_CHILD; | ||
} else { | ||
if (name == NULL) | ||
@@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, | ||
/* | ||
* Setup namespaces. | ||
*/ | ||
- if (prefix != NULL) { | ||
+ if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) { | ||
URI = xmlXPathNsLookup(xpctxt, prefix); | ||
if (URI == NULL) { | ||
xmlXPathReleaseObject(xpctxt, obj); | ||
@@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, | ||
{ | ||
XP_TEST_HIT | ||
} | ||
+ } else if (IS_WILDCARD_PREFIX(prefix)) { | ||
+ XP_TEST_HIT | ||
} else { | ||
if ((cur->ns != NULL) && | ||
(xmlStrEqual(URI, cur->ns->href))) | ||
-- | ||
2.31.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters