Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RegexPathSpec documentation and MatchedPath improvements #8163

Merged
merged 7 commits into from Jun 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -21,6 +21,77 @@
import org.eclipse.jetty.util.log.Log;
import org.eclipse.jetty.util.log.Logger;

/**
* <p>
* RegexPathSpec is a PathSpec implementation for a {@link PathMappings} instance.
* </p>
*
* <p>
* Supports the standard Java regex found in {@link java.util.regex.Pattern}.
* </p>
*
* <p>
* Supports {@link PathSpecGroup} for {@link PathSpecGroup#EXACT}, {@link PathSpecGroup#PREFIX_GLOB}, {@link PathSpecGroup#MIDDLE_GLOB}, and {@link PathSpecGroup#SUFFIX_GLOB}.
* This is done by evaluating the signature or the provided regex pattern for what is a literal vs a glob (of any kind).
* </p>
*
* <ul>
* <li>Only literals, it's a {@link PathSpecGroup#EXACT}.</li>
* <li>Starts with literals, ends with globs, it's a {@link PathSpecGroup#PREFIX_GLOB}</li>
* <li>Starts with glob, has at least 1 literal, then any thing else, it's a {@link PathSpecGroup#SUFFIX_GLOB}</li>
* <li>All other signatures are a {@link PathSpecGroup#MIDDLE_GLOB}</li>
* </ul>
*
* <p>
* The use of regex capture groups, regex character classes, regex quantifiers, and regex special contructs
* will be identified as a glob (for signature determination), all other characters are identified
* as literal. The regex {@code ^} beginning of line, and regex {@code $} end of line are ignored.
* </p>
*
* <p>
* <b>Support for {@link MatchedPath} and PathMatch vs PathInfo</b>
* </p>
*
* <p>
* There's a few steps in evaluating the matched input path for determining where the split
* in the input path should occur for {@link MatchedPath#getPathMatch()} and {@link MatchedPath#getPathInfo()}.
* </p>
*
* <ol>
* <li>
* If there are no regex capturing groups,
* the entire path is returned in {@link MatchedPath#getPathMatch()},
* and a null returned for {@link MatchedPath#getPathInfo()}
* </li>
* <li>
* If both the named regex capturing groups {@code name} and {@code info} are present, then
* the {@code name} group is returned in {@link MatchedPath#getPathMatch()} and the
* {@code info} group is returned in {@link MatchedPath#getPathInfo()}
* </li>
* <li>
* If there is only 1 regex capturing group
* <ul>
* <li>
* If the named regex capturing group {@code name} is present, the
* input path up to the end of the capturing group is returned
* in {@link MatchedPath#getPathMatch()} and any following characters (or null)
* are returned in {@link MatchedPath#getPathInfo()}
* </li>
* <li>
* other wise the input path up to the start of the capturing group is returned
* in {@link MatchedPath#getPathMatch()} and any following characters (or null)
* are returned in {@link MatchedPath#getPathInfo()}
* </li>
* </ul>
* If the split on pathMatch ends with {@code /} AND the pathInfo doesn't start with {@code /}
* then the slash is moved from pathMatch to pathInfo.
* </li>
* <li>
* All other RegexPathSpec signatures will return the entire path
* in {@link MatchedPath#getPathMatch()}, and a null returned for {@link MatchedPath#getPathInfo()}
* </li>
* </ol>
*/
public class RegexPathSpec extends AbstractPathSpec
{
private static final Logger LOG = Log.getLogger(UriTemplatePathSpec.class);
Expand Down Expand Up @@ -54,8 +125,9 @@ public RegexPathSpec(String regex)
declaration = regex;
int specLength = declaration.length();
// build up a simple signature we can use to identify the grouping
boolean inTextList = false;
boolean inCharacterClass = false;
boolean inQuantifier = false;
boolean inCaptureGroup = false;
StringBuilder signature = new StringBuilder();

int pathDepth = 0;
Expand All @@ -68,8 +140,6 @@ public RegexPathSpec(String regex)
case '^': // ignore anchors
case '$': // ignore anchors
case '\'': // ignore escaping
case '(': // ignore grouping
case ')': // ignore grouping
break;
case '+': // single char quantifier
case '?': // single char quantifier
Expand All @@ -78,25 +148,32 @@ public RegexPathSpec(String regex)
case '.': // any char token
signature.append('g'); // glob
break;
case '{':
case '(': // in regex capture group
inCaptureGroup = true;
break;
case ')':
inCaptureGroup = false;
signature.append('g');
break;
case '{': // in regex quantifier
inQuantifier = true;
break;
case '}':
inQuantifier = false;
break;
case '[':
inTextList = true;
case '[': // in regex character class
inCharacterClass = true;
break;
case ']':
inTextList = false;
inCharacterClass = false;
signature.append('g'); // glob
break;
case '/':
if (!inTextList && !inQuantifier)
if (!inCharacterClass && !inQuantifier && !inCaptureGroup)
pathDepth++;
break;
default:
if (!inTextList && !inQuantifier && Character.isLetterOrDigit(c))
if (!inCharacterClass && !inQuantifier && !inCaptureGroup && Character.isLetterOrDigit(c))
{
if (last == '\\') // escaped
{
Expand Down Expand Up @@ -135,9 +212,9 @@ public RegexPathSpec(String regex)
String sig = signature.toString();

PathSpecGroup group;
if (Pattern.matches("^l*$", sig))
if (Pattern.matches("^l+$", sig))
group = PathSpecGroup.EXACT;
else if (Pattern.matches("^l*g+", sig))
else if (Pattern.matches("^l+g+", sig))
group = PathSpecGroup.PREFIX_GLOB;
else if (Pattern.matches("^g+l+.*", sig))
group = PathSpecGroup.SUFFIX_GLOB;
Expand Down Expand Up @@ -193,44 +270,19 @@ public int getPathDepth()
@Override
public String getPathInfo(String path)
{
// Path Info only valid for PREFIX_GLOB types
if (_group == PathSpecGroup.PREFIX_GLOB)
{
Matcher matcher = getMatcher(path);
if (matcher.matches())
{
if (matcher.groupCount() >= 1)
{
String pathInfo = matcher.group(1);
if ("".equals(pathInfo))
return "/";
else
return pathInfo;
}
}
}
return null;
MatchedPath matched = matched(path);
if (matched == null)
return null;
return matched.getPathInfo();
}

@Override
public String getPathMatch(String path)
{
Matcher matcher = getMatcher(path);
if (matcher.matches())
{
if (_group == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
{
int idx = matcher.start(1);
if (idx > 0)
{
if (path.charAt(idx - 1) == '/')
idx--;
return path.substring(0, idx);
}
}
return path;
}
return null;
MatchedPath matched = matched(path);
if (matched == null)
return "";
return matched.getPathMatch();
}

@Override
Expand Down Expand Up @@ -277,74 +329,117 @@ private class RegexMatchedPath implements MatchedPath
{
private final RegexPathSpec pathSpec;
private final String path;
private final Matcher matcher;
private String pathMatch;
private String pathInfo;

public RegexMatchedPath(RegexPathSpec regexPathSpec, String path, Matcher matcher)
{
this.pathSpec = regexPathSpec;
this.path = path;
this.matcher = matcher;

calcPathMatchInfo(matcher);
}

@Override
public String getPathMatch()
private void calcPathMatchInfo(Matcher matcher)
{
try
{
String p = matcher.group("name");
if (p != null)
{
return p;
}
}
catch (IllegalArgumentException ignore)
int groupCount = matcher.groupCount();


if (groupCount == 0)
{
// ignore if group name not found.
pathMatch = path;
pathInfo = null;
return;
}

if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
if (groupCount == 1)
{
// we know we are splitting
int idxNameEnd = endOf(matcher, "name");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ultimately this lookup makes me itch! If they are not using the named group, then an exception is created and ignored for every lookup attempt.

For now, can we add a TODO that when we scan the pattern, we should look for the presence of "" and only do this lookup if we have seen it in the pattern.

if (idxNameEnd >= 0)
{
pathMatch = path.substring(0, idxNameEnd);
pathInfo = path.substring(idxNameEnd);

// If split on pathMatch ends with '/'
// AND pathInfo doesn't have one, move the slash to pathInfo only move 1 level
if (pathMatch.length() > 0 && pathMatch.charAt(pathMatch.length() - 1) == '/' &&
!pathInfo.startsWith("/"))
{
pathMatch = pathMatch.substring(0, pathMatch.length() - 1);
pathInfo = '/' + pathInfo;
}
return;
}

// Use start of anonymous group
int idx = matcher.start(1);
if (idx > 0)
if (idx >= 0)
{
if (this.path.charAt(idx - 1) == '/')
idx--;
return this.path.substring(0, idx);
pathMatch = path.substring(0, idx);
pathInfo = path.substring(idx);

if (pathMatch.length() > 0 && pathMatch.charAt(pathMatch.length() - 1) == '/' &&
!pathInfo.startsWith("/"))
{
pathMatch = pathMatch.substring(0, pathMatch.length() - 1);
pathInfo = '/' + pathInfo;
}
return;
}
}

// default is the full path
return this.path;
// Reach here we have 2+ groups

String gName = valueOf(matcher, "name");
String gInfo = valueOf(matcher, "info");
Comment on lines +394 to +395
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two exceptions may be created and ignored here. So again, I think we need a TODO to optimise that away in future.


// if both named groups exist
if (gName != null && gInfo != null)
{
pathMatch = gName;
pathInfo = gInfo;
return;
}

pathMatch = path;
pathInfo = null;
}

@Override
public String getPathInfo()
private String valueOf(Matcher matcher, String groupName)
{
try
{
String p = matcher.group("info");
if (p != null)
{
return p;
}
return matcher.group(groupName);
}
catch (IllegalArgumentException ignore)
catch (IllegalArgumentException notFound)
{
// ignore if group info not found.
return null;
}
}

// Path Info only valid for PREFIX_GLOB
if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
private int endOf(Matcher matcher, String groupName)
{
try
{
return matcher.end(groupName);
}
catch (IllegalArgumentException notFound)
{
String pathInfo = matcher.group(1);
if ("".equals(pathInfo))
return "/";
else
return pathInfo;
return -2;
}
}

// default is null
return null;
@Override
public String getPathMatch()
{
return this.pathMatch;
}

@Override
public String getPathInfo()
{
return this.pathInfo;
}

@Override
Expand All @@ -353,7 +448,6 @@ public String toString()
return getClass().getSimpleName() + "[" +
"pathSpec=" + pathSpec +
", path=\"" + path + "\"" +
", matcher=" + matcher +
']';
}
}
Expand Down