Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RegexPathSpec documentation and MatchedPath improvements #8163

Merged
merged 7 commits into from Jun 16, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -21,6 +21,111 @@
import org.eclipse.jetty.util.log.Log;
import org.eclipse.jetty.util.log.Logger;

/**
* <p>
* RegexPathSpec is a PathSpec implementation for a {@link PathMappings} instance.
* </p>
*
* <p>
* Supports the standard Java regex found in {@link java.util.regex.Pattern}.
* </p>
*
* <p>
* Supports {@link PathSpecGroup} for {@link PathSpecGroup#EXACT}, {@link PathSpecGroup#PREFIX_GLOB}, {@link PathSpecGroup#MIDDLE_GLOB}, and {@link PathSpecGroup#SUFFIX_GLOB}.
* This is done by evaluating the signature of the provided regex pattern for what is a literal vs a glob (of any kind).
* <ul>
* <li>Only literals, it's a {@link PathSpecGroup#EXACT}.</li>
* <li>Starts with literals, ends with globs, it's a {@link PathSpecGroup#PREFIX_GLOB}</li>
* <li>Starts with glob, has at least 1 literal, then anything else, it's a {@link PathSpecGroup#SUFFIX_GLOB}</li>
* <li>All other signatures are a {@link PathSpecGroup#MIDDLE_GLOB}</li>
* </ul>
* The use of regex capture groups, regex character classes, regex quantifiers, and regex special constructs
* will be identified as a glob (for signature determination), all other characters are identified
* as literal. The regex {@code ^} beginning of line, and regex {@code $} end of line are ignored.
* </p>
*
* <p>
* <b>Support for {@link MatchedPath} and PathMatch vs PathInfo</b>
* </p>
*
* <p>
* There are a few steps in evaluating the matched input path to determine where the split
* in the input path should occur for {@link MatchedPath#getPathMatch()} and {@link MatchedPath#getPathInfo()}.
* <ol>
* <li>If there are no regex capturing groups,
* the entire path is returned in {@link MatchedPath#getPathMatch()},
* and a null returned for {@link MatchedPath#getPathInfo()}</li>
* <li>If the named regex capturing group {@code name} is present, that group
* is returned in {@link MatchedPath#getPathMatch()}</li>
* <li>If the named regex capturing group {@code info} is present, that group
* is returned in {@link MatchedPath#getPathInfo()}</li>
* <li>If the regex is of group type {@link PathSpecGroup#PREFIX_GLOB},
* the beginning of the regex is a literal, so it is split at the start of
* {@code java.util.regex.Matcher.group(1)},
* taking care to handle trailing slash properly so that {@link MatchedPath#getPathMatch()}
* does not end in it, and {@link MatchedPath#getPathInfo()} starts with it.</li>
gregw marked this conversation as resolved.
Show resolved Hide resolved
joakime marked this conversation as resolved.
Show resolved Hide resolved
* <li>
* All other RegexPathSpec signatures will return the entire path
* in {@link MatchedPath#getPathMatch()}, and a null returned for {@link MatchedPath#getPathInfo()}
* </li>
* </ol>
* </p>
*
* <p>
* Some examples:
* </p>
* <code>
* RegexPathSpec("^/[Tt]est(/.*)?$") - type: SUFFIX
* matched("/test/info")
* pathMatch: "/test/info"
* pathInfo: null
* matched("/Test/data")
* pathMatch: "/Test/data"
* pathInfo: null
*
* RegexPathSpec("^/test/info$") - type: EXACT
* matched("/test/info")
* pathMatch: "/test/info"
* pathInfo: null
*
* RegexPathSpec("^/t(.*)/c(.*)$") - type: MIDDLE
* matched("/test/code")
* pathMatch: "/test/code"
* pathInfo: null
*
* RegexPathSpec("^/test(/.*)$") - type: PREFIX
* matched("/test/more")
* pathMatch: "/test"
* pathInfo: "/more"
*
* RegexPathSpec("^/test(/i.*)(/c.*)?$") - type: PREFIX
* matched("/test/info")
* pathMatch: "/test"
* pathInfo: "/info"
* matched("/test/info/code")
* pathMatch: "/test"
* pathInfo: "/info/code"
* matched("/test/ice/cream")
* pathMatch: "/test"
* pathInfo: "/ice/cream"
*
* RegexPathSpec("^(?<name>\/.*)/.*\.do$") - type: SUFFIX
* matched("/test/info/code.do")
* pathMatch: "/test/info"
* pathInfo: "/code.do"
* matched("/a/b/c/d/e/f/g.do")
* pathMatch: "/a/b/c/d/e/f"
* pathInfo: "/g.do"
*
* RegexPathSpec("^(?<name>\/.*)(?<info>\/.*\.action)$") - type: MIDDLE
* matched("/test/info/code.action")
* pathMatch: "/test/info"
* pathInfo: "/code.action"
* matched("/a/b/c/d/e/f/g.action")
* pathMatch: "/a/b/c/d/e/f"
* pathInfo: "/g.action"
* </code>
*/
public class RegexPathSpec extends AbstractPathSpec
{
private static final Logger LOG = Log.getLogger(UriTemplatePathSpec.class);
Expand Down Expand Up @@ -54,8 +159,9 @@ public RegexPathSpec(String regex)
declaration = regex;
int specLength = declaration.length();
// build up a simple signature we can use to identify the grouping
boolean inTextList = false;
boolean inCharacterClass = false;
boolean inQuantifier = false;
boolean inCaptureGroup = false;
StringBuilder signature = new StringBuilder();

int pathDepth = 0;
Expand All @@ -68,8 +174,6 @@ public RegexPathSpec(String regex)
case '^': // ignore anchors
case '$': // ignore anchors
case '\'': // ignore escaping
case '(': // ignore grouping
case ')': // ignore grouping
break;
case '+': // single char quantifier
case '?': // single char quantifier
Expand All @@ -78,25 +182,32 @@ public RegexPathSpec(String regex)
case '.': // any char token
signature.append('g'); // glob
break;
case '{':
case '(': // in regex capture group
inCaptureGroup = true;
break;
case ')':
inCaptureGroup = false;
signature.append('g');
break;
case '{': // in regex quantifier
inQuantifier = true;
break;
case '}':
inQuantifier = false;
break;
case '[':
inTextList = true;
case '[': // in regex character class
inCharacterClass = true;
break;
case ']':
inTextList = false;
inCharacterClass = false;
signature.append('g'); // glob
break;
case '/':
if (!inTextList && !inQuantifier)
if (!inCharacterClass && !inQuantifier && !inCaptureGroup)
pathDepth++;
break;
default:
if (!inTextList && !inQuantifier && Character.isLetterOrDigit(c))
if (!inCharacterClass && !inQuantifier && !inCaptureGroup && Character.isLetterOrDigit(c))
{
if (last == '\\') // escaped
{
Expand Down Expand Up @@ -135,9 +246,9 @@ public RegexPathSpec(String regex)
String sig = signature.toString();

PathSpecGroup group;
if (Pattern.matches("^l*$", sig))
if (Pattern.matches("^l+$", sig))
group = PathSpecGroup.EXACT;
else if (Pattern.matches("^l*g+", sig))
else if (Pattern.matches("^l+g+", sig))
group = PathSpecGroup.PREFIX_GLOB;
else if (Pattern.matches("^g+l+.*", sig))
group = PathSpecGroup.SUFFIX_GLOB;
Expand Down Expand Up @@ -279,38 +390,100 @@ private class RegexMatchedPath implements MatchedPath
private final String path;
private final Matcher matcher;

/**
* Cached split index for pathMatch vs pathInfo.
*
* (-2) is for unsearched
* (-1) is for searched, but not found
* any other value is the index
*/
protected int splitIdx = -2;

/**
* Creates a matched-path holder for one path and its regex matcher.
* <p>
* The constructor only stores the three references; it performs no matching
* or splitting itself.
* </p>
* <p>
* NOTE(review): the accessors in this class call {@code matcher.start(...)} /
* {@code matcher.end(...)} / {@code matcher.group(...)} directly, so the matcher
* presumably must already hold a successful match when passed in - confirm against the caller.
* </p>
*
* @param regexPathSpec the path spec kept as this instance's {@code pathSpec}
* @param path the input path kept as this instance's {@code path}
* @param matcher the regex matcher kept as this instance's {@code matcher}
*/
public RegexMatchedPath(RegexPathSpec regexPathSpec, String path, Matcher matcher)
{
this.pathSpec = regexPathSpec;
this.path = path;
this.matcher = matcher;
}

@Override
public String getPathMatch()
protected int getSplitIndex()
{
try
if (splitIdx >= -1)
{
String p = matcher.group("name");
if (p != null)
{
return p;
}
return splitIdx;
}
catch (IllegalArgumentException ignore)

if (matcher.groupCount() >= 1)
{
// ignore if group name not found.
try
{
int end = matcher.end("name");
if (end >= (-1))
{
splitIdx = end;
return splitIdx;
}
}
catch (IllegalArgumentException ignore)
{
// ignore if group name not found.
}

// Try named group 'info'
try
{
int start = matcher.start("info");
if (start >= (-1))
{
splitIdx = start;
return splitIdx;
}
}
joakime marked this conversation as resolved.
Show resolved Hide resolved
catch (IllegalArgumentException ignore)
{
// ignore if group info not found.
}

if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB)
joakime marked this conversation as resolved.
Show resolved Hide resolved
{
int idx = matcher.start(1);
if (idx >= (-1))
{
splitIdx = idx;
return splitIdx;
}
}

splitIdx = -1; // not found
}
return splitIdx;
}

if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
@Override
public String getPathMatch()
{
if (matcher.groupCount() >= 1)
{
int idx = matcher.start(1);
if (idx > 0)
try
{
if (this.path.charAt(idx - 1) == '/')
idx--;
return this.path.substring(0, idx);
String p = matcher.group("name");
if (p != null)
{
return p;
}
}
catch (IllegalArgumentException ignore)
{
// ignore if group name not found.
}
}

int idx = getSplitIndex();
if (idx >= 0)
{
if (this.path.charAt(idx - 1) == '/')
idx--;
return this.path.substring(0, idx);
}

// default is the full path
Expand All @@ -320,23 +493,27 @@ public String getPathMatch()
@Override
public String getPathInfo()
{
try
if (matcher.groupCount() >= 1)
{
String p = matcher.group("info");
if (p != null)
// Try named group 'info'
try
{
return p;
String p = matcher.group("info");
if (p != null)
{
return p;
}
}
catch (IllegalArgumentException ignore)
{
// ignore if group info not found.
}
}
catch (IllegalArgumentException ignore)
{
// ignore if group info not found.
}

// Path Info only valid for PREFIX_GLOB
if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
int idx = getSplitIndex();
if (idx >= 0)
{
String pathInfo = matcher.group(1);
String pathInfo = this.path.substring(idx);
if ("".equals(pathInfo))
return "/";
else
Expand Down