Skip to content

Commit

Permalink
Further optimize Api.parse() by removing whitespace in the parsing pa…
Browse files Browse the repository at this point in the history
…ss, and avoiding multiple string copies.

PiperOrigin-RevId: 447992096
  • Loading branch information
nick-someone authored and Error Prone Team committed May 11, 2022
1 parent 9ca78df commit cc06a4a
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 103 deletions.
Expand Up @@ -27,7 +27,6 @@

import com.google.auto.value.AutoValue;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.errorprone.annotations.CompileTimeConstant;
Expand Down Expand Up @@ -86,8 +85,6 @@ private Matcher<ExpressionTree> matcher() {
.withParameters(parameterTypes());
}

private static final Splitter PARAM_SPLITTER = Splitter.on(',');

/**
* Parses an API string into an {@link Api}. Example API strings are:
*
Expand All @@ -98,100 +95,161 @@ private Matcher<ExpressionTree> matcher() {
* <li>an instance method with types erased (e.g., {@code java.util.List#add(java.lang.Object)})
* </ul>
*/
static Api parse(String apiWithWhitespace) {
// TODO(kak): consider removing whitespace from the String as we step through the String
String api = whitespace().removeFrom(apiWithWhitespace);

boolean isConstructor = false;
int hashIndex = -1;
int openParenIndex = -1;
int closeParenIndex = -1;
int lessThanIndex = -1;
int greaterThanIndex = -1;
for (int i = 0; i < api.length(); i++) {
char ch = api.charAt(i);
switch (ch) {
case '#':
check(hashIndex == -1, api, "it contains more than one '#'");
hashIndex = i;
break;
case '(':
check(openParenIndex == -1, api, "it contains more than one '('");
openParenIndex = i;
break;
case ')':
check(closeParenIndex == -1, api, "it contains more than one ')'");
closeParenIndex = i;
break;
case '<':
check(lessThanIndex == -1, api, "it contains more than one '<'");
lessThanIndex = i;
isConstructor = true;
break;
case '>':
check(greaterThanIndex == -1, api, "it contains more than one '>'");
greaterThanIndex = i;
isConstructor = true;
break;
case ',': // for separating parameters
case '.': // for package names and fully qualified parameter names
case '[': // for array signature types
case ']': // for array signature types
break;
default:
checkArgument(
isJavaIdentifierPart(ch),
"Unable to parse '%s' because '%s' is not a valid identifier",
api,
ch);
}
}
static Api parse(String api) {
Parser p = new Parser(api);

// make sure we've seen a hash, open paren, and close paren
check(hashIndex != -1, api, "it must contain a '#'");
check(openParenIndex != -1, api, "it must contain a '('");
check(closeParenIndex == api.length() - 1, api, "it must end with ')'");
// Let's parse this in 3 parts:
// * Fully-qualified owning name, followed by #
// * method name, or "<init>", followed by (
// * Any number of parameter types, all but the last followed by a ',', Finishing with )
// * and nothing at the end.

// make sure they came in the correct order: <sometext>#<sometext>(<sometext>)
check(hashIndex < openParenIndex, api, "'#' must come before '('");
check(openParenIndex < closeParenIndex, api, "'(' must come before ')'");
String className = p.owningType();
String methodName = p.methodName();
ImmutableList<String> paramList = p.parameters();
p.ensureNoMoreCharacters();

if (isConstructor) {
// make sure that if we've seen a < or >, we also have seen the matching one
check(lessThanIndex != -1, api, "must contain both '<' and '>'");
check(greaterThanIndex != -1, api, "must contain both '<' and '>'");
return new AutoValue_Api(className, methodName, paramList);
}

// make sure the < comes directly after the #
check(lessThanIndex == hashIndex + 1, api, "'<' must come directly after '#'");
private static final class Parser {
private final String api;
private int position = -1;

// make sure that the < comes before the >
check(lessThanIndex < greaterThanIndex, api, "'<' must come before '>'");
Parser(String api) {
this.api = api;
}

// make sure that the > comes directly before the (
check(greaterThanIndex == openParenIndex - 1, api, "'>' must come directly before '('");
String owningType() {
StringBuilder buffer = new StringBuilder(api.length());
token:
do {
char next = nextLookingFor('#');
switch (next) {
case '#':
// We've hit the end of the leading type, break out.
break token;
case '.':
// OK, separator
break;
default:
checkArgument(
isJavaIdentifierPart(next),
"Unable to parse '%s' because '%s' is not a valid identifier",
api,
next);
}
buffer.append(next);
} while (true);
String type = buffer.toString();

// make sure the only thing between the < and > is exactly "init"
String constructorName = api.substring(lessThanIndex + 1, greaterThanIndex);
checkArgument(
constructorName.equals("init"),
"Unable to parse '%s' because %s is an invalid method name",
check(!type.isEmpty(), api, "class name cannot be empty");
check(
isJavaIdentifierStart(type.charAt(0)),
api,
constructorName);
"the class name must start with a valid character");
return type;
}

String className = api.substring(0, hashIndex);
String methodName = api.substring(hashIndex + 1, openParenIndex);
String parameters = api.substring(openParenIndex + 1, closeParenIndex);
String methodName() {
StringBuilder buffer = new StringBuilder(api.length() - position);
boolean isConstructor = false;
boolean finishedConstructor = false;
// match "<init>", or otherwise a normal method name
token:
do {
char next = nextLookingFor('(');
switch (next) {
case '(':
// We've hit the end of the method name, break out.
break token;
case '<':
// Starting a constructor
check(!isConstructor, api, "Only one '<' is allowed");
check(buffer.length() == 0, api, "'<' must come directly after '#'");
isConstructor = true;
break;
case '>':
check(isConstructor, api, "'<' must come before '>'");
check(!finishedConstructor, api, "Only one '>' is allowed");
finishedConstructor = true;
break;
default:
checkArgument(
isJavaIdentifierPart(next),
"Unable to parse '%s' because '%s' is not a valid identifier",
api,
next);
}
buffer.append(next);
} while (true);

String methodName = buffer.toString();
if (isConstructor) {
check(finishedConstructor, api, "found '<' without closing '>");

ImmutableList<String> paramList =
parameters.isEmpty()
? ImmutableList.of()
: PARAM_SPLITTER.splitToStream(parameters).collect(toImmutableList());
// Must be "<init>" exactly
checkArgument(
methodName.equals("<init>"),
"Unable to parse '%s' because %s is an invalid method name",
api,
methodName);
} else {
check(!methodName.isEmpty(), api, "method name cannot be empty");
check(
isJavaIdentifierStart(methodName.charAt(0)),
api,
"the method name must start with a valid character");
}

// make sure the class name, method name, and parameter names are not empty
check(!className.isEmpty(), api, "the class name cannot be empty");
check(!methodName.isEmpty(), api, "the method name cannot be empty");
for (String parameter : paramList) {
return methodName;
}

ImmutableList<String> parameters() {
// Text until the next ',' or ')' represents the parameter type.
// If the first token is ')', then we have an empty parameter list.
StringBuilder buffer = new StringBuilder(api.length() - position);
ImmutableList.Builder<String> paramBuilder = ImmutableList.builder();
boolean emptyList = true;
paramList:
do {
char next = nextLookingFor(')');
switch (next) {
case ')':
if (emptyList) {
return ImmutableList.of();
}
// We've hit the end of the whole list, bail out.
paramBuilder.add(consumeParam(buffer));
break paramList;
case ',':
// We've hit the middle of a parameter, consume it
paramBuilder.add(consumeParam(buffer));
break;

case '[':
case ']':
case '.':
// . characters are separators, [ and ] are array characters, they're checked @ the end
buffer.append(next);
break;

default:
checkArgument(
isJavaIdentifierPart(next),
"Unable to parse '%s' because '%s' is not a valid identifier",
api,
next);
emptyList = false;
buffer.append(next);
}
} while (true);
return paramBuilder.build();
}

private String consumeParam(StringBuilder buffer) {
String parameter = buffer.toString();
buffer.setLength(0); // reset the buffer
check(!parameter.isEmpty(), api, "parameters cannot be empty");

check(
Expand All @@ -202,8 +260,8 @@ static Api parse(String apiWithWhitespace) {
// Array specs must be in balanced pairs at the *end* of the parameter.
boolean parsingArrayStart = false;
boolean hasArraySpecifiers = false;
for (int i = 1; i < parameter.length(); i++) {
char c = parameter.charAt(i);
for (int k = 1; k < parameter.length(); k++) {
char c = parameter.charAt(k);
switch (c) {
case '[':
check(!parsingArrayStart, api, "multiple consecutive [");
Expand All @@ -222,22 +280,27 @@ static Api parse(String apiWithWhitespace) {
}
}
check(!parsingArrayStart, api, "[ without closing ] at the end of a parameter type");
return parameter;
}
// make sure the class name starts with a valid Java identifier character
check(
isJavaIdentifierStart(className.charAt(0)),
api,
"the class name must start with a valid character");

if (!isConstructor) {
// make sure the method name starts with a valid Java identifier character
check(
isJavaIdentifierStart(methodName.charAt(0)),
api,
"the method name must start with a valid character");

// skip whitespace characters and give the next non-whitespace character. If we hit the end
// without a non-whitespace character, throw expecting the delimiter.
private char nextLookingFor(char delimiter) {
char next;
do {
position++;
checkArgument(
position < api.length(), "Could not parse '%s' as it must contain an '%s'", delimiter);
next = api.charAt(position);
} while (whitespace().matches(next));
return next;
}

return new AutoValue_Api(className, methodName, paramList);
void ensureNoMoreCharacters() {
while (++position < api.length()) {
check(whitespace().matches(api.charAt(position)), api, "it should end in ')'");
}
}
}

// The @CompileTimeConstant is for performance - reason should be constant and not eagerly
Expand Down
Expand Up @@ -16,6 +16,7 @@

package com.google.errorprone.bugpatterns.checkreturnvalue;

import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

Expand Down Expand Up @@ -106,15 +107,15 @@ public void parseApi_methodWithoutParams() {
@Test
public void parseApi_methodWithParamsAndSpaces() {
String string =
"com.google.android.libraries.stitch.binder.Binder"
+ "#get(android.content.Context,java.lang.Class)";
" com.google.android.libraries.stitch.binder.Binder"
+ "#get(android.content.Context , java.lang.Class) ";
Api api = Api.parse(string);
assertThat(api.methodName()).isEqualTo("get");
assertThat(api.parameterTypes())
.containsExactly("android.content.Context", "java.lang.Class")
.inOrder();
assertThat(api.isConstructor()).isFalse();
assertThat(api.toString()).isEqualTo(string);
assertThat(api.toString()).isEqualTo(whitespace().removeFrom(string));
}

@Test
Expand Down

0 comments on commit cc06a4a

Please sign in to comment.