From cc06a4abd6fa834e874f9d9447a8d4863279d56d Mon Sep 17 00:00:00 2001 From: Nick Glorioso Date: Wed, 11 May 2022 07:48:09 -0700 Subject: [PATCH] Further optimize Api.parse() by removing whitespace in the parsing pass, and avoiding multiple string copies. PiperOrigin-RevId: 447992096 --- .../bugpatterns/checkreturnvalue/Api.java | 263 +++++++++++------- .../bugpatterns/checkreturnvalue/ApiTest.java | 7 +- 2 files changed, 167 insertions(+), 103 deletions(-) diff --git a/core/src/main/java/com/google/errorprone/bugpatterns/checkreturnvalue/Api.java b/core/src/main/java/com/google/errorprone/bugpatterns/checkreturnvalue/Api.java index 0b02c09907d..c9c4678f9cd 100644 --- a/core/src/main/java/com/google/errorprone/bugpatterns/checkreturnvalue/Api.java +++ b/core/src/main/java/com/google/errorprone/bugpatterns/checkreturnvalue/Api.java @@ -27,7 +27,6 @@ import com.google.auto.value.AutoValue; import com.google.common.base.Joiner; -import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.errorprone.annotations.CompileTimeConstant; @@ -86,8 +85,6 @@ private Matcher matcher() { .withParameters(parameterTypes()); } - private static final Splitter PARAM_SPLITTER = Splitter.on(','); - /** * Parses an API string into an {@link Api}. Example API strings are: * @@ -98,100 +95,161 @@ private Matcher matcher() { *
  • an instance method with types erased (e.g., {@code java.util.List#add(java.lang.Object)}) * */ - static Api parse(String apiWithWhitespace) { - // TODO(kak): consider removing whitespace from the String as we step through the String - String api = whitespace().removeFrom(apiWithWhitespace); - - boolean isConstructor = false; - int hashIndex = -1; - int openParenIndex = -1; - int closeParenIndex = -1; - int lessThanIndex = -1; - int greaterThanIndex = -1; - for (int i = 0; i < api.length(); i++) { - char ch = api.charAt(i); - switch (ch) { - case '#': - check(hashIndex == -1, api, "it contains more than one '#'"); - hashIndex = i; - break; - case '(': - check(openParenIndex == -1, api, "it contains more than one '('"); - openParenIndex = i; - break; - case ')': - check(closeParenIndex == -1, api, "it contains more than one ')'"); - closeParenIndex = i; - break; - case '<': - check(lessThanIndex == -1, api, "it contains more than one '<'"); - lessThanIndex = i; - isConstructor = true; - break; - case '>': - check(greaterThanIndex == -1, api, "it contains more than one '>'"); - greaterThanIndex = i; - isConstructor = true; - break; - case ',': // for separating parameters - case '.': // for package names and fully qualified parameter names - case '[': // for array signature types - case ']': // for array signature types - break; - default: - checkArgument( - isJavaIdentifierPart(ch), - "Unable to parse '%s' because '%s' is not a valid identifier", - api, - ch); - } - } + static Api parse(String api) { + Parser p = new Parser(api); - // make sure we've seen a hash, open paren, and close paren - check(hashIndex != -1, api, "it must contain a '#'"); - check(openParenIndex != -1, api, "it must contain a '('"); - check(closeParenIndex == api.length() - 1, api, "it must end with ')'"); + // Let's parse this in 3 parts: + // * Fully-qualified owning name, followed by # + // * method name, or "", followed by ( + // * Any number of parameter types, all but the last followed by a ',', Finishing with ) + // * and nothing at the end. - // make sure they came in the correct order: #() - check(hashIndex < openParenIndex, api, "'#' must come before '('"); - check(openParenIndex < closeParenIndex, api, "'(' must come before ')'"); + String className = p.owningType(); + String methodName = p.methodName(); + ImmutableList paramList = p.parameters(); + p.ensureNoMoreCharacters(); - if (isConstructor) { - // make sure that if we've seen a < or >, we also have seen the matching one - check(lessThanIndex != -1, api, "must contain both '<' and '>'"); - check(greaterThanIndex != -1, api, "must contain both '<' and '>'"); + return new AutoValue_Api(className, methodName, paramList); + } - // make sure the < comes directly after the # - check(lessThanIndex == hashIndex + 1, api, "'<' must come directly after '#'"); + private static final class Parser { + private final String api; + private int position = -1; - // make sure that the < comes before the > - check(lessThanIndex < greaterThanIndex, api, "'<' must come before '>'"); + Parser(String api) { + this.api = api; + } - // make sure that the > comes directly before the ( - check(greaterThanIndex == openParenIndex - 1, api, "'>' must come directly before '('"); + String owningType() { + StringBuilder buffer = new StringBuilder(api.length()); + token: + do { + char next = nextLookingFor('#'); + switch (next) { + case '#': + // We've hit the end of the leading type, break out. + break token; + case '.': + // OK, separator + break; + default: + checkArgument( + isJavaIdentifierPart(next), + "Unable to parse '%s' because '%s' is not a valid identifier", + api, + next); + } + buffer.append(next); + } while (true); + String type = buffer.toString(); - // make sure the only thing between the < and > is exactly "init" - String constructorName = api.substring(lessThanIndex + 1, greaterThanIndex); - checkArgument( - constructorName.equals("init"), - "Unable to parse '%s' because %s is an invalid method name", + check(!type.isEmpty(), api, "class name cannot be empty"); + check( + isJavaIdentifierStart(type.charAt(0)), api, - constructorName); + "the class name must start with a valid character"); + return type; } - String className = api.substring(0, hashIndex); - String methodName = api.substring(hashIndex + 1, openParenIndex); - String parameters = api.substring(openParenIndex + 1, closeParenIndex); + String methodName() { + StringBuilder buffer = new StringBuilder(api.length() - position); + boolean isConstructor = false; + boolean finishedConstructor = false; + // match "", or otherwise a normal method name + token: + do { + char next = nextLookingFor('('); + switch (next) { + case '(': + // We've hit the end of the method name, break out. + break token; + case '<': + // Starting a constructor + check(!isConstructor, api, "Only one '<' is allowed"); + check(buffer.length() == 0, api, "'<' must come directly after '#'"); + isConstructor = true; + break; + case '>': + check(isConstructor, api, "'<' must come before '>'"); + check(!finishedConstructor, api, "Only one '>' is allowed"); + finishedConstructor = true; + break; + default: + checkArgument( + isJavaIdentifierPart(next), + "Unable to parse '%s' because '%s' is not a valid identifier", + api, + next); + } + buffer.append(next); + } while (true); + + String methodName = buffer.toString(); + if (isConstructor) { + check(finishedConstructor, api, "found '<' without closing '>"); - ImmutableList paramList = - parameters.isEmpty() - ? ImmutableList.of() - : PARAM_SPLITTER.splitToStream(parameters).collect(toImmutableList()); + // Must be "" exactly + checkArgument( + methodName.equals(""), + "Unable to parse '%s' because %s is an invalid method name", + api, + methodName); + } else { + check(!methodName.isEmpty(), api, "method name cannot be empty"); + check( + isJavaIdentifierStart(methodName.charAt(0)), + api, + "the method name must start with a valid character"); + } - // make sure the class name, method name, and parameter names are not empty - check(!className.isEmpty(), api, "the class name cannot be empty"); - check(!methodName.isEmpty(), api, "the method name cannot be empty"); - for (String parameter : paramList) { + return methodName; + } + + ImmutableList parameters() { + // Text until the next ',' or ')' represents the parameter type. + // If the first token is ')', then we have an empty parameter list. + StringBuilder buffer = new StringBuilder(api.length() - position); + ImmutableList.Builder paramBuilder = ImmutableList.builder(); + boolean emptyList = true; + paramList: + do { + char next = nextLookingFor(')'); + switch (next) { + case ')': + if (emptyList) { + return ImmutableList.of(); + } + // We've hit the end of the whole list, bail out. + paramBuilder.add(consumeParam(buffer)); + break paramList; + case ',': + // We've hit the middle of a parameter, consume it + paramBuilder.add(consumeParam(buffer)); + break; + + case '[': + case ']': + case '.': + // . characters are separators, [ and ] are array characters, they're checked @ the end + buffer.append(next); + break; + + default: + checkArgument( + isJavaIdentifierPart(next), + "Unable to parse '%s' because '%s' is not a valid identifier", + api, + next); + emptyList = false; + buffer.append(next); + } + } while (true); + return paramBuilder.build(); + } + + private String consumeParam(StringBuilder buffer) { + String parameter = buffer.toString(); + buffer.setLength(0); // reset the buffer check(!parameter.isEmpty(), api, "parameters cannot be empty"); check( @@ -202,8 +260,8 @@ static Api parse(String apiWithWhitespace) { // Array specs must be in balanced pairs at the *end* of the parameter. boolean parsingArrayStart = false; boolean hasArraySpecifiers = false; - for (int i = 1; i < parameter.length(); i++) { - char c = parameter.charAt(i); + for (int k = 1; k < parameter.length(); k++) { + char c = parameter.charAt(k); switch (c) { case '[': check(!parsingArrayStart, api, "multiple consecutive ["); @@ -222,22 +280,27 @@ static Api parse(String apiWithWhitespace) { } } check(!parsingArrayStart, api, "[ without closing ] at the end of a parameter type"); + return parameter; } - // make sure the class name starts with a valid Java identifier character - check( - isJavaIdentifierStart(className.charAt(0)), - api, - "the class name must start with a valid character"); - - if (!isConstructor) { - // make sure the method name starts with a valid Java identifier character - check( - isJavaIdentifierStart(methodName.charAt(0)), - api, - "the method name must start with a valid character"); + + // skip whitespace characters and give the next non-whitespace character. If we hit the end + // without a non-whitespace character, throw expecting the delimiter. + private char nextLookingFor(char delimiter) { + char next; + do { + position++; + checkArgument( + position < api.length(), "Could not parse '%s' as it must contain an '%s'", delimiter); + next = api.charAt(position); + } while (whitespace().matches(next)); + return next; } - return new AutoValue_Api(className, methodName, paramList); + void ensureNoMoreCharacters() { + while (++position < api.length()) { + check(whitespace().matches(api.charAt(position)), api, "it should end in ')'"); + } + } } // The @CompileTimeConstant is for performance - reason should be constant and not eagerly diff --git a/core/src/test/java/com/google/errorprone/bugpatterns/checkreturnvalue/ApiTest.java b/core/src/test/java/com/google/errorprone/bugpatterns/checkreturnvalue/ApiTest.java index d8660ff028c..3e99528ec75 100644 --- a/core/src/test/java/com/google/errorprone/bugpatterns/checkreturnvalue/ApiTest.java +++ b/core/src/test/java/com/google/errorprone/bugpatterns/checkreturnvalue/ApiTest.java @@ -16,6 +16,7 @@ package com.google.errorprone.bugpatterns.checkreturnvalue; +import static com.google.common.base.CharMatcher.whitespace; import static com.google.common.truth.Truth.assertThat; import static org.junit.Assert.assertThrows; @@ -106,15 +107,15 @@ public void parseApi_methodWithoutParams() { @Test public void parseApi_methodWithParamsAndSpaces() { String string = - "com.google.android.libraries.stitch.binder.Binder" - + "#get(android.content.Context,java.lang.Class)"; + " com.google.android.libraries.stitch.binder.Binder" + + "#get(android.content.Context , java.lang.Class) "; Api api = Api.parse(string); assertThat(api.methodName()).isEqualTo("get"); assertThat(api.parameterTypes()) .containsExactly("android.content.Context", "java.lang.Class") .inOrder(); assertThat(api.isConstructor()).isFalse(); - assertThat(api.toString()).isEqualTo(string); + assertThat(api.toString()).isEqualTo(whitespace().removeFrom(string)); } @Test