READY - Stop DOS attacks by making the lexer stop early on evil input. (#2892) Port to 18.x
graphql-java · Jul 26, 2022 · 1511839 · 1511839
1 parent a8c2b7e
commit 1511839
Show file tree

Hide file tree

Showing 11 changed files with 586 additions and 25 deletions.
diff --git a/src/main/java/graphql/ParseAndValidate.java b/src/main/java/graphql/ParseAndValidate.java
@@ -11,6 +11,8 @@
 import java.util.List;
 import java.util.function.Predicate;
 
+import static java.util.Optional.ofNullable;
+
 /**
  * This class allows you to parse and validate a graphql query without executing it.  It will tell you
  * if its syntactically valid and also semantically valid according to the graphql specification
@@ -58,6 +60,8 @@ public static ParseAndValidateResult parse(ExecutionInput executionInput) {
             //
             // we allow the caller to specify new parser options by context
             ParserOptions parserOptions = executionInput.getGraphQLContext().get(ParserOptions.class);
+            // we use the query parser options by default if they are not specified
+            parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultOperationParserOptions());
             Parser parser = new Parser();
             Document document = parser.parseDocument(executionInput.getQuery(), parserOptions);
             return ParseAndValidateResult.newResult().document(document).variables(executionInput.getVariables()).build();

diff --git a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
@@ -76,15 +76,16 @@
 import static graphql.Assert.assertShouldNeverHappen;
 import static graphql.collect.ImmutableKit.emptyList;
 import static graphql.collect.ImmutableKit.map;
+import static graphql.parser.Parser.CHANNEL_COMMENTS;
+import static graphql.parser.Parser.CHANNEL_WHITESPACE;
 import static graphql.parser.StringValueParsing.parseSingleQuotedString;
 import static graphql.parser.StringValueParsing.parseTripleQuotedString;
+import static java.util.Optional.ofNullable;
 
 @Internal
 public class GraphqlAntlrToLanguage {
 
     private static final List<Comment> NO_COMMENTS = ImmutableKit.emptyList();
-    private static final int CHANNEL_COMMENTS = 2;
-    private static final int CHANNEL_IGNORED_CHARS = 3;
     private final CommonTokenStream tokens;
     private final MultiSourceReader multiSourceReader;
     private final ParserOptions parserOptions;
@@ -97,7 +98,7 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS
     public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiSourceReader, ParserOptions parserOptions) {
         this.tokens = tokens;
         this.multiSourceReader = multiSourceReader;
-        this.parserOptions = parserOptions == null ? ParserOptions.getDefaultParserOptions() : parserOptions;
+        this.parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
     }
 
     public ParserOptions getParserOptions() {
@@ -791,12 +792,12 @@ private void addIgnoredChars(ParserRuleContext ctx, NodeBuilder nodeBuilder) {
         }
         Token start = ctx.getStart();
         int tokenStartIndex = start.getTokenIndex();
-        List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_IGNORED_CHARS);
+        List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_WHITESPACE);
         List<IgnoredChar> ignoredCharsLeft = mapTokenToIgnoredChar(leftChannel);
 
         Token stop = ctx.getStop();
         int tokenStopIndex = stop.getTokenIndex();
-        List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_IGNORED_CHARS);
+        List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_WHITESPACE);
         List<IgnoredChar> ignoredCharsRight = mapTokenToIgnoredChar(rightChannel);
 
         nodeBuilder.ignoredChars(new IgnoredChars(ignoredCharsLeft, ignoredCharsRight));

diff --git a/src/main/java/graphql/parser/Parser.java b/src/main/java/graphql/parser/Parser.java
@@ -1,5 +1,6 @@
 package graphql.parser;
 
+import graphql.Internal;
 import graphql.PublicApi;
 import graphql.language.Document;
 import graphql.language.Node;
@@ -24,6 +25,8 @@
 import java.io.Reader;
 import java.io.UncheckedIOException;
 import java.util.List;
+import java.util.Optional;
+import java.util.function.BiConsumer;
 import java.util.function.BiFunction;
 
 /**
@@ -45,6 +48,11 @@
 @PublicApi
 public class Parser {
 
+    @Internal
+    public static final int CHANNEL_COMMENTS = 2;
+    @Internal
+    public static final int CHANNEL_WHITESPACE = 3;
+
     /**
      * Parses a string input into a graphql AST {@link Document}
      *
@@ -195,7 +203,16 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
             }
         });
 
-        CommonTokenStream tokens = new CommonTokenStream(lexer);
+        // default in the parser options if they are not set
+        parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
+
+        // this lexer wrapper allows us to stop lexing when too many tokens are in place.  This prevents DOS attacks.
+        int maxTokens = parserOptions.getMaxTokens();
+        int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
+        BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwCancelParseIfTooManyTokens(token, maxTokenCount, multiSourceReader);
+        SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
+
+        CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);
 
         GraphqlParser parser = new GraphqlParser(tokens);
         parser.removeErrorListeners();
@@ -268,21 +285,28 @@ public int getCharPositionInLine() {
 
                 count++;
                 if (count > maxTokens) {
-                    String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens);
-                    SourceLocation sourceLocation = null;
-                    String offendingToken = null;
-                    if (token != null) {
-                        offendingToken = node.getText();
-                        sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
-                    }
-
-                    throw new ParseCancelledException(msg, sourceLocation, offendingToken);
+                    throwCancelParseIfTooManyTokens(token, maxTokens, multiSourceReader);
                 }
             }
         };
         parser.addParseListener(listener);
     }
 
+    private void throwCancelParseIfTooManyTokens(Token token, int maxTokens, MultiSourceReader multiSourceReader) throws ParseCancelledException {
+        String tokenType  = "grammar";
+        SourceLocation sourceLocation = null;
+        String offendingToken = null;
+        if (token != null) {
+            int channel = token.getChannel();
+            tokenType = channel == CHANNEL_WHITESPACE ? "whitespace" : (channel == CHANNEL_COMMENTS ? "comments" : "grammar");
+
+            offendingToken = token.getText();
+            sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
+        }
+        String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens, tokenType);
+        throw new ParseCancelledException(msg, sourceLocation, offendingToken);
+    }
+
     /**
      * Allows you to override the ANTLR to AST code.
      *

diff --git a/src/main/java/graphql/parser/ParserOptions.java b/src/main/java/graphql/parser/ParserOptions.java
@@ -13,32 +13,57 @@
 public class ParserOptions {
 
     /**
-     * An graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
-     * memory representing a document that wont ever execute.  To prevent this for most users, graphql-java
+     * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
+     * memory representing a document that won't ever execute.  To prevent this for most users, graphql-java
      * set this value to 15000.  ANTLR parsing time is linear to the number of tokens presented.  The more you
      * allow the longer it takes.
      *
      * If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
      * JVM wide.
      */
-    public static final int MAX_QUERY_TOKENS = 15000;
+    public static final int MAX_QUERY_TOKENS = 15_000;
+    /**
+     * Another graphql hacking vector is to send large amounts of whitespace in operations that burn lots of parsing CPU time and burn
+     * memory representing a document.  Whitespace token processing in ANTLR is 2 orders of magnitude faster than grammar token processing
+     * however it still takes some time to happen.
+     *
+     * If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
+     * JVM wide.
+     */
+    public static final int MAX_WHITESPACE_TOKENS = 200_000;
 
     private static ParserOptions defaultJvmParserOptions = newParserOptions()
             .captureIgnoredChars(false)
             .captureSourceLocation(true)
             .captureLineComments(true)
             .maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
+            .maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
+            .build();
+
+    private static ParserOptions defaultJvmOperationParserOptions = newParserOptions()
+            .captureIgnoredChars(false)
+            .captureSourceLocation(true)
+            .captureLineComments(false) // #comments are not useful in query parsing
+            .maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
+            .maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
+            .build();
 
+    private static ParserOptions defaultJvmSdlParserOptions = newParserOptions()
+            .captureIgnoredChars(false)
+            .captureSourceLocation(true)
+            .captureLineComments(true) // #comments are useful in SDL parsing
+            .maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
+            .maxWhitespaceTokens(Integer.MAX_VALUE)
             .build();
 
     /**
-     * By default the Parser will not capture ignored characters.  A static holds this default
+     * By default, the Parser will not capture ignored characters.  A static holds this default
      * value in a JVM wide basis options object.
      *
      * Significant memory savings can be made if we do NOT capture ignored characters,
      * especially in SDL parsing.
      *
-     * @return the static default value on whether to capture ignored chars
+     * @return the static default JVM value
      *
      * @see graphql.language.IgnoredChar
      * @see graphql.language.SourceLocation
@@ -48,7 +73,7 @@ public static ParserOptions getDefaultParserOptions() {
     }
 
     /**
-     * By default the Parser will not capture ignored characters.  A static holds this default
+     * By default, the Parser will not capture ignored characters.  A static holds this default
      * value in a JVM wide basis options object.
      *
      * Significant memory savings can be made if we do NOT capture ignored characters,
@@ -65,17 +90,80 @@ public static void setDefaultParserOptions(ParserOptions options) {
         defaultJvmParserOptions = assertNotNull(options);
     }
 
+
+    /**
+     * By default, for operation parsing, the Parser will not capture ignored characters, and it will not capture line comments into AST
+     * elements.  A static holds this default value for operation parsing in a JVM wide basis options object.
+     *
+     * @return the static default JVM value for operation parsing
+     *
+     * @see graphql.language.IgnoredChar
+     * @see graphql.language.SourceLocation
+     */
+    public static ParserOptions getDefaultOperationParserOptions() {
+        return defaultJvmOperationParserOptions;
+    }
+
+    /**
+     * By default, the Parser will not capture ignored characters or line comments.  A static holds this default
+     * value in a JVM wide basis options object for operation parsing.
+     *
+     * This static can be replaced with different options to allow the behavior of version 16.x or before.
+     *
+     * @param options - the new default JVM parser options for operation parsing
+     *
+     * @see graphql.language.IgnoredChar
+     * @see graphql.language.SourceLocation
+     */
+    public static void setDefaultOperationParserOptions(ParserOptions options) {
+        defaultJvmOperationParserOptions = assertNotNull(options);
+    }
+
+    /**
+     * By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
+     * elements.  The SDL default options allow unlimited tokens and whitespace, since a DOS attack vector is
+     * not commonly available via schema SDL parsing.
+     *
+     * A static holds this default value for SDL parsing in a JVM wide basis options object.
+     *
+     * @return the static default JVM value for SDL parsing
+     *
+     * @see graphql.language.IgnoredChar
+     * @see graphql.language.SourceLocation
+     * @see graphql.schema.idl.SchemaParser
+     */
+    public static ParserOptions getDefaultSdlParserOptions() {
+        return defaultJvmSdlParserOptions;
+    }
+
+    /**
+     * By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
+     * elements.  A static holds this default value for SDL parsing in a JVM wide basis options object.
+     *
+     * This static can be replaced with different options to allow the behavior of version 16.x or before.
+     *
+     * @param options - the new default JVM parser options for SDL parsing
+     *
+     * @see graphql.language.IgnoredChar
+     * @see graphql.language.SourceLocation
+     */
+    public static void setDefaultSdlParserOptions(ParserOptions options) {
+        defaultJvmSdlParserOptions = assertNotNull(options);
+    }
+
     private final boolean captureIgnoredChars;
     private final boolean captureSourceLocation;
     private final boolean captureLineComments;
     private final int maxTokens;
+    private final int maxWhitespaceTokens;
     private final ParsingListener parsingListener;
 
     private ParserOptions(Builder builder) {
         this.captureIgnoredChars = builder.captureIgnoredChars;
         this.captureSourceLocation = builder.captureSourceLocation;
         this.captureLineComments = builder.captureLineComments;
         this.maxTokens = builder.maxTokens;
+        this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
         this.parsingListener = builder.parsingListener;
     }
 
@@ -117,7 +205,7 @@ public boolean isCaptureLineComments() {
     }
 
     /**
-     * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
+     * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
      * memory representing a document that won't ever execute.  To prevent this you can set a maximum number of parse
      * tokens that will be accepted before an exception is thrown and the parsing is stopped.
      *
@@ -127,6 +215,17 @@ public int getMaxTokens() {
         return maxTokens;
     }
 
+    /**
+     * A graphql hacking vector is to send large amounts of whitespace that burn lots of parsing CPU time and burn
+     * memory representing a document.  To prevent this you can set a maximum number of whitespace parse
+     * tokens that will be accepted before an exception is thrown and the parsing is stopped.
+     *
+     * @return the maximum number of raw whitespace tokens the parser will accept, after which an exception will be thrown.
+     */
+    public int getMaxWhitespaceTokens() {
+        return maxWhitespaceTokens;
+    }
+
     public ParsingListener getParsingListener() {
         return parsingListener;
     }
@@ -148,6 +247,7 @@ public static class Builder {
         private boolean captureLineComments = true;
         private int maxTokens = MAX_QUERY_TOKENS;
         private ParsingListener parsingListener = ParsingListener.NOOP;
+        private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
 
         Builder() {
         }
@@ -157,6 +257,7 @@ public static class Builder {
             this.captureSourceLocation = parserOptions.captureSourceLocation;
             this.captureLineComments = parserOptions.captureLineComments;
             this.maxTokens = parserOptions.maxTokens;
+            this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
             this.parsingListener = parserOptions.parsingListener;
         }
 
@@ -180,6 +281,11 @@ public Builder maxTokens(int maxTokens) {
             return this;
         }
 
+        public Builder maxWhitespaceTokens(int maxWhitespaceTokens) {
+            this.maxWhitespaceTokens = maxWhitespaceTokens;
+            return this;
+        }
+
         public Builder parsingListener(ParsingListener parsingListener) {
             this.parsingListener = assertNotNull(parsingListener);
             return this;