Skip to content

Commit

Permalink
READY - Stop DOS attacks by making the lexer stop early on evil input. (
Browse files Browse the repository at this point in the history
#2892)

Port to 18.x
  • Loading branch information
bbakerman committed Jul 26, 2022
1 parent a8c2b7e commit 1511839
Show file tree
Hide file tree
Showing 11 changed files with 586 additions and 25 deletions.
4 changes: 4 additions & 0 deletions src/main/java/graphql/ParseAndValidate.java
Expand Up @@ -11,6 +11,8 @@
import java.util.List;
import java.util.function.Predicate;

import static java.util.Optional.ofNullable;

/**
* This class allows you to parse and validate a graphql query without executing it. It will tell you
* if its syntactically valid and also semantically valid according to the graphql specification
Expand Down Expand Up @@ -58,6 +60,8 @@ public static ParseAndValidateResult parse(ExecutionInput executionInput) {
//
// we allow the caller to specify new parser options by context
ParserOptions parserOptions = executionInput.getGraphQLContext().get(ParserOptions.class);
// we use the query parser options by default if they are not specified
parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultOperationParserOptions());
Parser parser = new Parser();
Document document = parser.parseDocument(executionInput.getQuery(), parserOptions);
return ParseAndValidateResult.newResult().document(document).variables(executionInput.getVariables()).build();
Expand Down
11 changes: 6 additions & 5 deletions src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
Expand Up @@ -76,15 +76,16 @@
import static graphql.Assert.assertShouldNeverHappen;
import static graphql.collect.ImmutableKit.emptyList;
import static graphql.collect.ImmutableKit.map;
import static graphql.parser.Parser.CHANNEL_COMMENTS;
import static graphql.parser.Parser.CHANNEL_WHITESPACE;
import static graphql.parser.StringValueParsing.parseSingleQuotedString;
import static graphql.parser.StringValueParsing.parseTripleQuotedString;
import static java.util.Optional.ofNullable;

@Internal
public class GraphqlAntlrToLanguage {

private static final List<Comment> NO_COMMENTS = ImmutableKit.emptyList();
private static final int CHANNEL_COMMENTS = 2;
private static final int CHANNEL_IGNORED_CHARS = 3;
private final CommonTokenStream tokens;
private final MultiSourceReader multiSourceReader;
private final ParserOptions parserOptions;
Expand All @@ -97,7 +98,7 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS
public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiSourceReader, ParserOptions parserOptions) {
this.tokens = tokens;
this.multiSourceReader = multiSourceReader;
this.parserOptions = parserOptions == null ? ParserOptions.getDefaultParserOptions() : parserOptions;
this.parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
}

public ParserOptions getParserOptions() {
Expand Down Expand Up @@ -791,12 +792,12 @@ private void addIgnoredChars(ParserRuleContext ctx, NodeBuilder nodeBuilder) {
}
Token start = ctx.getStart();
int tokenStartIndex = start.getTokenIndex();
List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_IGNORED_CHARS);
List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_WHITESPACE);
List<IgnoredChar> ignoredCharsLeft = mapTokenToIgnoredChar(leftChannel);

Token stop = ctx.getStop();
int tokenStopIndex = stop.getTokenIndex();
List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_IGNORED_CHARS);
List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_WHITESPACE);
List<IgnoredChar> ignoredCharsRight = mapTokenToIgnoredChar(rightChannel);

nodeBuilder.ignoredChars(new IgnoredChars(ignoredCharsLeft, ignoredCharsRight));
Expand Down
44 changes: 34 additions & 10 deletions src/main/java/graphql/parser/Parser.java
@@ -1,5 +1,6 @@
package graphql.parser;

import graphql.Internal;
import graphql.PublicApi;
import graphql.language.Document;
import graphql.language.Node;
Expand All @@ -24,6 +25,8 @@
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;

/**
Expand All @@ -45,6 +48,11 @@
@PublicApi
public class Parser {

@Internal
public static final int CHANNEL_COMMENTS = 2;
@Internal
public static final int CHANNEL_WHITESPACE = 3;

/**
* Parses a string input into a graphql AST {@link Document}
*
Expand Down Expand Up @@ -195,7 +203,16 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
}
});

CommonTokenStream tokens = new CommonTokenStream(lexer);
// default in the parser options if they are not set
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwCancelParseIfTooManyTokens(token, maxTokenCount, multiSourceReader);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

GraphqlParser parser = new GraphqlParser(tokens);
parser.removeErrorListeners();
Expand Down Expand Up @@ -268,21 +285,28 @@ public int getCharPositionInLine() {

count++;
if (count > maxTokens) {
String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens);
SourceLocation sourceLocation = null;
String offendingToken = null;
if (token != null) {
offendingToken = node.getText();
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
}

throw new ParseCancelledException(msg, sourceLocation, offendingToken);
throwCancelParseIfTooManyTokens(token, maxTokens, multiSourceReader);
}
}
};
parser.addParseListener(listener);
}

/**
 * Unconditionally aborts parsing by throwing a {@link ParseCancelledException} that reports
 * which token limit was exceeded.
 *
 * The token's channel decides the wording of the message: tokens on {@code CHANNEL_WHITESPACE}
 * are reported as "whitespace", tokens on {@code CHANNEL_COMMENTS} as "comments" and anything
 * else (including a missing token) as "grammar".
 *
 * @param token             the token that tipped the count over the limit, may be null in which case
 *                          no source location or offending text is reported
 * @param maxTokens         the limit that was exceeded, echoed in the exception message
 * @param multiSourceReader the reader used to translate the token position into a source location
 *
 * @throws ParseCancelledException always
 */
private void throwCancelParseIfTooManyTokens(Token token, int maxTokens, MultiSourceReader multiSourceReader) throws ParseCancelledException {
    final String kindOfToken;
    final String offendingText;
    final SourceLocation location;

    if (token == null) {
        // nothing to inspect, so fall back to the generic description with no position details
        kindOfToken = "grammar";
        offendingText = null;
        location = null;
    } else {
        int tokenChannel = token.getChannel();
        if (tokenChannel == CHANNEL_WHITESPACE) {
            kindOfToken = "whitespace";
        } else if (tokenChannel == CHANNEL_COMMENTS) {
            kindOfToken = "comments";
        } else {
            kindOfToken = "grammar";
        }
        offendingText = token.getText();
        location = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
    }

    String message = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens, kindOfToken);
    throw new ParseCancelledException(message, location, offendingText);
}

/**
* Allows you to override the ANTLR to AST code.
*
Expand Down
120 changes: 113 additions & 7 deletions src/main/java/graphql/parser/ParserOptions.java
Expand Up @@ -13,32 +13,57 @@
public class ParserOptions {

/**
* An graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that wont ever execute. To prevent this for most users, graphql-java
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 15000. ANTLR parsing time is linear in the number of tokens presented. The more you
* allow, the longer it takes.
*
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_QUERY_TOKENS = 15000;
public static final int MAX_QUERY_TOKENS = 15_000;
/**
* Another graphql hacking vector is to send large amounts of whitespace in operations that burn lots of parsing CPU time and burn
* memory representing a document. Whitespace token processing in ANTLR is 2 orders of magnitude faster than grammar token processing
* however it still takes some time to happen.
*
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_WHITESPACE_TOKENS = 200_000;

private static ParserOptions defaultJvmParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.build();

private static ParserOptions defaultJvmOperationParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.build();

private static ParserOptions defaultJvmSdlParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true) // #comments are useful in SDL parsing
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.build();

/**
* By default the Parser will not capture ignored characters. A static holds this default
* By default, the Parser will not capture ignored characters. A static holds this default
* value in a JVM wide basis options object.
*
* Significant memory savings can be made if we do NOT capture ignored characters,
* especially in SDL parsing.
*
* @return the static default value on whether to capture ignored chars
* @return the static default JVM value
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
Expand All @@ -48,7 +73,7 @@ public static ParserOptions getDefaultParserOptions() {
}

/**
* By default the Parser will not capture ignored characters. A static holds this default
* By default, the Parser will not capture ignored characters. A static holds this default
* value in a JVM wide basis options object.
*
* Significant memory savings can be made if we do NOT capture ignored characters,
Expand All @@ -65,17 +90,80 @@ public static void setDefaultParserOptions(ParserOptions options) {
defaultJvmParserOptions = assertNotNull(options);
}


/**
* By default, for operation parsing, the Parser will not capture ignored characters, and it will not capture line comments into AST
* elements. A static holds this default value for operation parsing in a JVM wide basis options object.
*
* @return the static default JVM value for operation parsing
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
*/
public static ParserOptions getDefaultOperationParserOptions() {
return defaultJvmOperationParserOptions;
}

/**
* By default, the Parser will not capture ignored characters or line comments. A static holds this default
* value in a JVM wide basis options object for operation parsing.
*
* The default can be replaced through this method, for example to allow the behavior of version 16.x or before.
*
* @param options - the new default JVM parser options for operation parsing, which must not be null
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
*/
public static void setDefaultOperationParserOptions(ParserOptions options) {
defaultJvmOperationParserOptions = assertNotNull(options);
}

/**
* By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
* elements. The built-in SDL default options are effectively unlimited for tokens and whitespace (both limits are
* {@link Integer#MAX_VALUE}), since a DOS attack vector is not commonly available via schema SDL parsing.
*
* A static holds this default value for SDL parsing in a JVM wide basis options object.
*
* @return the static default JVM value for SDL parsing
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
* @see graphql.schema.idl.SchemaParser
*/
public static ParserOptions getDefaultSdlParserOptions() {
return defaultJvmSdlParserOptions;
}

/**
* By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
* elements. A static holds this default value for SDL parsing in a JVM wide basis options object.
*
* The default can be replaced through this method, for example to allow the behavior of version 16.x or before.
*
* @param options - the new default JVM parser options for SDL parsing, which must not be null
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
*/
public static void setDefaultSdlParserOptions(ParserOptions options) {
defaultJvmSdlParserOptions = assertNotNull(options);
}

private final boolean captureIgnoredChars;
private final boolean captureSourceLocation;
private final boolean captureLineComments;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final ParsingListener parsingListener;

/**
* Creates the options object by copying every setting held by the builder into the
* corresponding final field.
*
* @param builder the builder carrying the desired settings
*/
private ParserOptions(Builder builder) {
this.captureIgnoredChars = builder.captureIgnoredChars;
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.parsingListener = builder.parsingListener;
}

Expand Down Expand Up @@ -117,7 +205,7 @@ public boolean isCaptureLineComments() {
}

/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
*
Expand All @@ -127,6 +215,17 @@ public int getMaxTokens() {
return maxTokens;
}

/**
* A graphql hacking vector is to send large amounts of whitespace that burn lots of parsing CPU time and burn
* memory representing a document. To prevent this you can set a maximum number of whitespace parse
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
*
* @return the maximum number of raw whitespace tokens the parser will accept, after which an exception will be thrown.
*/
public int getMaxWhitespaceTokens() {
return maxWhitespaceTokens;
}

/**
* @return the {@link ParsingListener} held by these options
*/
public ParsingListener getParsingListener() {
return parsingListener;
}
Expand All @@ -148,6 +247,7 @@ public static class Builder {
private boolean captureLineComments = true;
private int maxTokens = MAX_QUERY_TOKENS;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;

Builder() {
}
Expand All @@ -157,6 +257,7 @@ public static class Builder {
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.parsingListener = parserOptions.parsingListener;
}

Expand All @@ -180,6 +281,11 @@ public Builder maxTokens(int maxTokens) {
return this;
}

/**
* Sets the maximum number of whitespace tokens to be carried by the options being built.
*
* @param maxWhitespaceTokens the whitespace token limit to record on this builder
*
* @return this builder, to allow call chaining
*/
public Builder maxWhitespaceTokens(int maxWhitespaceTokens) {
this.maxWhitespaceTokens = maxWhitespaceTokens;
return this;
}

public Builder parsingListener(ParsingListener parsingListener) {
this.parsingListener = assertNotNull(parsingListener);
return this;
Expand Down

0 comments on commit 1511839

Please sign in to comment.