Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

READY - Stop DOS attacks by making the lexer stop early on evil input. #2892

Merged
merged 11 commits into from Jul 26, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/main/java/graphql/ParseAndValidate.java
Expand Up @@ -11,6 +11,8 @@
import java.util.List;
import java.util.function.Predicate;

import static java.util.Optional.ofNullable;

/**
* This class allows you to parse and validate a graphql query without executing it. It will tell you
* if it's syntactically valid and also semantically valid according to the graphql specification
Expand Down Expand Up @@ -58,6 +60,8 @@ public static ParseAndValidateResult parse(ExecutionInput executionInput) {
//
// we allow the caller to specify new parser options by context
ParserOptions parserOptions = executionInput.getGraphQLContext().get(ParserOptions.class);
// we use the query parser options by default if they are not specified
parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultOperationParserOptions());
Parser parser = new Parser();
Document document = parser.parseDocument(executionInput.getQuery(), parserOptions);
return ParseAndValidateResult.newResult().document(document).variables(executionInput.getVariables()).build();
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
Expand Up @@ -75,15 +75,16 @@
import static graphql.Assert.assertShouldNeverHappen;
import static graphql.collect.ImmutableKit.emptyList;
import static graphql.collect.ImmutableKit.map;
import static graphql.parser.Parser.CHANNEL_COMMENTS;
import static graphql.parser.Parser.CHANNEL_IGNORED_CHARS;
import static graphql.parser.StringValueParsing.parseSingleQuotedString;
import static graphql.parser.StringValueParsing.parseTripleQuotedString;
import static java.util.Optional.ofNullable;

@Internal
public class GraphqlAntlrToLanguage {

private static final List<Comment> NO_COMMENTS = ImmutableKit.emptyList();
private static final int CHANNEL_COMMENTS = 2;
private static final int CHANNEL_IGNORED_CHARS = 3;
private final CommonTokenStream tokens;
private final MultiSourceReader multiSourceReader;
private final ParserOptions parserOptions;
Expand All @@ -96,7 +97,7 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS
/**
 * Creates the ANTLR-to-graphql-java language converter.
 *
 * @param tokens            the ANTLR token stream produced by lexing the input
 * @param multiSourceReader the reader used to map token positions back to source locations
 * @param parserOptions     the parser options to use, or null to fall back to the JVM wide default options
 */
public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiSourceReader, ParserOptions parserOptions) {
this.tokens = tokens;
this.multiSourceReader = multiSourceReader;
// a null argument means "use the JVM wide default parser options"
this.parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
}

public ParserOptions getParserOptions() {
Expand Down
44 changes: 34 additions & 10 deletions src/main/java/graphql/parser/Parser.java
@@ -1,5 +1,6 @@
package graphql.parser;

import graphql.Internal;
import graphql.PublicApi;
import graphql.language.Document;
import graphql.language.Node;
Expand All @@ -25,6 +26,8 @@
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;

/**
Expand All @@ -46,6 +49,11 @@
@PublicApi
public class Parser {

@Internal
public static final int CHANNEL_COMMENTS = 2;
@Internal
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets call this also whitespace channel and not ignored ones to make it consistent with the options.

public static final int CHANNEL_IGNORED_CHARS = 3;

/**
* Parses a string input into a graphql AST {@link Document}
*
Expand Down Expand Up @@ -222,7 +230,16 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
}
});

CommonTokenStream tokens = new CommonTokenStream(lexer);
// default in the parser options if they are not set
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwCancelParseIfTooManyTokens(token, maxTokenCount, multiSourceReader);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

GraphqlParser parser = new GraphqlParser(tokens);
parser.removeErrorListeners();
Expand Down Expand Up @@ -295,21 +312,28 @@ public int getCharPositionInLine() {

count++;
if (count > maxTokens) {
String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens);
SourceLocation sourceLocation = null;
String offendingToken = null;
if (token != null) {
offendingToken = node.getText();
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
}

throw new ParseCancelledException(msg, sourceLocation, offendingToken);
throwCancelParseIfTooManyTokens(token, maxTokens, multiSourceReader);
}
}
};
parser.addParseListener(listener);
}

/**
 * Builds and throws a {@link ParseCancelledException} describing which kind of token
 * (grammar, whitespace or comments) exceeded the limit, including the offending token
 * text and its source location when an offending token is available.
 *
 * @param token             the token that tripped the limit, possibly null
 * @param maxTokens         the limit that was exceeded
 * @param multiSourceReader used to resolve the token position into a {@link SourceLocation}
 *
 * @throws ParseCancelledException always
 */
private void throwCancelParseIfTooManyTokens(Token token, int maxTokens, MultiSourceReader multiSourceReader) throws ParseCancelledException {
String tokenType;
SourceLocation sourceLocation;
String offendingToken;
if (token == null) {
tokenType = "grammar";
sourceLocation = null;
offendingToken = null;
} else {
// classify the token by its lexer channel so the message names the right limit
int channel = token.getChannel();
if (channel == CHANNEL_IGNORED_CHARS) {
tokenType = "whitespace";
} else if (channel == CHANNEL_COMMENTS) {
tokenType = "comments";
} else {
tokenType = "grammar";
}
offendingToken = token.getText();
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
}
String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens, tokenType);
throw new ParseCancelledException(msg, sourceLocation, offendingToken);
}

/**
* Allows you to override the ANTLR to AST code.
*
Expand Down
80 changes: 73 additions & 7 deletions src/main/java/graphql/parser/ParserOptions.java
Expand Up @@ -13,32 +13,49 @@
public class ParserOptions {

/**
* An graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that wont ever execute. To prevent this for most users, graphql-java
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* set this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
*
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_QUERY_TOKENS = 15000;
public static final int MAX_QUERY_TOKENS = 15_000;
/**
* Another graphql hacking vector is to send large amounts of whitespace in operations that burn lots of parsing CPU time and burn
* memory representing a document. Whitespace token processing in ANTLR is 2 orders of magnitude faster than grammar token processing
* however it still takes some time to happen.
*
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_WHITESPACE_TOKENS = 200_000;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should name this SDL parser options or so to make clear it will be used for Schema Parsing. See also my general review comment.

private static ParserOptions defaultJvmParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.build();

private static ParserOptions defaultJvmOperationParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.build();

/**
* By default the Parser will not capture ignored characters. A static holds this default
* By default, the Parser will not capture ignored characters. A static holds this default
* value in a JVM wide basis options object.
*
* Significant memory savings can be made if we do NOT capture ignored characters,
* especially in SDL parsing.
*
* @return the static default value on whether to capture ignored chars
* @return the static default JVM value
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
Expand All @@ -48,7 +65,20 @@ public static ParserOptions getDefaultParserOptions() {
}

/**
* By default the Parser will not capture ignored characters. A static holds this default
* By default, for operation parsing, the Parser will not capture ignored characters, and it will not capture line comments into AST
* elements . A static holds this default value for operation parsing in a JVM wide basis options object.
*
* @return the static default JVM value for query parsing
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
*/
public static ParserOptions getDefaultOperationParserOptions() {
return defaultJvmOperationParserOptions;
}

/**
* By default, the Parser will not capture ignored characters. A static holds this default
* value in a JVM wide basis options object.
*
* Significant memory savings can be made if we do NOT capture ignored characters,
Expand All @@ -65,17 +95,35 @@ public static void setDefaultParserOptions(ParserOptions options) {
defaultJvmParserOptions = assertNotNull(options);
}

/**
 * By default, the Parser will not capture ignored characters or line comments. A static holds this default
 * value in a JVM wide basis options object for operation parsing.
 *
 * This static can be set to a different options object to change the JVM wide default behavior
 * for operation parsing, for example to restore the behavior of version 16.x or before.
 *
 * @param options - the new default JVM parser options for operation parsing
 *
 * @see graphql.language.IgnoredChar
 * @see graphql.language.SourceLocation
 */
public static void setDefaultOperationParserOptions(ParserOptions options) {
defaultJvmOperationParserOptions = assertNotNull(options);
}


private final boolean captureIgnoredChars;
private final boolean captureSourceLocation;
private final boolean captureLineComments;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final ParsingListener parsingListener;

// copies the builder state into this immutable options object; instances are only
// created via the Builder so no further validation is needed here
private ParserOptions(Builder builder) {
this.captureIgnoredChars = builder.captureIgnoredChars;
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.parsingListener = builder.parsingListener;
}

Expand Down Expand Up @@ -117,7 +165,7 @@ public boolean isCaptureLineComments() {
}

/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
*
Expand All @@ -127,6 +175,17 @@ public int getMaxTokens() {
return maxTokens;
}

/**
 * A graphql hacking vector is to send large amounts of whitespace that burn lots of parsing CPU time and burn
 * memory representing a document. To prevent this you can set a maximum number of whitespace parse
 * tokens that will be accepted before an exception is thrown and the parsing is stopped.
 *
 * @return the maximum number of raw whitespace tokens the parser will accept, after which an exception will be thrown.
 */
public int getMaxWhitespaceTokens() {
return maxWhitespaceTokens;
}

public ParsingListener getParsingListener() {
return parsingListener;
}
Expand All @@ -148,6 +207,7 @@ public static class Builder {
private boolean captureLineComments = true;
private int maxTokens = MAX_QUERY_TOKENS;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;

Builder() {
}
Expand All @@ -157,6 +217,7 @@ public static class Builder {
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.parsingListener = parserOptions.parsingListener;
}

Expand All @@ -180,6 +241,11 @@ public Builder maxTokens(int maxTokens) {
return this;
}

/**
 * Sets the maximum number of raw whitespace tokens the parser will accept before parsing
 * is stopped, as a guard against whitespace based denial of service attacks.
 *
 * @param maxWhitespaceTokens the maximum number of whitespace tokens to accept
 *
 * @return this builder
 */
public Builder maxWhitespaceTokens(int maxWhitespaceTokens) {
this.maxWhitespaceTokens = maxWhitespaceTokens;
return this;
}

public Builder parsingListener(ParsingListener parsingListener) {
this.parsingListener = assertNotNull(parsingListener);
return this;
Expand Down
94 changes: 94 additions & 0 deletions src/main/java/graphql/parser/SafeTokenSource.java
@@ -0,0 +1,94 @@
package graphql.parser;

import graphql.Internal;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenFactory;
import org.antlr.v4.runtime.TokenSource;

import java.util.function.BiConsumer;

/**
 * This token source can wrap a lexer and if it asks for more than a maximum number of tokens
 * the user can take some action, typically throw an exception to stop lexing.
 *
 * It tracks the maximum number per token channel, so we have 3 at the moment, and they will all be tracked.
 *
 * This is used to protect us from evil input. The lexer will eagerly try to find all tokens
 * at times and certain inputs (directives butted together for example) will cause the lexer
 * to keep doing work even before the tokens are presented back to the parser,
 * and hence before the parser has a chance to stop work once too much has been done.
 */
@Internal
public class SafeTokenSource implements TokenSource {

    // we only have 3 channels - but they are 0, 2 and 3 so size for 5 as a safety margin.
    // if we ever add another channel beyond 5, nextToken() will throw
    // ArrayIndexOutOfBoundsException during tests, so future changes will be handled before release!
    private static final int CHANNEL_COUNT = 5;

    private final TokenSource lexer;
    private final int maxTokens;
    private final int maxWhitespaceTokens;
    private final BiConsumer<Integer, Token> whenMaxTokensExceeded;
    private final int[] channelCounts;

    /**
     * @param lexer                 the lexer to guard
     * @param maxTokens             the maximum number of grammar / comment tokens to accept
     * @param maxWhitespaceTokens   the maximum number of whitespace tokens to accept
     * @param whenMaxTokensExceeded callback invoked with the breached limit and the offending
     *                              token; it is expected to throw to stop further lexing
     */
    public SafeTokenSource(TokenSource lexer, int maxTokens, int maxWhitespaceTokens, BiConsumer<Integer, Token> whenMaxTokensExceeded) {
        this.lexer = lexer;
        this.maxTokens = maxTokens;
        this.maxWhitespaceTokens = maxWhitespaceTokens;
        this.whenMaxTokensExceeded = whenMaxTokensExceeded;
        // this could be a Map<Integer, Integer> however we want it to be as fast as possible -
        // a plain array indexed by channel is still faster than a map get/put
        this.channelCounts = new int[CHANNEL_COUNT];
    }


    @Override
    public Token nextToken() {
        Token token = lexer.nextToken();
        if (token != null) {
            int channel = token.getChannel();
            int currentCount = ++channelCounts[channel];
            if (channel == Parser.CHANNEL_IGNORED_CHARS) {
                // whitespace gets its own max count
                callbackIfMaxExceeded(maxWhitespaceTokens, currentCount, token);
            } else {
                callbackIfMaxExceeded(maxTokens, currentCount, token);
            }
        }
        return token;
    }

    // fires the callback once a channel count first exceeds its configured maximum
    private void callbackIfMaxExceeded(int maxCount, int currentCount, Token token) {
        if (currentCount > maxCount) {
            whenMaxTokensExceeded.accept(maxCount, token);
        }
    }

    @Override
    public int getLine() {
        return lexer.getLine();
    }

    @Override
    public int getCharPositionInLine() {
        return lexer.getCharPositionInLine();
    }

    @Override
    public CharStream getInputStream() {
        return lexer.getInputStream();
    }

    @Override
    public String getSourceName() {
        return lexer.getSourceName();
    }

    @Override
    public void setTokenFactory(TokenFactory<?> factory) {
        lexer.setTokenFactory(factory);
    }

    @Override
    public TokenFactory<?> getTokenFactory() {
        return lexer.getTokenFactory();
    }
}