Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include support to limit rows in a group by operation #3121

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion h2/src/main/org/h2/api/ErrorCode.java
Original file line number Diff line number Diff line change
Expand Up @@ -2240,7 +2240,15 @@ public class ErrorCode {
*/
public static final int GROUP_BY_NOT_IN_THE_RESULT = 90157;

// next is 90158
/**
* The error with code <code>90158</code> is thrown when the approximate
* memory used by the in-memory table containing all the entries of a
* group by statement would exceed the limit specified using the system
* property <code>h2.maxGroupByMemoryUsage</code>.
*/
public static final int GROUP_BY_TABLE_TOO_LARGE = 90158;

// next is 90159

private ErrorCode() {
// utility class
Expand Down
25 changes: 25 additions & 0 deletions h2/src/main/org/h2/command/query/SelectGroups.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
import java.util.Map.Entry;
import java.util.TreeMap;

import org.h2.api.ErrorCode;
import org.h2.engine.SessionLocal;
import org.h2.engine.SysProperties;
import org.h2.expression.Expression;
import org.h2.expression.analysis.DataAnalysisOperation;
import org.h2.expression.analysis.PartitionData;
import org.h2.message.DbException;
import org.h2.value.Value;
import org.h2.value.ValueRow;

Expand Down Expand Up @@ -46,13 +49,25 @@ public abstract class SelectGroups {

private static final class Grouped extends SelectGroups {

/**
* Memory usage for each treemap entry, in addition to the value,
* consisting of 5 pointers + object overhead
*/
private static final int TREEMAP_ENTRY_OVERHEAD = 56;

private final int[] groupIndex;

/**
* Map of group-by key to group-by expression data e.g. AggregateData
*/
private TreeMap<ValueRow, Object[]> groupByData;

/**
* Approximation of the memory used by groupByData, based on the size of the
* keys that it contains
*/
private int groupByDataSize;

/**
* Key into groupByData that produces currentGroupByExprData. Not used
* in lazy mode.
Expand Down Expand Up @@ -93,8 +108,18 @@ public void nextSource() {
}
Object[] values = groupByData.get(currentGroupsKey);
if (values == null) {
if (SysProperties.maxGroupByMemoryUsage>0 && groupByDataSize>=SysProperties.maxGroupByMemoryUsage) {
throw DbException.get(ErrorCode.GROUP_BY_TABLE_TOO_LARGE);
}
values = createRow();
groupByData.put(currentGroupsKey, values);

// TODO: how to track the memory of values?
// Most values seem to take a minimum of 24 bytes, Boolean and Null take 0 bytes,
// Some objects such as strings and arrays take more. However, for most cases, the
// aggregation values will be numeric, so maybe values.length * 32 + 32 is a
// good enough approximation?
groupByDataSize += currentGroupsKey.getMemory() + TREEMAP_ENTRY_OVERHEAD;
}
currentGroupByExprData = values;
currentGroupRowId++;
Expand Down
16 changes: 16 additions & 0 deletions h2/src/main/org/h2/engine/SysProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,22 @@ public class SysProperties {
public static final String AUTH_CONFIG_FILE =
Utils.getProperty("h2.authConfigFile", null);

/**
* System property <code>h2.maxGroupByMemoryUsage</code>
* (default: -1).<br />
* maxGroupByMemoryUsage defines a suggested upper limit on the memory
* used in certain group by operations. If this limit is reached,
* the query fails with error <code>90158</code>
* (GROUP_BY_TABLE_TOO_LARGE).
* A value of 0 or less (such as the default of -1) means that the use
* of memory is not limited.
*
* This setting is experimental and the memory usage calculation is
* approximate. In the future it will be removed or replaced when a
* better approach for handling group bys is introduced.
*/
public static int maxGroupByMemoryUsage =
Utils.getProperty("h2.maxGroupByMemoryUsage", -1);

private static final String H2_BASE_DIR = "h2.baseDir";

private SysProperties() {
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/message/DbException.java
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,7 @@ public static SQLException getJdbcSQLException(String message, String sql, Strin
case ACCESS_DENIED_TO_CLASS_1:
case RESULT_SET_READONLY:
case CURRENT_SEQUENCE_VALUE_IS_NOT_DEFINED_IN_SESSION_1:
case GROUP_BY_TABLE_TOO_LARGE:
return new JdbcSQLNonTransientException(message, sql, state, errorCode, cause, stackTrace);
case FEATURE_NOT_SUPPORTED_1:
return new JdbcSQLFeatureNotSupportedException(message, sql, state, errorCode, cause, stackTrace);
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_cs.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Obecná chyba: {0}
HY004=Neznámý datový typ: {0}
HYC00=Vlastnost není podporována: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_de.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=Erzeugte Spalte {0} kann nicht durch eine referentielle Integrität mit dem Ausdruck {1} veränderbar sein
90156=Spalten-Alias ist nicht für den Audruck {0} angegeben
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Allgemeiner Fehler: {0}
HY004=Unbekannter Datentyp: {0}
HYC00=Dieses Feature wird nicht unterstützt: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_en.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=Column alias is not specified for expression {0}
90157=Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=Group by table has too many entries.
HY000=General error: {0}
HY004=Unknown data type: {0}
HYC00=Feature not supported: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_es.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Error General : {0}
HY004=Tipo de dato desconocido : {0}
HYC00=Caracteristica no soportada: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_fr.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Erreur générale: {0}
HY004=Type de données inconnu: {0}
HYC00=Fonctionnalité non supportée: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_ja.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=一般エラー: {0}
HY004=不明なデータ型: {0}
HYC00=機能はサポートされていません: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_pl.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Błąd ogólny: {0}
HY004=Nieznany typ danych: {0}
HYC00=Cecha nie jest wspierana: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_pt_br.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Erro geral: {0}
HY004=Tipo de dados desconhecido: {0}
HYC00=Recurso não suportado: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_ru.prop
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@
90155=Генерируемый столбец {0} не может обновляться ссылочным ограничением с пунктом {1}
90156=Имя столбца не указано для выражения {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Внутренняя ошибка: {0}
HY004=Неизвестный тип данных: {0}
HYC00=Данная функция не поддерживается: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_sk.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=Všeobecná chyba: {0}
HY004=Neznámy dátový typ: {0}
HYC00=Vlastnosť nie je podporovaná: {0}
Expand Down
1 change: 1 addition & 0 deletions h2/src/main/org/h2/res/_messages_zh_cn.prop
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
90155=#Generated column {0} cannot be updatable by a referential constraint with {1} clause
90156=#Column alias is not specified for expression {0}
90157=#Column index {0} in GROUP BY clause is outside valid range 1 - {1}
90158=#Group by table has too many entries.
HY000=常规错误: {0}
HY004=位置数据类型: {0}
HYC00=不支持的特性: {0}
Expand Down
6 changes: 6 additions & 0 deletions h2/src/test/org/h2/test/scripts/TestScript.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.h2.command.Prepared;
import org.h2.command.dml.ScriptCommand;
import org.h2.command.query.Query;
import org.h2.engine.SysProperties;
import org.h2.engine.Mode.ModeEnum;
import org.h2.jdbc.JdbcConnection;
import org.h2.jdbc.JdbcPreparedStatement;
Expand Down Expand Up @@ -234,6 +235,11 @@ public void test() throws Exception {
"table", "values", "window" }) {
testScript("queries/" + s + ".sql");
}

SysProperties.maxGroupByMemoryUsage = 200;
testScript("other/large_group_by.sql");
SysProperties.maxGroupByMemoryUsage = -1;

if (config.mvStore) {
testScript("other/two_phase_commit.sql");
testScript("other/unique_include.sql");
Expand Down
32 changes: 32 additions & 0 deletions h2/src/test/org/h2/test/scripts/other/large_group_by.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- Copyright 2021-2021 H2 Group. Multiple-Licensed under the MPL 2.0,
-- and the EPL 1.0 (https://h2database.com/html/license.html).
-- Initial Developer: H2 Group
--

-- Test the memory limit for the temporary in-memory GROUP BY structures.
-- The test harness runs this script with maxGroupByMemoryUsage set to 200.

CREATE TABLE TEST (A INT, B VARCHAR) AS (VALUES (10, 'STRING 1'), (11, 'STRING 2'));
> ok

SELECT A, count(*) from TEST group by A;
> A COUNT(*)
> -- --------
> 10 1
> 11 1
> rows: 2

SELECT B, count(*) from TEST WHERE A<11 group by B;
> B COUNT(*)
> -------- --------
> STRING 1 1
> rows: 1

SELECT B, count(*) from TEST group by B;
> exception GROUP_BY_TABLE_TOO_LARGE

SELECT A, max(B) from TEST group by A;
> A MAX(B)
> -- --------
> 10 STRING 1
> 11 STRING 2
> rows: 2