Skip to content

Commit

Permalink
[#20573] YSQL: Initial Implementation of Bitmap Scan CBO
Browse files Browse the repository at this point in the history
Summary:
Implemented CBO for Bitmap Scans, using the same approach as PG: store a partially calculated index cost and use that to compute bitmap index access.

If Bitmap Index Scans or Bitmap Ors are predicted to exceed work_mem, they'll have `disable_cost / 2` added to their cost. This discourages the planner from using them,
but still costs them lower than other actually disabled scan types. Without this change, `/*+ BitmapScan(test) */ SELECT * FROM t1000000 ...` will use a sequential scan because it's cheaper, even though the sequential scan has been disabled by the hint.

To better determine whether a Bitmap Or will exceed work_mem, the estimate of how many rows a Bitmap Or returns has been improved.

Also, a slight refactor was done: the estimated seeks, nexts, and width were moved into a YbPlanInfo struct to reduce the duplicated code for printing and assigning those three values.
Jira: DB-9574

Test Plan:
Jenkins: compile only

Latest: [[ https://jenkins.dev.yugabyte.com/job/optimizer/job/generate-report/231/artifact/taqo/report/20240501-185328/index_yb_vs_yb_cost-validation_.html | TAQO YB CBO with Bitmap Scans enabled / disabled ]]
* Very few regressions in the default case - highest regression was 1.26 on query e57fd1caa18f59c4e40138d377ec2a01. I checked each regression above 1.1, and they were all run-to-run variances using the same execution plan.

[[ https://jenkins.dev.yugabyte.com/job/optimizer/job/generate-report/224/artifact/taqo/report/20240418-134354/index_yb_vs_yb_cost-validation_bitmap_scan_yb_vs_pg.html | TAQO Run vs PG ]]

[[ https://jenkins.dev.yugabyte.com/job/optimizer/job/generate-report/225/artifact/taqo/report/20240419-131907/index_yb_vs_yb_cost-validation_bitmap_scan_ybstatsonly_vs_pg.html | TAQO Run PG vs YB with only stats ]]

```
./yb_build.sh --java-test 'org.yb.pgsql.TestPgBaseScansCostModel'
./yb_build.sh --java-test 'org.yb.pgsql.TestPgCostModelSeekNextEstimation'
```

Reviewers: gkukreja, mtakahara, amartsinchyk, tnayak

Reviewed By: gkukreja

Subscribers: gkukreja, yql

Differential Revision: https://phorge.dev.yugabyte.com/D33861
  • Loading branch information
timothy-e committed May 13, 2024
1 parent 870391c commit eac0c26
Show file tree
Hide file tree
Showing 19 changed files with 1,001 additions and 313 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
public class ExplainAnalyzeUtils {
private static final Logger LOG = LoggerFactory.getLogger(TestPgExplainAnalyze.class);
public static final String NODE_AGGREGATE = "Aggregate";
public static final String NODE_BITMAP_INDEX_SCAN = "Bitmap Index Scan";
public static final String NODE_BITMAP_OR = "BitmapOr";
public static final String NODE_FUNCTION_SCAN = "Function Scan";
public static final String NODE_GATHER = "Gather";
public static final String NODE_GATHER_MERGE = "Gather Merge";
Expand All @@ -39,7 +41,7 @@ public class ExplainAnalyzeUtils {
public static final String NODE_SEQ_SCAN = "Seq Scan";
public static final String NODE_SORT = "Sort";
public static final String NODE_VALUES_SCAN = "Values Scan";

public static final String NODE_YB_BITMAP_TABLE_SCAN = "YB Bitmap Table Scan";
public static final String NODE_YB_BATCHED_NESTED_LOOP = "YB Batched Nested Loop";

public static final String PLAN = "Plan";
Expand Down
137 changes: 135 additions & 2 deletions java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgBaseScansCostModel.java
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
package org.yb.pgsql;

import static org.yb.pgsql.ExplainAnalyzeUtils.NODE_BITMAP_INDEX_SCAN;
import static org.yb.pgsql.ExplainAnalyzeUtils.NODE_BITMAP_OR;
import static org.yb.pgsql.ExplainAnalyzeUtils.NODE_YB_BITMAP_TABLE_SCAN;
import static org.yb.pgsql.ExplainAnalyzeUtils.testExplain;

import java.sql.Statement;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.yb.YBTestRunner;
import org.yb.pgsql.ExplainAnalyzeUtils.PlanCheckerBuilder;
import org.yb.pgsql.ExplainAnalyzeUtils.TopLevelCheckerBuilder;
import org.yb.util.json.Checkers;
import org.yb.util.json.JsonUtil;
import org.yb.util.json.ObjectChecker;



@RunWith(value=YBTestRunner.class)
public class TestPgBaseScansCostModel extends BasePgSQLTest {
// 1e9. NOTE(review): presumably mirrors the planner's disable_cost -- confirm against the
// planner source; no test visible in this diff reads this field.
final double DISABLE_COST = Math.pow(10, 9);
// 5e8, i.e. half of DISABLE_COST. The tests use it as a threshold: a total cost below it means
// no work_mem penalty is expected on that node, a total cost above it means the penalty is.
final double BITMAP_SCAN_EXCEEDED_MEMORY_COST = 5 * Math.pow(10, 8);

private static TopLevelCheckerBuilder makeTopLevelBuilder() {
return JsonUtil.makeCheckerBuilder(TopLevelCheckerBuilder.class, false);
Expand Down Expand Up @@ -89,4 +95,131 @@ public void testYBCostModelUsesPgSelectivity() throws Exception {
stmt.execute("DROP TABLE test_21368");
}
}

/**
 * Checks the total cost reported by EXPLAIN for hinted bitmap scans against
 * BITMAP_SCAN_EXCEEDED_MEMORY_COST.
 *
 * The same three queries are explained twice: first before work_mem is
 * changed, where every checked node must cost less than the threshold, and
 * then with work_mem set to 64kB, where the nodes expected to exceed
 * work_mem must cost more than the threshold.
 */
@Test
public void testBitmapScansExceedingWorkMem() throws Exception {
  final String TABLE_NAME = "test";
  final String INDEX_A_NAME = "test_a_idx";
  final String INDEX_B_NAME = "test_b_idx";
  try (Statement stmt = connection.createStatement()) {
    stmt.execute("SET yb_enable_base_scans_cost_model = ON");
    stmt.execute("SET yb_enable_optimizer_statistics = ON");
    stmt.execute("SET enable_bitmapscan = TRUE");
    stmt.execute(String.format("CREATE TABLE %s (pk INT, a INT, b INT, " +
        "c INT, PRIMARY KEY (pk ASC))", TABLE_NAME));
    stmt.execute(String.format("INSERT INTO %s SELECT i, i * 2, i / 2, NULLIF(i %% 10, 0) FROM" +
        " generate_series(1, 10000) i", TABLE_NAME));
    stmt.execute(String.format("CREATE INDEX %s ON %s(a ASC)", INDEX_A_NAME, TABLE_NAME));
    stmt.execute(String.format("CREATE INDEX %s ON %s(b ASC)", INDEX_B_NAME, TABLE_NAME));
    stmt.execute(String.format("ANALYZE %s", TABLE_NAME));

    // The index names come from the same constants used in the CREATE INDEX
    // statements, so the expectations cannot drift from the DDL.
    final ObjectChecker bitmapIndexA = makePlanBuilder()
        .nodeType(NODE_BITMAP_INDEX_SCAN)
        .indexName(INDEX_A_NAME)
        .totalCost(Checkers.less(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
        .build();
    final ObjectChecker bitmapIndexAExceededWorkMem = makePlanBuilder()
        .nodeType(NODE_BITMAP_INDEX_SCAN)
        .indexName(INDEX_A_NAME)
        .totalCost(Checkers.greater(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
        .build();

    final ObjectChecker bitmapIndexB = makePlanBuilder()
        .nodeType(NODE_BITMAP_INDEX_SCAN)
        .indexName(INDEX_B_NAME)
        .totalCost(Checkers.less(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
        .build();
    final ObjectChecker bitmapIndexBExceededWorkMem = makePlanBuilder()
        .nodeType(NODE_BITMAP_INDEX_SCAN)
        .indexName(INDEX_B_NAME)
        .totalCost(Checkers.greater(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
        .build();

    // Each query is explained once per work_mem setting, so build it once.
    final String queryTemplate = "/*+ BitmapScan(t) */ SELECT * FROM %s AS t WHERE %s";
    final String singleIndexQuery = String.format(queryTemplate, TABLE_NAME, "a < 3000");
    final String orQuery = String.format(queryTemplate, TABLE_NAME, "a < 1000 OR b < 3000");
    final String orWithFilterQuery =
        String.format(queryTemplate, TABLE_NAME, "a < 1000 OR (b < 3000 AND c IS NULL)");

    /*
     * Before work_mem is lowered, every checked node is expected to cost
     * less than the threshold.
     */
    testExplain(
        stmt, singleIndexQuery,
        makeTopLevelBuilder()
            .plan(makePlanBuilder()
                .nodeType(NODE_YB_BITMAP_TABLE_SCAN)
                .totalCost(Checkers.less(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                .plans(bitmapIndexA)
                .build())
            .build());

    testExplain(
        stmt, orQuery,
        makeTopLevelBuilder()
            .plan(makePlanBuilder()
                .nodeType(NODE_YB_BITMAP_TABLE_SCAN)
                .totalCost(Checkers.less(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                .plans(makePlanBuilder().nodeType(NODE_BITMAP_OR)
                    .plans(bitmapIndexA, bitmapIndexB)
                    .totalCost(Checkers.less(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                    .build())
                .build())
            .build());

    testExplain(
        stmt, orWithFilterQuery,
        makeTopLevelBuilder()
            .plan(makePlanBuilder()
                .nodeType(NODE_YB_BITMAP_TABLE_SCAN)
                .totalCost(Checkers.less(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                .plans(makePlanBuilder().nodeType(NODE_BITMAP_OR)
                    .plans(bitmapIndexA, bitmapIndexB)
                    .totalCost(Checkers.less(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                    .build())
                .build())
            .build());

    /*
     * Validate that when a node is expected to exceed work_mem, that node
     * has a large cost added to it.
     */
    stmt.execute("SET work_mem TO '64kB'");
    testExplain(
        stmt, singleIndexQuery,
        makeTopLevelBuilder()
            .plan(makePlanBuilder()
                .nodeType(NODE_YB_BITMAP_TABLE_SCAN)
                .totalCost(Checkers.greater(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                .plans(bitmapIndexAExceededWorkMem)
                .build())
            .build());

    // In the OR queries only the scan on index b is expected to carry the
    // large cost; the scan on index a must still cost less than the threshold.
    testExplain(
        stmt, orQuery,
        makeTopLevelBuilder()
            .plan(makePlanBuilder()
                .nodeType(NODE_YB_BITMAP_TABLE_SCAN)
                .totalCost(Checkers.greater(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                .plans(makePlanBuilder().nodeType(NODE_BITMAP_OR)
                    .plans(bitmapIndexA, bitmapIndexBExceededWorkMem)
                    .totalCost(Checkers.greater(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                    .build())
                .build())
            .build());

    /*
     * A normal index scan for (b < 3000 AND c IS NULL) would use b < 3000 as
     * the index condition, and (c IS NULL) as the Storage Filter. Since
     * bitmap index scans can only use Storage Index Filters, we need to
     * make sure that the row estimate is correct.
     */
    testExplain(
        stmt, orWithFilterQuery,
        makeTopLevelBuilder()
            .plan(makePlanBuilder()
                .nodeType(NODE_YB_BITMAP_TABLE_SCAN)
                .totalCost(Checkers.greater(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                .plans(makePlanBuilder().nodeType(NODE_BITMAP_OR)
                    .plans(bitmapIndexA, bitmapIndexBExceededWorkMem)
                    .totalCost(Checkers.greater(BITMAP_SCAN_EXCEEDED_MEMORY_COST))
                    .build())
                .build())
            .build());

    // Drop the table once the checks pass so its generic name cannot collide
    // with a table created by another test.
    stmt.execute(String.format("DROP TABLE %s", TABLE_NAME));
  }
}
}

0 comments on commit eac0c26

Please sign in to comment.