From af0c9566e5a8a037722e5cecca36173202994ae0 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Fri, 26 Apr 2024 14:18:41 +0300 Subject: [PATCH 01/14] Adding new RankFeature phase (#107099) In this PR we add a new search phase, in-between query and fetch, that is responsible for applying any reranking needed. The idea is to trim down the query phase results down to rank_window_size, reach out to the shards to extract any additional feature data if needed, and then use this information to rerank the top results, trim them down to size and pass them to fetch phase. --- .../action/search/FetchSearchPhase.java | 22 +++--- .../action/search/RankFeaturePhase.java | 77 +++++++++++++++++++ .../SearchDfsQueryThenFetchAsyncAction.java | 2 +- .../SearchQueryThenFetchAsyncAction.java | 2 +- .../action/search/FetchSearchPhaseTests.java | 25 ++++-- 5 files changed, 110 insertions(+), 18 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java diff --git a/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java index 1f06158951392..569e5aec6eca3 100644 --- a/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java @@ -33,15 +33,21 @@ final class FetchSearchPhase extends SearchPhase { private final BiFunction, SearchPhase> nextPhaseFactory; private final SearchPhaseContext context; private final Logger logger; - private final SearchPhaseResults resultConsumer; private final SearchProgressListener progressListener; private final AggregatedDfs aggregatedDfs; + private final SearchPhaseController.ReducedQueryPhase reducedQueryPhase; - FetchSearchPhase(SearchPhaseResults resultConsumer, AggregatedDfs aggregatedDfs, SearchPhaseContext context) { + FetchSearchPhase( + SearchPhaseResults resultConsumer, + AggregatedDfs aggregatedDfs, + SearchPhaseContext context, + SearchPhaseController.ReducedQueryPhase reducedQueryPhase + ) { this( resultConsumer, aggregatedDfs, context, + reducedQueryPhase, (response, queryPhaseResults) -> new ExpandSearchPhase( context, response.hits, @@ -54,6 +60,7 @@ final class FetchSearchPhase extends SearchPhase { SearchPhaseResults resultConsumer, AggregatedDfs aggregatedDfs, SearchPhaseContext context, + SearchPhaseController.ReducedQueryPhase reducedQueryPhase, BiFunction, SearchPhase> nextPhaseFactory ) { super("fetch"); @@ -72,18 +79,16 @@ final class FetchSearchPhase extends SearchPhase { this.nextPhaseFactory = nextPhaseFactory; this.context = context; this.logger = context.getLogger(); - this.resultConsumer = resultConsumer; this.progressListener = context.getTask().getProgressListener(); + this.reducedQueryPhase = reducedQueryPhase; } @Override public void run() { context.execute(new AbstractRunnable() { + @Override - protected void doRun() throws Exception { - // we do the heavy lifting in this inner run method where we reduce aggs etc. that's why we fork this phase - // off immediately instead of forking when we send back the response to the user since there we only need - // to merge together the fetched results which is a linear operation. 
+ protected void doRun() { innerRun(); } @@ -94,9 +99,8 @@ public void onFailure(Exception e) { }); } - private void innerRun() throws Exception { + private void innerRun() { final int numShards = context.getNumShards(); - final SearchPhaseController.ReducedQueryPhase reducedQueryPhase = resultConsumer.reduce(); // Usually when there is a single shard, we force the search type QUERY_THEN_FETCH. But when there's kNN, we might // still use DFS_QUERY_THEN_FETCH, which does not perform the "query and fetch" optimization during the query phase. final boolean queryAndFetchOptimization = queryResults.length() == 1 diff --git a/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java b/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java new file mode 100644 index 0000000000000..a18d2c6418542 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ +package org.elasticsearch.action.search; + +import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.search.SearchPhaseResult; +import org.elasticsearch.search.dfs.AggregatedDfs; + +/** + * This search phase is responsible for executing any re-ranking needed for the given search request, iff that is applicable. + * It starts by retrieving {code num_shards * window_size} results from the query phase and reduces them to a global list of + * the top {@code window_size} results. It then reaches out to the shards to extract the needed feature data, + * and finally passes all this information to the appropriate {@code RankFeatureRankCoordinatorContext} which is responsible for reranking + * the results. If no rank query is specified, it proceeds directly to the next phase (FetchSearchPhase) by first reducing the results. + */ +public final class RankFeaturePhase extends SearchPhase { + + private final SearchPhaseContext context; + private final SearchPhaseResults queryPhaseResults; + private final SearchPhaseResults rankPhaseResults; + + private final AggregatedDfs aggregatedDfs; + + RankFeaturePhase(SearchPhaseResults queryPhaseResults, AggregatedDfs aggregatedDfs, SearchPhaseContext context) { + super("rank-feature"); + if (context.getNumShards() != queryPhaseResults.getNumShards()) { + throw new IllegalStateException( + "number of shards must match the length of the query results but doesn't:" + + context.getNumShards() + + "!=" + + queryPhaseResults.getNumShards() + ); + } + this.context = context; + this.queryPhaseResults = queryPhaseResults; + this.aggregatedDfs = aggregatedDfs; + this.rankPhaseResults = new ArraySearchPhaseResults<>(context.getNumShards()); + context.addReleasable(rankPhaseResults); + } + + @Override + public void run() { + context.execute(new AbstractRunnable() { + @Override + protected void doRun() throws Exception { + // we need to reduce the results at this point instead of fetch phase, so we fork this process similarly to how + // was set up at FetchSearchPhase. 
+ + // we do the heavy lifting in this inner run method where we reduce aggs etc + innerRun(); + } + + @Override + public void onFailure(Exception e) { + context.onPhaseFailure(RankFeaturePhase.this, "", e); + } + }); + } + + private void innerRun() throws Exception { + // other than running reduce, this is currently close to a no-op + SearchPhaseController.ReducedQueryPhase reducedQueryPhase = queryPhaseResults.reduce(); + moveToNextPhase(queryPhaseResults, reducedQueryPhase); + } + + private void moveToNextPhase( + SearchPhaseResults phaseResults, + SearchPhaseController.ReducedQueryPhase reducedQueryPhase + ) { + context.executeNextPhase(this, new FetchSearchPhase(phaseResults, aggregatedDfs, context, reducedQueryPhase)); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java index fcc848384866a..f0dca04efe374 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java @@ -100,7 +100,7 @@ protected SearchPhase getNextPhase(final SearchPhaseResults res aggregatedDfs, mergedKnnResults, queryPhaseResultConsumer, - (queryResults) -> new FetchSearchPhase(queryResults, aggregatedDfs, context), + (queryResults) -> new RankFeaturePhase(queryResults, aggregatedDfs, context), context ); } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java index 3ad7c52567d14..4720653c29381 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java @@ -122,7 +122,7 @@ && getRequest().scroll() == null @Override protected SearchPhase getNextPhase(final SearchPhaseResults results, SearchPhaseContext context) { - return new FetchSearchPhase(results, null, this); + return new RankFeaturePhase(results, null, this); } private ShardSearchRequest rewriteShardSearchRequest(ShardSearchRequest request) { diff --git a/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java index a2c5bed51f5e7..7b7061c0e1bc6 100644 --- a/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java @@ -47,7 +47,7 @@ public class FetchSearchPhaseTests extends ESTestCase { private static final long FETCH_PROFILE_TIME = 555; - public void testShortcutQueryAndFetchOptimization() { + public void testShortcutQueryAndFetchOptimization() throws Exception { SearchPhaseController controller = new SearchPhaseController((t, s) -> InternalAggregationTestCase.emptyReduceContextBuilder()); MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(1); try ( @@ -99,11 +99,12 @@ public void testShortcutQueryAndFetchOptimization() { } else { numHits = 0; } - + SearchPhaseController.ReducedQueryPhase reducedQueryPhase = results.reduce(); FetchSearchPhase phase = new FetchSearchPhase( results, null, mockSearchPhaseContext, + reducedQueryPhase, (searchResponse, scrollId) -> new SearchPhase("test") { @Override public void run() { @@ -141,7 +142,7 @@ private void 
assertProfiles(boolean profiled, int totalShards, SearchResponse se } } - public void testFetchTwoDocument() { + public void testFetchTwoDocument() throws Exception { MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(2); SearchPhaseController controller = new SearchPhaseController((t, s) -> InternalAggregationTestCase.emptyReduceContextBuilder()); try ( @@ -231,10 +232,12 @@ public void sendExecuteFetch( } } }; + SearchPhaseController.ReducedQueryPhase reducedQueryPhase = results.reduce(); FetchSearchPhase phase = new FetchSearchPhase( results, null, mockSearchPhaseContext, + reducedQueryPhase, (searchResponse, scrollId) -> new SearchPhase("test") { @Override public void run() { @@ -262,7 +265,7 @@ public void run() { } } - public void testFailFetchOneDoc() { + public void testFailFetchOneDoc() throws Exception { MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(2); SearchPhaseController controller = new SearchPhaseController((t, s) -> InternalAggregationTestCase.emptyReduceContextBuilder()); try ( @@ -343,10 +346,12 @@ public void sendExecuteFetch( } } }; + SearchPhaseController.ReducedQueryPhase reducedQueryPhase = results.reduce(); FetchSearchPhase phase = new FetchSearchPhase( results, null, mockSearchPhaseContext, + reducedQueryPhase, (searchResponse, scrollId) -> new SearchPhase("test") { @Override public void run() { @@ -390,7 +395,7 @@ public void run() { } } - public void testFetchDocsConcurrently() throws InterruptedException { + public void testFetchDocsConcurrently() throws Exception { int resultSetSize = randomIntBetween(0, 100); // we use at least 2 hits otherwise this is subject to single shard optimization and we trip an assert... int numHits = randomIntBetween(2, 100); // also numshards --> 1 hit per shard @@ -454,10 +459,12 @@ public void sendExecuteFetch( } }; CountDownLatch latch = new CountDownLatch(1); + SearchPhaseController.ReducedQueryPhase reducedQueryPhase = results.reduce(); FetchSearchPhase phase = new FetchSearchPhase( results, null, mockSearchPhaseContext, + reducedQueryPhase, (searchResponse, scrollId) -> new SearchPhase("test") { @Override public void run() { @@ -509,7 +516,7 @@ public void run() { } } - public void testExceptionFailsPhase() { + public void testExceptionFailsPhase() throws Exception { MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(2); SearchPhaseController controller = new SearchPhaseController((t, s) -> InternalAggregationTestCase.emptyReduceContextBuilder()); try ( @@ -600,10 +607,12 @@ public void sendExecuteFetch( } } }; + SearchPhaseController.ReducedQueryPhase reducedQueryPhase = results.reduce(); FetchSearchPhase phase = new FetchSearchPhase( results, null, mockSearchPhaseContext, + reducedQueryPhase, (searchResponse, scrollId) -> new SearchPhase("test") { @Override public void run() { @@ -624,7 +633,7 @@ public void run() { } } - public void testCleanupIrrelevantContexts() { // contexts that are not fetched should be cleaned up + public void testCleanupIrrelevantContexts() throws Exception { // contexts that are not fetched should be cleaned up MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(2); SearchPhaseController controller = new SearchPhaseController((t, s) -> InternalAggregationTestCase.emptyReduceContextBuilder()); try ( @@ -705,10 +714,12 @@ public void sendExecuteFetch( } } }; + SearchPhaseController.ReducedQueryPhase reducedQueryPhase = results.reduce(); FetchSearchPhase phase = new FetchSearchPhase( results, null, 
mockSearchPhaseContext, + reducedQueryPhase, (searchResponse, scrollId) -> new SearchPhase("test") { @Override public void run() { From 3183e6d6c93e0c22e5aaa77160d5afa36b1f62fd Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Fri, 26 Apr 2024 15:35:31 +0300 Subject: [PATCH 02/14] Add ignored field values to synthetic source (#107567) * Add ignored field values to synthetic source * Update docs/changelog/107567.yaml * initialize map * yaml fix * add node feature * add comments * small fixes * missing cluster feature in yaml * constants for chars, stored fields * remove duplicate method * throw exception on parse failure * remove Base64 encoding * add assert on IgnoredValuesFieldMapper::write * changes from review * simplify logic * add comment * rename classes * rename _ignored_values to _ignored_source * rename _ignored_values to _ignored_source --- docs/changelog/107567.yaml | 5 + .../indices.create/20_synthetic_source.yml | 151 +++++++ .../test/nodes.stats/11_indices_metrics.yml | 51 +-- server/src/main/java/module-info.java | 1 + .../index/mapper/DocumentParserContext.java | 33 ++ .../mapper/IgnoreMalformedStoredValues.java | 141 +------ .../mapper/IgnoredSourceFieldMapper.java | 133 ++++++ .../index/mapper/MapperFeatures.java | 24 ++ .../index/mapper/ObjectMapper.java | 20 +- .../index/mapper/SourceLoader.java | 17 + .../index/mapper/XContentDataHelper.java | 399 ++++++++++++++++++ .../elasticsearch/indices/IndicesModule.java | 2 + ...lasticsearch.features.FeatureSpecification | 1 + .../mapper/DocCountFieldMapperTests.java | 5 +- .../index/mapper/DocumentMapperTests.java | 2 + .../mapper/IgnoredSourceFieldMapperTests.java | 148 +++++++ .../index/mapper/XContentDataHelperTests.java | 90 ++++ .../indices/IndicesModuleTests.java | 2 + 18 files changed, 1059 insertions(+), 166 deletions(-) create mode 100644 docs/changelog/107567.yaml create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/XContentDataHelperTests.java diff --git a/docs/changelog/107567.yaml b/docs/changelog/107567.yaml new file mode 100644 index 0000000000000..558b5b570b1fb --- /dev/null +++ b/docs/changelog/107567.yaml @@ -0,0 +1,5 @@ +pr: 107567 +summary: Add ignored field values to synthetic source +area: Mapping +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 62a4e240a5b5d..39787366c0cc9 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -36,3 +36,154 @@ nested is disabled: properties: foo: type: keyword + +--- +object with unmapped fields: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored values + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + total_fields: + ignore_dynamic_beyond_limit: true + limit: 1 + + mappings: + 
_source: + mode: synthetic + properties: + name: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "name": "aaaa", "some_string": "AaAa", "some_int": 1000, "some_double": 123.456789, "some_bool": true, "a.very.deeply.nested.field": "AAAA" }' + - '{ "create": { } }' + - '{ "name": "bbbb", "some_string": "BbBb", "some_int": 2000, "some_double": 321.987654, "some_bool": false, "a.very.deeply.nested.field": "BBBB" }' + + - do: + search: + index: test + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: aaaa } + - match: { hits.hits.0._source.some_string: AaAa } + - match: { hits.hits.0._source.some_int: 1000 } + - match: { hits.hits.0._source.some_double: 123.456789 } + - match: { hits.hits.0._source.a.very.deeply.nested.field: AAAA } + - match: { hits.hits.0._source.some_bool: true } + - match: { hits.hits.1._source.name: bbbb } + - match: { hits.hits.1._source.some_string: BbBb } + - match: { hits.hits.1._source.some_int: 2000 } + - match: { hits.hits.1._source.some_double: 321.987654 } + - match: { hits.hits.1._source.a.very.deeply.nested.field: BBBB } + + +--- +nested object with unmapped fields: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored values + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + total_fields: + ignore_dynamic_beyond_limit: true + limit: 3 + + mappings: + _source: + mode: synthetic + properties: + path: + properties: + to: + properties: + name: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "path.to.name": "aaaa", "path.to.surname": "AaAa", "path.some.other.name": "AaAaAa" }' + - '{ "create": { } }' + - '{ "path.to.name": "bbbb", "path.to.surname": "BbBb", "path.some.other.name": "BbBbBb" }' + + - do: + search: + index: test + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.path.to.name: aaaa } + - match: { hits.hits.0._source.path.to.surname: AaAa } + - match: { hits.hits.0._source.path.some.other.name: AaAaAa } + - match: { hits.hits.1._source.path.to.name: bbbb } + - match: { hits.hits.1._source.path.to.surname: BbBb } + - match: { hits.hits.1._source.path.some.other.name: BbBbBb } + + +--- +empty object with unmapped fields: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored values + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + total_fields: + ignore_dynamic_beyond_limit: true + limit: 3 + + mappings: + _source: + mode: synthetic + properties: + path: + properties: + to: + properties: + name: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "path.to.surname": "AaAa", "path.some.other.name": "AaAaAa" }' + - '{ "create": { } }' + - '{ "path.to.surname": "BbBb", "path.some.other.name": "BbBbBb" }' + + - do: + search: + index: test + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.path.to.surname: AaAa } + - match: { hits.hits.0._source.path.some.other.name: AaAaAa } + - match: { hits.hits.1._source.path.to.surname: BbBb } + - match: { hits.hits.1._source.path.some.other.name: BbBbBb } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml index 1a7da98af9129..ac0f8aec4f3d0 100644 --- 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml @@ -417,8 +417,8 @@ - requires: test_runner_features: [arbitrary_key] - cluster_features: ["gte_v8.5.0"] - reason: "mappings added in version 8.5.0" + cluster_features: ["mapper.track_ignored_source"] + reason: "_ignored_source added to mappings" - do: indices.create: @@ -476,35 +476,36 @@ # 4. _field_names # 5. _id # 6. _ignored - # 7. _index - # 8. _nested_path - # 9. _routing - # 10. _seq_no - # 11. _source - # 12. _tier - # 13. _version - # 14. @timestamp - # 15. authors.age - # 16. authors.company - # 17. authors.company.keyword - # 18. authors.name.last_name - # 19. authors.name.first_name - # 20. authors.name.full_name - # 21. link - # 22. title - # 23. url + # 7. _ignored_source + # 8. _index + # 9. _nested_path + # 10. _routing + # 11. _seq_no + # 12. _source + # 13. _tier + # 14. _version + # 15. @timestamp + # 16. authors.age + # 17. authors.company + # 18. authors.company.keyword + # 19. authors.name.last_name + # 20. authors.name.first_name + # 21. authors.name.full_name + # 22. link + # 23. title + # 24. url # Object mappers: - # 24. authors - # 25. authors.name + # 25. authors + # 26. authors.name # Runtime field mappers: - # 26. a_source_field + # 27. a_source_field - - gte: { nodes.$node_id.indices.mappings.total_count: 26 } + - gte: { nodes.$node_id.indices.mappings.total_count: 27 } - is_true: nodes.$node_id.indices.mappings.total_estimated_overhead - gte: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 26624 } - - match: { nodes.$node_id.indices.indices.index1.mappings.total_count: 26 } + - match: { nodes.$node_id.indices.indices.index1.mappings.total_count: 27 } - is_true: nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead - - match: { nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead_in_bytes: 26624 } + - match: { nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead_in_bytes: 27648 } --- "indices mappings does not exist in shards level": diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 7560e2ff1a7d6..475158c7a8709 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -426,6 +426,7 @@ org.elasticsearch.rest.RestFeatures, org.elasticsearch.indices.IndicesFeatures, org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures, + org.elasticsearch.index.mapper.MapperFeatures, org.elasticsearch.search.retriever.RetrieversFeatures; uses org.elasticsearch.plugins.internal.SettingsExtension; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index a42477bed2146..de1266ae3a7ee 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -104,6 +104,7 @@ public int get() { private final MappingParserContext mappingParserContext; private final SourceToParse sourceToParse; private final Set ignoredFields; + private final List ignoredFieldValues; private final Map> dynamicMappers; private final DynamicMapperSize dynamicMappersSize; private final Map dynamicObjectMappers; @@ -122,6 +123,7 @@ private DocumentParserContext( MappingParserContext mappingParserContext, 
SourceToParse sourceToParse, Set ignoreFields, + List ignoredFieldValues, Map> dynamicMappers, Map dynamicObjectMappers, Map> dynamicRuntimeFields, @@ -139,6 +141,7 @@ private DocumentParserContext( this.mappingParserContext = mappingParserContext; this.sourceToParse = sourceToParse; this.ignoredFields = ignoreFields; + this.ignoredFieldValues = ignoredFieldValues; this.dynamicMappers = dynamicMappers; this.dynamicObjectMappers = dynamicObjectMappers; this.dynamicRuntimeFields = dynamicRuntimeFields; @@ -159,6 +162,7 @@ private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, in.mappingParserContext, in.sourceToParse, in.ignoredFields, + in.ignoredFieldValues, in.dynamicMappers, in.dynamicObjectMappers, in.dynamicRuntimeFields, @@ -186,6 +190,7 @@ protected DocumentParserContext( mappingParserContext, source, new HashSet<>(), + new ArrayList<>(), new HashMap<>(), new HashMap<>(), new HashMap<>(), @@ -251,6 +256,20 @@ public final Collection getIgnoredFields() { return Collections.unmodifiableCollection(ignoredFields); } + /** + * Add the given ignored values to the corresponding list. + */ + public final void addIgnoredField(IgnoredSourceFieldMapper.NameValue values) { + ignoredFieldValues.add(values); + } + + /** + * Return the collection of values for fields that have been ignored so far. + */ + public final Collection getIgnoredFieldValues() { + return Collections.unmodifiableCollection(ignoredFieldValues); + } + /** * Add the given {@code field} to the _field_names field * @@ -345,6 +364,20 @@ public final boolean addDynamicMapper(Mapper mapper) { int additionalFieldsToAdd = getNewFieldsSize() + mapperSize; if (indexSettings().isIgnoreDynamicFieldsBeyondLimit()) { if (mappingLookup.exceedsLimit(indexSettings().getMappingTotalFieldsLimit(), additionalFieldsToAdd)) { + if (indexSettings().getMode().isSyntheticSourceEnabled() || mappingLookup.isSourceSynthetic()) { + try { + int parentOffset = parent() instanceof RootObjectMapper ? 0 : parent().fullPath().length() + 1; + addIgnoredField( + new IgnoredSourceFieldMapper.NameValue( + mapper.name(), + parentOffset, + XContentDataHelper.encodeToken(parser()) + ) + ); + } catch (IOException e) { + throw new IllegalArgumentException("failed to parse field [" + mapper.name() + " ]", e); + } + } addIgnoredField(mapper.name()); return false; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java index b7990648539c1..52f4048e9b230 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java @@ -10,17 +10,10 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefIterator; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; -import org.elasticsearch.xcontent.XContentParserConfiguration; -import org.elasticsearch.xcontent.XContentType; import java.io.IOException; -import java.math.BigDecimal; -import java.math.BigInteger; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -32,38 +25,8 @@ * {@code _source}. 
*/ public abstract class IgnoreMalformedStoredValues { - /** - * Build a {@link StoredField} for the value on which the parser is - * currently positioned. - *
- * We try to use {@link StoredField}'s native types for fields where - * possible but we have to preserve more type information than - * stored fields support, so we encode all of those into stored fields' - * {@code byte[]} type and then encode type information in the first byte. - *
- */ - public static StoredField storedField(String fieldName, XContentParser parser) throws IOException { - String name = name(fieldName); - return switch (parser.currentToken()) { - case VALUE_STRING -> new StoredField(name, parser.text()); - case VALUE_NUMBER -> switch (parser.numberType()) { - case INT -> new StoredField(name, parser.intValue()); - case LONG -> new StoredField(name, parser.longValue()); - case DOUBLE -> new StoredField(name, parser.doubleValue()); - case FLOAT -> new StoredField(name, parser.floatValue()); - case BIG_INTEGER -> new StoredField(name, encode((BigInteger) parser.numberValue())); - case BIG_DECIMAL -> new StoredField(name, encode((BigDecimal) parser.numberValue())); - }; - case VALUE_BOOLEAN -> new StoredField(name, new byte[] { parser.booleanValue() ? (byte) 't' : (byte) 'f' }); - case VALUE_EMBEDDED_OBJECT -> new StoredField(name, encode(parser.binaryValue())); - case START_OBJECT, START_ARRAY -> { - try (XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent())) { - builder.copyCurrentStructure(parser); - yield new StoredField(name, encode(builder)); - } - } - default -> throw new IllegalArgumentException("synthetic _source doesn't support malformed objects"); - }; + public static StoredField storedField(String name, XContentParser parser) throws IOException { + return XContentDataHelper.storedField(name(name), parser); } /** @@ -136,114 +99,16 @@ public int count() { public void write(XContentBuilder b) throws IOException { for (Object v : values) { if (v instanceof BytesRef r) { - decodeAndWrite(b, r); + XContentDataHelper.decodeAndWrite(b, r); } else { b.value(v); } } values = emptyList(); } - - private static void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { - switch (r.bytes[r.offset]) { - case 'b': - b.value(r.bytes, r.offset + 1, r.length - 1); - return; - case 'c': - decodeAndWriteXContent(b, XContentType.CBOR, r); - return; - case 'd': - if (r.length < 5) { - throw new IllegalArgumentException("Can't decode " + r); - } - int scale = ByteUtils.readIntLE(r.bytes, r.offset + 1); - b.value(new BigDecimal(new BigInteger(r.bytes, r.offset + 5, r.length - 5), scale)); - return; - case 'f': - if (r.length != 1) { - throw new IllegalArgumentException("Can't decode " + r); - } - b.value(false); - return; - case 'i': - b.value(new BigInteger(r.bytes, r.offset + 1, r.length - 1)); - return; - case 'j': - decodeAndWriteXContent(b, XContentType.JSON, r); - return; - case 's': - decodeAndWriteXContent(b, XContentType.SMILE, r); - return; - case 't': - if (r.length != 1) { - throw new IllegalArgumentException("Can't decode " + r); - } - b.value(true); - return; - case 'y': - decodeAndWriteXContent(b, XContentType.YAML, r); - return; - default: - throw new IllegalArgumentException("Can't decode " + r); - } - } - - private static void decodeAndWriteXContent(XContentBuilder b, XContentType type, BytesRef r) throws IOException { - try ( - XContentParser parser = type.xContent().createParser(XContentParserConfiguration.EMPTY, r.bytes, r.offset + 1, r.length - 1) - ) { - b.copyCurrentStructure(parser); - } - } } private static String name(String fieldName) { return fieldName + "._ignore_malformed"; } - - private static byte[] encode(BigInteger n) { - byte[] twosCompliment = n.toByteArray(); - byte[] encoded = new byte[1 + twosCompliment.length]; - encoded[0] = 'i'; - System.arraycopy(twosCompliment, 0, encoded, 1, twosCompliment.length); - return encoded; - } - - private static byte[] encode(BigDecimal n) { - byte[] 
twosCompliment = n.unscaledValue().toByteArray(); - byte[] encoded = new byte[5 + twosCompliment.length]; - encoded[0] = 'd'; - ByteUtils.writeIntLE(n.scale(), encoded, 1); - System.arraycopy(twosCompliment, 0, encoded, 5, twosCompliment.length); - return encoded; - } - - private static byte[] encode(byte[] b) { - byte[] encoded = new byte[1 + b.length]; - encoded[0] = 'b'; - System.arraycopy(b, 0, encoded, 1, b.length); - return encoded; - } - - private static byte[] encode(XContentBuilder builder) throws IOException { - BytesReference b = BytesReference.bytes(builder); - byte[] encoded = new byte[1 + b.length()]; - encoded[0] = switch (builder.contentType()) { - case JSON -> 'j'; - case SMILE -> 's'; - case YAML -> 'y'; - case CBOR -> 'c'; - default -> throw new IllegalArgumentException("unsupported type " + builder.contentType()); - }; - - int position = 1; - BytesRefIterator itr = b.iterator(); - BytesRef ref; - while ((ref = itr.next()) != null) { - System.arraycopy(ref.bytes, ref.offset, encoded, position, ref.length); - position += ref.length; - } - assert position == encoded.length; - return encoded; - } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java new file mode 100644 index 0000000000000..1daa7d1d674e3 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -0,0 +1,133 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.query.SearchExecutionContext; + +import java.nio.charset.StandardCharsets; +import java.util.Collections; + +/** + + * Mapper for the {@code _ignored_source} field. + * + * A field mapper that records fields that have been ignored, along with their values. It's intended for use + * in indexes with synthetic source to reconstruct the latter, taking into account fields that got ignored during + * indexing. + * + * This overlaps with {@link IgnoredFieldMapper} that tracks just the ignored field names. It's worth evaluating + * if we can replace it for all use cases to avoid duplication, assuming that the storage tradeoff is favorable. 
+ */ +public class IgnoredSourceFieldMapper extends MetadataFieldMapper { + + // This factor is used to combine two offsets within the same integer: + // - the offset of the end of the parent field within the field name (N / PARENT_OFFSET_IN_NAME_OFFSET) + // - the offset of the field value within the encoding string containing the offset (first 4 bytes), the field name and value + // (N % PARENT_OFFSET_IN_NAME_OFFSET) + private static final int PARENT_OFFSET_IN_NAME_OFFSET = 1 << 16; + + public static final String NAME = "_ignored_source"; + + public static final IgnoredSourceFieldMapper INSTANCE = new IgnoredSourceFieldMapper(); + + public static final TypeParser PARSER = new FixedTypeParser(context -> INSTANCE); + + static final NodeFeature TRACK_IGNORED_SOURCE = new NodeFeature("mapper.track_ignored_source"); + + /* + * Container for the ignored field data: + * - the full name + * - the offset in the full name indicating the end of the substring matching + * the full name of the parent field + * - the value, encoded as a byte array + */ + public record NameValue(String name, int parentOffset, BytesRef value) { + String getParentFieldName() { + // _doc corresponds to the root object + return (parentOffset == 0) ? "_doc" : name.substring(0, parentOffset - 1); + } + + String getFieldName() { + return parentOffset() == 0 ? name() : name().substring(parentOffset()); + } + } + + static final class IgnoredValuesFieldMapperType extends StringFieldType { + + private static final IgnoredValuesFieldMapperType INSTANCE = new IgnoredValuesFieldMapperType(); + + private IgnoredValuesFieldMapperType() { + super(NAME, false, true, false, TextSearchInfo.NONE, Collections.emptyMap()); + } + + @Override + public String typeName() { + return NAME; + } + + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + return new StoredValueFetcher(context.lookup(), NAME); + } + } + + private IgnoredSourceFieldMapper() { + super(IgnoredValuesFieldMapperType.INSTANCE); + } + + @Override + protected String contentType() { + return NAME; + } + + @Override + public void postParse(DocumentParserContext context) { + // Ignored values are only expected in synthetic mode. 
+ assert context.getIgnoredFieldValues().isEmpty() + || context.indexSettings().getMode().isSyntheticSourceEnabled() + || context.mappingLookup().isSourceSynthetic(); + for (NameValue nameValue : context.getIgnoredFieldValues()) { + context.doc().add(new StoredField(NAME, encode(nameValue))); + } + } + + static byte[] encode(NameValue values) { + assert values.parentOffset < PARENT_OFFSET_IN_NAME_OFFSET; + assert values.parentOffset * (long) PARENT_OFFSET_IN_NAME_OFFSET < Integer.MAX_VALUE; + + byte[] nameBytes = values.name.getBytes(StandardCharsets.UTF_8); + byte[] bytes = new byte[4 + nameBytes.length + values.value.length]; + ByteUtils.writeIntLE(values.name.length() + PARENT_OFFSET_IN_NAME_OFFSET * values.parentOffset, bytes, 0); + System.arraycopy(nameBytes, 0, bytes, 4, nameBytes.length); + System.arraycopy(values.value.bytes, values.value.offset, bytes, 4 + nameBytes.length, values.value.length); + return bytes; + } + + static NameValue decode(Object field) { + byte[] bytes = ((BytesRef) field).bytes; + int encodedSize = ByteUtils.readIntLE(bytes, 0); + int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET; + int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET; + String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8); + BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4); + return new NameValue(name, parentOffset, value); + } + + // This mapper doesn't contribute to source directly as it has no access to the object structure. Instead, its contents + // are loaded by SourceLoader and passed to object mappers that, in turn, write their ignore fields at the appropriate level. + @Override + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { + return SourceLoader.SyntheticFieldLoader.NOTHING; + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java new file mode 100644 index 0000000000000..dc189aecab01c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +/** + * Spec for mapper-related features. 
+ */ +public class MapperFeatures implements FeatureSpecification { + @Override + public Set getFeatures() { + return Set.of(IgnoredSourceFieldMapper.TRACK_IGNORED_SOURCE); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index ba396e9a72d30..dca6af2489910 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -730,6 +730,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { private class SyntheticSourceFieldLoader implements SourceLoader.SyntheticFieldLoader { private final List fields; private boolean hasValue; + private List ignoredValues; private SyntheticSourceFieldLoader(List fields) { this.fields = fields; @@ -793,8 +794,25 @@ public void write(XContentBuilder b) throws IOException { field.write(b); } } - b.endObject(); hasValue = false; + if (ignoredValues != null) { + for (IgnoredSourceFieldMapper.NameValue ignored : ignoredValues) { + b.field(ignored.getFieldName()); + XContentDataHelper.decodeAndWrite(b, ignored.value()); + } + ignoredValues = null; + } + b.endObject(); + } + + @Override + public boolean setIgnoredValues(Map> objectsWithIgnoredFields) { + ignoredValues = objectsWithIgnoredFields.get(name()); + hasValue |= ignoredValues != null; + for (SourceLoader.SyntheticFieldLoader loader : fields) { + hasValue |= loader.setIgnoredValues(objectsWithIgnoredFields); + } + return this.ignoredValues != null; } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java index c07821f3c9ae7..f37f494cb8865 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java @@ -17,6 +17,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -89,6 +91,7 @@ public Synthetic(Mapping mapping) { .storedFieldLoaders() .map(Map.Entry::getKey) .collect(Collectors.toSet()); + this.requiredStoredFields.add(IgnoredSourceFieldMapper.NAME); } @Override @@ -122,12 +125,22 @@ private SyntheticLeaf(SyntheticFieldLoader loader, SyntheticFieldLoader.DocValue @Override public Source source(LeafStoredFieldLoader storedFieldLoader, int docId) throws IOException { + // Maps the names of existing objects to lists of ignored fields they contain. 
+ Map> objectsWithIgnoredFields = new HashMap<>(); + for (Map.Entry> e : storedFieldLoader.storedFields().entrySet()) { SyntheticFieldLoader.StoredFieldLoader loader = storedFieldLoaders.get(e.getKey()); if (loader != null) { loader.load(e.getValue()); } + if (IgnoredSourceFieldMapper.NAME.equals(e.getKey())) { + for (Object value : e.getValue()) { + IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); + objectsWithIgnoredFields.computeIfAbsent(nameValue.getParentFieldName(), k -> new ArrayList<>()).add(nameValue); + } + } } + loader.setIgnoredValues(objectsWithIgnoredFields); if (docValuesLoader != null) { docValuesLoader.advanceToDoc(docId); } @@ -224,6 +237,10 @@ public void write(XContentBuilder b) {} */ void write(XContentBuilder b) throws IOException; + default boolean setIgnoredValues(Map> objectsWithIgnoredFields) { + return false; + } + /** * Sync for stored field values. */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java new file mode 100644 index 0000000000000..c41fbd5057227 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java @@ -0,0 +1,399 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.core.CheckedFunction; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; + +import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * Helper class for processing field data of any type, as provided by the {@link XContentParser}. + */ +final class XContentDataHelper { + /** + * Build a {@link StoredField} for the value on which the parser is + * currently positioned. + *
+ * We try to use {@link StoredField}'s native types for fields where + * possible but we have to preserve more type information than + * stored fields support, so we encode all of those into stored fields' + * {@code byte[]} type and then encode type information in the first byte. + *
+ */ + static StoredField storedField(String name, XContentParser parser) throws IOException { + return (StoredField) processToken(parser, typeUtils -> typeUtils.buildStoredField(name, parser)); + } + + /** + * Build a {@link BytesRef} wrapping a byte array containing an encoded form + * the value on which the parser is currently positioned. + */ + static BytesRef encodeToken(XContentParser parser) throws IOException { + return new BytesRef((byte[]) processToken(parser, (typeUtils) -> typeUtils.encode(parser))); + } + + /** + * Decode the value in the passed {@link BytesRef} and add it as a value to the + * passed build. The assumption is that the passed value has encoded using the function + * {@link #encodeToken(XContentParser)} above. + */ + static void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + switch ((char) r.bytes[r.offset]) { + case BINARY_ENCODING -> TypeUtils.EMBEDDED_OBJECT.decodeAndWrite(b, r); + case CBOR_OBJECT_ENCODING, JSON_OBJECT_ENCODING, YAML_OBJECT_ENCODING, SMILE_OBJECT_ENCODING -> { + TypeUtils.START.decodeAndWrite(b, r); + } + case BIG_DECIMAL_ENCODING -> TypeUtils.BIG_DECIMAL.decodeAndWrite(b, r); + case FALSE_ENCODING, TRUE_ENCODING -> TypeUtils.BOOLEAN.decodeAndWrite(b, r); + case BIG_INTEGER_ENCODING -> TypeUtils.BIG_INTEGER.decodeAndWrite(b, r); + case STRING_ENCODING -> TypeUtils.STRING.decodeAndWrite(b, r); + case INTEGER_ENCODING -> TypeUtils.INTEGER.decodeAndWrite(b, r); + case LONG_ENCODING -> TypeUtils.LONG.decodeAndWrite(b, r); + case DOUBLE_ENCODING -> TypeUtils.DOUBLE.decodeAndWrite(b, r); + case FLOAT_ENCODING -> TypeUtils.FLOAT.decodeAndWrite(b, r); + default -> throw new IllegalArgumentException("Can't decode " + r); + } + } + + private static Object processToken(XContentParser parser, CheckedFunction visitor) throws IOException { + return switch (parser.currentToken()) { + case VALUE_STRING -> visitor.apply(TypeUtils.STRING); + case VALUE_NUMBER -> switch (parser.numberType()) { + case INT -> visitor.apply(TypeUtils.INTEGER); + case LONG -> visitor.apply(TypeUtils.LONG); + case DOUBLE -> visitor.apply(TypeUtils.DOUBLE); + case FLOAT -> visitor.apply(TypeUtils.FLOAT); + case BIG_INTEGER -> visitor.apply(TypeUtils.BIG_INTEGER); + case BIG_DECIMAL -> visitor.apply(TypeUtils.BIG_DECIMAL); + }; + case VALUE_BOOLEAN -> visitor.apply(TypeUtils.BOOLEAN); + case VALUE_EMBEDDED_OBJECT -> visitor.apply(TypeUtils.EMBEDDED_OBJECT); + case START_OBJECT, START_ARRAY -> visitor.apply(TypeUtils.START); + default -> throw new IllegalArgumentException("synthetic _source doesn't support malformed objects"); + }; + } + + private static final char STRING_ENCODING = 'S'; + private static final char INTEGER_ENCODING = 'I'; + private static final char LONG_ENCODING = 'L'; + private static final char DOUBLE_ENCODING = 'D'; + private static final char FLOAT_ENCODING = 'F'; + private static final char BIG_INTEGER_ENCODING = 'i'; + private static final char BIG_DECIMAL_ENCODING = 'd'; + private static final char FALSE_ENCODING = 'f'; + private static final char TRUE_ENCODING = 't'; + private static final char BINARY_ENCODING = 'b'; + private static final char CBOR_OBJECT_ENCODING = 'c'; + private static final char JSON_OBJECT_ENCODING = 'j'; + private static final char YAML_OBJECT_ENCODING = 'y'; + private static final char SMILE_OBJECT_ENCODING = 's'; + + private enum TypeUtils { + STRING(STRING_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, parser.text()); 
+ } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] text = parser.text().getBytes(StandardCharsets.UTF_8); + byte[] bytes = new byte[text.length + 1]; + bytes[0] = getEncoding(); + System.arraycopy(text, 0, bytes, 1, text.length); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + b.value(new BytesRef(r.bytes, r.offset + 1, r.length - 1).utf8ToString()); + } + }, + INTEGER(INTEGER_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, parser.intValue()); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] bytes = new byte[5]; + bytes[0] = getEncoding(); + ByteUtils.writeIntLE(parser.intValue(), bytes, 1); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + b.value(ByteUtils.readIntLE(r.bytes, 1 + r.offset)); + } + }, + LONG(LONG_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, parser.longValue()); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] bytes = new byte[9]; + bytes[0] = getEncoding(); + ByteUtils.writeLongLE(parser.longValue(), bytes, 1); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + b.value(ByteUtils.readLongLE(r.bytes, 1 + r.offset)); + } + }, + DOUBLE(DOUBLE_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, parser.doubleValue()); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] bytes = new byte[9]; + bytes[0] = getEncoding(); + ByteUtils.writeDoubleLE(parser.doubleValue(), bytes, 1); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + b.value(ByteUtils.readDoubleLE(r.bytes, 1 + r.offset)); + } + }, + FLOAT(FLOAT_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, parser.floatValue()); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] bytes = new byte[5]; + bytes[0] = getEncoding(); + ByteUtils.writeFloatLE(parser.floatValue(), bytes, 1); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + b.value(ByteUtils.readFloatLE(r.bytes, 1 + r.offset)); + } + }, + BIG_INTEGER(BIG_INTEGER_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, encode(parser)); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] bytes = encode((BigInteger) parser.numberValue(), getEncoding()); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + b.value(new BigInteger(r.bytes, r.offset + 1, r.length - 1)); + } + }, + BIG_DECIMAL(BIG_DECIMAL_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, encode(parser)); + } + + @Override + byte[] 
encode(XContentParser parser) throws IOException { + byte[] bytes = encode((BigDecimal) parser.numberValue(), getEncoding()); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + if (r.length < 5) { + throw new IllegalArgumentException("Can't decode " + r); + } + int scale = ByteUtils.readIntLE(r.bytes, r.offset + 1); + b.value(new BigDecimal(new BigInteger(r.bytes, r.offset + 5, r.length - 5), scale)); + } + }, + BOOLEAN(new Character[] { TRUE_ENCODING, FALSE_ENCODING }) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, encode(parser)); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] bytes = new byte[] { parser.booleanValue() ? (byte) 't' : (byte) 'f' }; + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + if (r.length != 1) { + throw new IllegalArgumentException("Can't decode " + r); + } + assert r.bytes[r.offset] == 't' || r.bytes[r.offset] == 'f' : r.bytes[r.offset]; + b.value(r.bytes[r.offset] == 't'); + } + }, + EMBEDDED_OBJECT(BINARY_ENCODING) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, encode(parser.binaryValue())); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + byte[] bytes = encode(parser.binaryValue()); + assertValidEncoding(bytes); + return bytes; + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + b.value(r.bytes, r.offset + 1, r.length - 1); + } + }, + START(new Character[] { CBOR_OBJECT_ENCODING, JSON_OBJECT_ENCODING, YAML_OBJECT_ENCODING, SMILE_OBJECT_ENCODING }) { + @Override + StoredField buildStoredField(String name, XContentParser parser) throws IOException { + return new StoredField(name, encode(parser)); + } + + @Override + byte[] encode(XContentParser parser) throws IOException { + try (XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent())) { + builder.copyCurrentStructure(parser); + byte[] bytes = encode(builder); + assertValidEncoding(bytes); + return bytes; + } + } + + @Override + void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException { + switch ((char) r.bytes[r.offset]) { + case CBOR_OBJECT_ENCODING -> decodeAndWriteXContent(b, XContentType.CBOR, r); + case JSON_OBJECT_ENCODING -> decodeAndWriteXContent(b, XContentType.JSON, r); + case SMILE_OBJECT_ENCODING -> decodeAndWriteXContent(b, XContentType.SMILE, r); + case YAML_OBJECT_ENCODING -> decodeAndWriteXContent(b, XContentType.YAML, r); + default -> throw new IllegalArgumentException("Can't decode " + r); + } + } + }; + + TypeUtils(char encoding) { + this.encoding = new Character[] { encoding }; + } + + TypeUtils(Character[] encoding) { + this.encoding = encoding; + } + + byte getEncoding() { + assert encoding.length == 1; + return (byte) encoding[0].charValue(); + } + + void assertValidEncoding(byte[] encodedValue) { + assert Arrays.asList(encoding).contains((char) encodedValue[0]); + } + + final Character[] encoding; + + abstract StoredField buildStoredField(String name, XContentParser parser) throws IOException; + + abstract byte[] encode(XContentParser parser) throws IOException; + + abstract void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException; + + static byte[] encode(BigInteger n, Byte 
encoding) throws IOException { + byte[] twosCompliment = n.toByteArray(); + byte[] encoded = new byte[1 + twosCompliment.length]; + encoded[0] = encoding; + System.arraycopy(twosCompliment, 0, encoded, 1, twosCompliment.length); + return encoded; + } + + static byte[] encode(BigDecimal n, Byte encoding) { + byte[] twosCompliment = n.unscaledValue().toByteArray(); + byte[] encoded = new byte[5 + twosCompliment.length]; + encoded[0] = 'd'; + ByteUtils.writeIntLE(n.scale(), encoded, 1); + System.arraycopy(twosCompliment, 0, encoded, 5, twosCompliment.length); + return encoded; + } + + static byte[] encode(byte[] b) { + byte[] encoded = new byte[1 + b.length]; + encoded[0] = 'b'; + System.arraycopy(b, 0, encoded, 1, b.length); + return encoded; + } + + static byte[] encode(XContentBuilder builder) throws IOException { + BytesReference b = BytesReference.bytes(builder); + byte[] encoded = new byte[1 + b.length()]; + encoded[0] = switch (builder.contentType()) { + case JSON -> JSON_OBJECT_ENCODING; + case SMILE -> SMILE_OBJECT_ENCODING; + case YAML -> YAML_OBJECT_ENCODING; + case CBOR -> CBOR_OBJECT_ENCODING; + default -> throw new IllegalArgumentException("unsupported type " + builder.contentType()); + }; + + int position = 1; + BytesRefIterator itr = b.iterator(); + BytesRef ref; + while ((ref = itr.next()) != null) { + System.arraycopy(ref.bytes, ref.offset, encoded, position, ref.length); + position += ref.length; + } + assert position == encoded.length; + return encoded; + } + + static void decodeAndWriteXContent(XContentBuilder b, XContentType type, BytesRef r) throws IOException { + try ( + XContentParser parser = type.xContent().createParser(XContentParserConfiguration.EMPTY, r.bytes, r.offset + 1, r.length - 1) + ) { + b.copyCurrentStructure(parser); + } + } + } +} diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index b17777fc5a91e..17e0105d59d8c 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.GeoPointScriptFieldType; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.IgnoredFieldMapper; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.IndexFieldMapper; import org.elasticsearch.index.mapper.IpFieldMapper; import org.elasticsearch.index.mapper.IpScriptFieldType; @@ -258,6 +259,7 @@ private static Map initBuiltInMetadataMa builtInMetadataMappers.put(TimeSeriesRoutingHashFieldMapper.NAME, TimeSeriesRoutingHashFieldMapper.PARSER); builtInMetadataMappers.put(IndexFieldMapper.NAME, IndexFieldMapper.PARSER); builtInMetadataMappers.put(SourceFieldMapper.NAME, SourceFieldMapper.PARSER); + builtInMetadataMappers.put(IgnoredSourceFieldMapper.NAME, IgnoredSourceFieldMapper.PARSER); builtInMetadataMappers.put(NestedPathFieldMapper.NAME, NestedPathFieldMapper.PARSER); builtInMetadataMappers.put(VersionFieldMapper.NAME, VersionFieldMapper.PARSER); builtInMetadataMappers.put(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PARSER); diff --git a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification index cdb35cb9ac660..a158f91903c70 100644 --- a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification 
+++ b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -13,4 +13,5 @@ org.elasticsearch.cluster.metadata.MetadataFeatures org.elasticsearch.rest.RestFeatures org.elasticsearch.indices.IndicesFeatures org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures +org.elasticsearch.index.mapper.MapperFeatures org.elasticsearch.search.retriever.RetrieversFeatures diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index c1fd872e89f45..06e70e84bbb67 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -13,6 +13,7 @@ import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.json.JsonXContent; +import org.hamcrest.Matchers; import java.io.IOException; import java.util.List; @@ -97,7 +98,7 @@ public void testSyntheticSourceMany() throws IOException { } }, reader -> { SourceLoader loader = mapper.mappingLookup().newSourceLoader(); - assertTrue(loader.requiredStoredFields().isEmpty()); + assertThat(loader.requiredStoredFields(), Matchers.contains("_ignored_source")); for (LeafReaderContext leaf : reader.leaves()) { int[] docIds = IntStream.range(0, leaf.reader().maxDoc()).toArray(); SourceLoader.Leaf sourceLoaderLeaf = loader.leaf(leaf.reader(), docIds); @@ -129,7 +130,7 @@ public void testSyntheticSourceManyDoNotHave() throws IOException { } }, reader -> { SourceLoader loader = mapper.mappingLookup().newSourceLoader(); - assertTrue(loader.requiredStoredFields().isEmpty()); + assertThat(loader.requiredStoredFields(), Matchers.contains("_ignored_source")); for (LeafReaderContext leaf : reader.leaves()) { int[] docIds = IntStream.range(0, leaf.reader().maxDoc()).toArray(); SourceLoader.Leaf sourceLoaderLeaf = loader.leaf(leaf.reader(), docIds); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentMapperTests.java index 486b33d9b155a..c210fb0654683 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentMapperTests.java @@ -319,6 +319,7 @@ public void testEmptyDocumentMapper() { .item(DocCountFieldMapper.class) .item(FieldNamesFieldMapper.class) .item(IgnoredFieldMapper.class) + .item(IgnoredSourceFieldMapper.class) .item(IndexFieldMapper.class) .item(NestedPathFieldMapper.class) .item(ProvidedIdFieldMapper.class) @@ -336,6 +337,7 @@ public void testEmptyDocumentMapper() { .item(FieldNamesFieldMapper.CONTENT_TYPE) .item(IdFieldMapper.CONTENT_TYPE) .item(IgnoredFieldMapper.CONTENT_TYPE) + .item(IgnoredSourceFieldMapper.NAME) .item(IndexFieldMapper.CONTENT_TYPE) .item(NestedPathFieldMapper.NAME) .item(RoutingFieldMapper.CONTENT_TYPE) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java new file mode 100644 index 0000000000000..a21c3993d4f2b --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -0,0 +1,148 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.xcontent.XContentBuilder; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.Base64; + +public class IgnoredSourceFieldMapperTests extends MapperServiceTestCase { + + private String getSyntheticSource(CheckedConsumer build) throws IOException { + DocumentMapper documentMapper = createMapperService( + Settings.builder() + .put("index.mapping.total_fields.limit", 2) + .put("index.mapping.total_fields.ignore_dynamic_beyond_limit", true) + .build(), + syntheticSourceMapping(b -> { + b.startObject("foo").field("type", "keyword").endObject(); + b.startObject("bar").field("type", "object").endObject(); + }) + ).documentMapper(); + return syntheticSource(documentMapper, build); + } + + public void testIgnoredBoolean() throws IOException { + boolean value = randomBoolean(); + assertEquals("{\"my_value\":" + value + "}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testIgnoredString() throws IOException { + String value = randomAlphaOfLength(5); + assertEquals("{\"my_value\":\"" + value + "\"}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testIgnoredInt() throws IOException { + int value = randomInt(); + assertEquals("{\"my_value\":" + value + "}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testIgnoredLong() throws IOException { + long value = randomLong(); + assertEquals("{\"my_value\":" + value + "}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testIgnoredFloat() throws IOException { + float value = randomFloat(); + assertEquals("{\"my_value\":" + value + "}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testIgnoredDouble() throws IOException { + double value = randomDouble(); + assertEquals("{\"my_value\":" + value + "}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testIgnoredBigInteger() throws IOException { + BigInteger value = randomBigInteger(); + assertEquals("{\"my_value\":" + value + "}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testIgnoredBytes() throws IOException { + byte[] value = randomByteArrayOfLength(10); + assertEquals( + "{\"my_value\":\"" + Base64.getEncoder().encodeToString(value) + "\"}", + getSyntheticSource(b -> b.field("my_value", value)) + ); + } + + public void testIgnoredObjectBoolean() throws IOException { + boolean value = randomBoolean(); + assertEquals("{\"my_value\":" + value + "}", getSyntheticSource(b -> b.field("my_value", value))); + } + + public void testMultipleIgnoredFieldsRootObject() throws IOException { + boolean booleanValue = randomBoolean(); + int intValue = randomInt(); + String stringValue = randomAlphaOfLength(20); + String syntheticSource = getSyntheticSource(b -> { + b.field("boolean_value", booleanValue); + b.field("int_value", intValue); + b.field("string_value", stringValue); + }); + assertThat(syntheticSource, Matchers.containsString("\"boolean_value\":" + booleanValue)); + assertThat(syntheticSource, 
Matchers.containsString("\"int_value\":" + intValue)); + assertThat(syntheticSource, Matchers.containsString("\"string_value\":\"" + stringValue + "\"")); + } + + public void testMultipleIgnoredFieldsSameObject() throws IOException { + boolean booleanValue = randomBoolean(); + int intValue = randomInt(); + String stringValue = randomAlphaOfLength(20); + String syntheticSource = getSyntheticSource(b -> { + b.startObject("bar"); + { + b.field("boolean_value", booleanValue); + b.field("int_value", intValue); + b.field("string_value", stringValue); + } + b.endObject(); + }); + assertThat(syntheticSource, Matchers.containsString("{\"bar\":{")); + assertThat(syntheticSource, Matchers.containsString("\"boolean_value\":" + booleanValue)); + assertThat(syntheticSource, Matchers.containsString("\"int_value\":" + intValue)); + assertThat(syntheticSource, Matchers.containsString("\"string_value\":\"" + stringValue + "\"")); + } + + public void testMultipleIgnoredFieldsManyObjects() throws IOException { + boolean booleanValue = randomBoolean(); + int intValue = randomInt(); + String stringValue = randomAlphaOfLength(20); + String syntheticSource = getSyntheticSource(b -> { + b.field("boolean_value", booleanValue); + b.startObject("path"); + { + b.startObject("to"); + { + b.field("int_value", intValue); + b.startObject("some"); + { + b.startObject("deeply"); + { + b.startObject("nested"); + b.field("string_value", stringValue); + b.endObject(); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + }); + assertThat(syntheticSource, Matchers.containsString("\"boolean_value\":" + booleanValue)); + assertThat(syntheticSource, Matchers.containsString("\"path\":{\"to\":{\"int_value\":" + intValue)); + assertThat(syntheticSource, Matchers.containsString("\"some\":{\"deeply\":{\"nested\":{\"string_value\":\"" + stringValue + "\"")); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/XContentDataHelperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/XContentDataHelperTests.java new file mode 100644 index 0000000000000..06db79c3f9fb0 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/XContentDataHelperTests.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.json.JsonXContent; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +public class XContentDataHelperTests extends ESTestCase { + + private String encodeAndDecode(String value) throws IOException { + XContentParser p = createParser(JsonXContent.jsonXContent, "{ \"foo\": " + value + " }"); + assertThat(p.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(p.currentName(), equalTo("foo")); + p.nextToken(); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.humanReadable(true); + XContentDataHelper.decodeAndWrite(builder, XContentDataHelper.encodeToken(p)); + return Strings.toString(builder); + } + + public void testBoolean() throws IOException { + boolean b = randomBoolean(); + assertEquals(b, Boolean.parseBoolean(encodeAndDecode(Boolean.toString(b)))); + } + + public void testString() throws IOException { + String s = "\"" + randomAlphaOfLength(5) + "\""; + assertEquals(s, encodeAndDecode(s)); + } + + public void testInt() throws IOException { + int i = randomInt(); + assertEquals(i, Integer.parseInt(encodeAndDecode(Integer.toString(i)))); + } + + public void testLong() throws IOException { + long l = randomLong(); + assertEquals(l, Long.parseLong(encodeAndDecode(Long.toString(l)))); + } + + public void testFloat() throws IOException { + float f = randomFloat(); + assertEquals(0, Float.compare(f, Float.parseFloat(encodeAndDecode(Float.toString(f))))); + } + + public void testDouble() throws IOException { + double d = randomDouble(); + assertEquals(0, Double.compare(d, Double.parseDouble(encodeAndDecode(Double.toString(d))))); + } + + public void testBigInteger() throws IOException { + BigInteger i = randomBigInteger(); + assertEquals(i, new BigInteger(encodeAndDecode(i.toString()), 10)); + } + + public void testObject() throws IOException { + String object = "{\"name\":\"foo\"}"; + XContentParser p = createParser(JsonXContent.jsonXContent, object); + assertThat(p.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.humanReadable(true); + XContentDataHelper.decodeAndWrite(builder, XContentDataHelper.encodeToken(p)); + assertEquals(object, Strings.toString(builder)); + } + + public void testArrayInt() throws IOException { + String values = "[" + + String.join(",", List.of(Integer.toString(randomInt()), Integer.toString(randomInt()), Integer.toString(randomInt()))) + + "]"; + assertEquals(values, encodeAndDecode(values)); + } +} diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java index 0216bad7cf7a3..c173a22dcdf57 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.IgnoredFieldMapper; +import 
org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.IndexFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.Mapper; @@ -85,6 +86,7 @@ public Map getMetadataMappers() { TimeSeriesRoutingHashFieldMapper.NAME, IndexFieldMapper.NAME, SourceFieldMapper.NAME, + IgnoredSourceFieldMapper.NAME, NestedPathFieldMapper.NAME, VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, From fccb2e7d04e17e920e145cf590cfc76ae146d409 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 26 Apr 2024 08:41:22 -0400 Subject: [PATCH 03/14] ESQL: Estimate memory usage on `Block.Builder` (#107923) This adds a method to `Block.Builder` that estimates the number of bytes that'll be used by the `Block` that it builds. It's not always accurate, but it is directional. --- .../compute/data/BooleanArrayBlock.java | 2 +- .../data/BooleanVectorFixedBuilder.java | 5 ++++ .../compute/data/BytesRefArrayBlock.java | 2 +- .../compute/data/BytesRefBlockBuilder.java | 5 ++++ .../compute/data/DoubleArrayBlock.java | 2 +- .../data/DoubleVectorFixedBuilder.java | 5 ++++ .../compute/data/IntArrayBlock.java | 2 +- .../elasticsearch/compute/data/IntBlock.java | 7 ------ .../compute/data/IntBlockBuilder.java | 5 ---- .../compute/data/IntVectorFixedBuilder.java | 5 ++++ .../compute/data/LongArrayBlock.java | 2 +- .../compute/data/LongVectorFixedBuilder.java | 5 ++++ .../compute/data/AbstractBlockBuilder.java | 5 ++++ .../compute/data/AbstractVectorBuilder.java | 5 ++++ .../org/elasticsearch/compute/data/Block.java | 7 ++++++ .../compute/data/ConstantNullBlock.java | 5 ++++ .../elasticsearch/compute/data/DocBlock.java | 5 ++++ .../elasticsearch/compute/data/DocVector.java | 2 +- .../data/SingletonOrdinalsBuilder.java | 20 +++++++++++---- .../elasticsearch/compute/data/Vector.java | 7 ++++++ .../compute/data/X-ArrayBlock.java.st | 2 +- .../compute/data/X-Block.java.st | 9 ------- .../compute/data/X-BlockBuilder.java.st | 11 ++++---- .../compute/data/X-VectorFixedBuilder.java.st | 5 ++++ .../compute/data/BlockBuilderTests.java | 11 ++++++++ .../compute/data/TestBlockBuilder.java | 25 +++++++++++++++++++ 26 files changed, 127 insertions(+), 39 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java index 710eb17f72f6a..2ec68d268ae8a 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java @@ -20,7 +20,7 @@ */ final class BooleanArrayBlock extends AbstractArrayBlock implements BooleanBlock { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BooleanArrayBlock.class); + static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BooleanArrayBlock.class); private final BooleanArrayVector vector; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorFixedBuilder.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorFixedBuilder.java index 5977dc5de36f0..4cc2ec17b6ad4 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorFixedBuilder.java +++ 
b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorFixedBuilder.java @@ -46,6 +46,11 @@ private static long ramBytesUsed(int size) { ); } + @Override + public long estimatedBytes() { + return ramBytesUsed(values.length); + } + @Override public BooleanVector build() { if (nextIndex < 0) { diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayBlock.java index 6cc66183db2ed..8eaf07b473a3a 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayBlock.java @@ -23,7 +23,7 @@ */ final class BytesRefArrayBlock extends AbstractArrayBlock implements BytesRefBlock { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BytesRefArrayBlock.class); + static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BytesRefArrayBlock.class); private final BytesRefArrayVector vector; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlockBuilder.java index 4ef7ed4084228..49075789ed4a4 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlockBuilder.java @@ -140,6 +140,11 @@ public BytesRefBlockBuilder mvOrdering(Block.MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return super.estimatedBytes() + BytesRefArrayBlock.BASE_RAM_BYTES_USED + values.ramBytesUsed(); + } + private BytesRefBlock buildFromBytesArray() { assert estimatedBytes == 0 || firstValueIndexes != null; final BytesRefBlock theBlock; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayBlock.java index d872a4938a734..d545fca4fca8d 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayBlock.java @@ -20,7 +20,7 @@ */ final class DoubleArrayBlock extends AbstractArrayBlock implements DoubleBlock { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DoubleArrayBlock.class); + static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DoubleArrayBlock.class); private final DoubleArrayVector vector; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorFixedBuilder.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorFixedBuilder.java index c58856afa0266..42cdd0f5667ff 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorFixedBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorFixedBuilder.java @@ -46,6 +46,11 @@ private static long ramBytesUsed(int size) { ); } + @Override + public long estimatedBytes() { 
+ return ramBytesUsed(values.length); + } + @Override public DoubleVector build() { if (nextIndex < 0) { diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayBlock.java index 492769d1f3d43..41c9d3b84485d 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayBlock.java @@ -20,7 +20,7 @@ */ final class IntArrayBlock extends AbstractArrayBlock implements IntBlock { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntArrayBlock.class); + static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntArrayBlock.class); private final IntArrayVector vector; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java index 2747862d534b7..e9d606b51c6a1 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java @@ -223,13 +223,6 @@ sealed interface Builder extends Block.Builder, BlockLoader.IntBuilder permits I @Override Builder mvOrdering(Block.MvOrdering mvOrdering); - /** - * An estimate of the number of bytes the {@link IntBlock} created by - * {@link #build} will use. This may overestimate the size but shouldn't - * underestimate it. - */ - long estimatedBytes(); - @Override IntBlock build(); } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlockBuilder.java index 886bf98f4e049..85f943004de29 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlockBuilder.java @@ -182,9 +182,4 @@ public IntBlock build() { throw e; } } - - @Override - public long estimatedBytes() { - return estimatedBytes; - } } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorFixedBuilder.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorFixedBuilder.java index b143e9d592dc6..77e3511a5cb54 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorFixedBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorFixedBuilder.java @@ -46,6 +46,11 @@ private static long ramBytesUsed(int size) { ); } + @Override + public long estimatedBytes() { + return ramBytesUsed(values.length); + } + @Override public IntVector build() { if (nextIndex < 0) { diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayBlock.java index 77ae863e41ff0..56370f718bae0 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayBlock.java +++ 
b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayBlock.java @@ -20,7 +20,7 @@ */ final class LongArrayBlock extends AbstractArrayBlock implements LongBlock { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(LongArrayBlock.class); + static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(LongArrayBlock.class); private final LongArrayVector vector; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorFixedBuilder.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorFixedBuilder.java index ccf87da153667..2ad259198bf1b 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorFixedBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorFixedBuilder.java @@ -46,6 +46,11 @@ private static long ramBytesUsed(int size) { ); } + @Override + public long estimatedBytes() { + return ramBytesUsed(values.length); + } + @Override public LongVector build() { if (nextIndex < 0) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractBlockBuilder.java index abf3a243b7682..5fac64735155d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractBlockBuilder.java @@ -120,6 +120,11 @@ protected final void finish() { } } + @Override + public long estimatedBytes() { + return estimatedBytes; + } + /** * Called during implementations of {@link Block.Builder#build} as a last step * to mark the Builder as closed and make sure that further closes don't double diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractVectorBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractVectorBuilder.java index 0f86a79700b4b..7ee4ff2441f4e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractVectorBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/AbstractVectorBuilder.java @@ -62,6 +62,11 @@ protected final void finish() { } } + @Override + public long estimatedBytes() { + return estimatedBytes; + } + /** * Called during implementations of {@link Block.Builder#build} as a last step * to mark the Builder as closed and make sure that further closes don't double diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java index 1e6422a5c31da..709ad4165170d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java @@ -200,6 +200,13 @@ interface Builder extends BlockLoader.Builder, Releasable { */ Builder mvOrdering(Block.MvOrdering mvOrdering); + /** + * An estimate of the number of bytes the {@link Block} created by + * {@link #build} will use. This may overestimate the size but shouldn't + * underestimate it. + */ + long estimatedBytes(); + /** * Builds the block. This method can be called multiple times. 
*/ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index 3df75f4bc1c56..bdeb5334e0da7 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -182,6 +182,11 @@ public Block.Builder mvOrdering(MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return BASE_RAM_BYTES_USED; + } + @Override public Block build() { if (closed) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java index 2751cd31fd362..f454abe7d2cfe 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java @@ -160,6 +160,11 @@ public Block.Builder mvOrdering(MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return DocVector.BASE_RAM_BYTES_USED + shards.estimatedBytes() + segments.estimatedBytes() + docs.estimatedBytes(); + } + @Override public DocBlock build() { // Pass null for singleSegmentNonDecreasing so we calculate it when we first need it. diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java index 2404217d11f95..067fddd311cc7 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java @@ -18,7 +18,7 @@ */ public final class DocVector extends AbstractVector implements Vector { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DocVector.class); + static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DocVector.class); /** * Per position memory cost to build the shard segment doc map required diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java index fd9dd6a479298..576bde5cdf676 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java @@ -151,13 +151,23 @@ BytesRefBlock buildRegularBlock() { } } + @Override + public long estimatedBytes() { + /* + * This is a *terrible* estimate because we have no idea how big the + * values in the ordinals are. + */ + long overhead = shouldBuildOrdinalsBlock() ? 5 : 20; + return ords.length * overhead; + } + @Override public BytesRefBlock build() { - if (ords.length >= 2 * docValues.getValueCount() && ords.length >= 32) { - return buildOrdinal(); - } else { - return buildRegularBlock(); - } + return shouldBuildOrdinalsBlock() ? 
buildOrdinal() : buildRegularBlock(); + } + + boolean shouldBuildOrdinalsBlock() { + return ords.length >= 2 * docValues.getValueCount() && ords.length >= 32; } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java index 84722fad93b7f..89b39569be454 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java @@ -62,6 +62,13 @@ public interface Vector extends Accountable, RefCounted, Releasable { * This is {@link Releasable} and should be released after building the vector or if building the vector fails. */ interface Builder extends Releasable { + /** + * An estimate of the number of bytes the {@link Vector} created by + * {@link #build} will use. This may overestimate the size but shouldn't + * underestimate it. + */ + long estimatedBytes(); + /** * Builds the block. This method can be called multiple times. */ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st index a7c5f10032394..9b153317c8a0e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st @@ -32,7 +32,7 @@ $endif$ */ final class $Type$ArrayBlock extends AbstractArrayBlock implements $Type$Block { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance($Type$ArrayBlock.class); + static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance($Type$ArrayBlock.class); private final $Type$ArrayVector vector; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st index b82061b85760a..331a5713fa3d1 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st @@ -277,15 +277,6 @@ $endif$ @Override Builder mvOrdering(Block.MvOrdering mvOrdering); -$if(int)$ - /** - * An estimate of the number of bytes the {@link IntBlock} created by - * {@link #build} will use. This may overestimate the size but shouldn't - * underestimate it. 
- */ - long estimatedBytes(); - -$endif$ @Override $Type$Block build(); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BlockBuilder.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BlockBuilder.java.st index 347f37cd7828d..0d3d2293a1bb1 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BlockBuilder.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BlockBuilder.java.st @@ -188,6 +188,11 @@ $endif$ } $if(BytesRef)$ + @Override + public long estimatedBytes() { + return super.estimatedBytes() + BytesRefArrayBlock.BASE_RAM_BYTES_USED + values.ramBytesUsed(); + } + private $Type$Block buildFromBytesArray() { assert estimatedBytes == 0 || firstValueIndexes != null; final $Type$Block theBlock; @@ -295,11 +300,5 @@ $if(BytesRef)$ public void extraClose() { Releasables.closeExpectNoException(values); } -$elseif(int)$ - - @Override - public long estimatedBytes() { - return estimatedBytes; - } $endif$ } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorFixedBuilder.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorFixedBuilder.java.st index 43401d59095f4..af783a2435251 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorFixedBuilder.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorFixedBuilder.java.st @@ -46,6 +46,11 @@ final class $Type$VectorFixedBuilder implements $Type$Vector.FixedBuilder { ); } + @Override + public long estimatedBytes() { + return ramBytesUsed(values.length); + } + @Override public $Type$Vector build() { if (nextIndex < 0) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java index a48e22e9ccefa..6b5c37ee26888 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java @@ -21,8 +21,11 @@ import java.util.ArrayList; import java.util.List; +import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThan; public class BlockBuilderTests extends ESTestCase { @ParametersFactory @@ -58,6 +61,10 @@ private void testAllNullsImpl(Block.Builder builder, int numEntries) { for (int i = 0; i < numEntries; i++) { builder.appendNull(); } + assertThat( + builder.estimatedBytes(), + both(greaterThan(blockFactory.breaker().getUsed() - 1024)).and(lessThan(blockFactory.breaker().getUsed() + 1024)) + ); try (Block block = builder.build()) { assertThat(block.getPositionCount(), is(numEntries)); for (int p = 0; p < numEntries; p++) { @@ -113,6 +120,10 @@ private void testBuild(int size, boolean nullable, int maxValueCount) { try (Block.Builder builder = elementType.newBlockBuilder(randomBoolean() ? 
size : 1, blockFactory)) { BasicBlockTests.RandomBlock random = BasicBlockTests.randomBlock(elementType, size, nullable, 1, maxValueCount, 0, 0); builder.copyFrom(random.block(), 0, random.block().getPositionCount()); + assertThat( + builder.estimatedBytes(), + both(greaterThan(blockFactory.breaker().getUsed() - 1024)).and(lessThan(blockFactory.breaker().getUsed() + 1024)) + ); try (Block built = builder.build()) { assertThat(built, equalTo(random.block())); assertThat(blockFactory.breaker().getUsed(), equalTo(built.ramBytesUsed())); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/TestBlockBuilder.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/TestBlockBuilder.java index 4595b26ca27aa..33a3531481df9 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/TestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/TestBlockBuilder.java @@ -113,6 +113,11 @@ public TestBlockBuilder mvOrdering(Block.MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return builder.estimatedBytes(); + } + @Override public IntBlock build() { return builder.build(); @@ -168,6 +173,11 @@ public TestBlockBuilder mvOrdering(Block.MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return builder.estimatedBytes(); + } + @Override public LongBlock build() { return builder.build(); @@ -223,6 +233,11 @@ public TestBlockBuilder mvOrdering(Block.MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return builder.estimatedBytes(); + } + @Override public DoubleBlock build() { return builder.build(); @@ -278,6 +293,11 @@ public TestBlockBuilder mvOrdering(Block.MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return builder.estimatedBytes(); + } + @Override public BytesRefBlock build() { return builder.build(); @@ -336,6 +356,11 @@ public TestBlockBuilder mvOrdering(Block.MvOrdering mvOrdering) { return this; } + @Override + public long estimatedBytes() { + return builder.estimatedBytes(); + } + @Override public BooleanBlock build() { return builder.build(); From 1507c8767e27762a1fe4ac6fe57bcfbf516c5ba2 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 26 Apr 2024 08:45:27 -0400 Subject: [PATCH 04/14] ESQL: Add a `HashLookupOperator` (#107894)

Adds an operator that takes a `Block` on construction, builds a `BlockHash`, and uses it to resolve keys to row offsets. Example time! Say we hand-construct the operator with the first two columns of this `Page`:

| a | b | v |
| -:| --:| --:|
| 1 | 11 | 21 |
| 2 | 12 | 22 |
| 2 | 14 | 23 |
| 2 | 11 | 24 |

If we then feed the first two columns of this `Page` into it, we'll get the third column:

| a | b | ord |
| -----:| --:| -----:|
| 2 | 14 | 2 |
| 1 | 11 | 0 |
| 3 | 11 | null |
| [1,2] | 11 | [0,3] |

This is the first half of the `Operator` side of a hash join. The second half is looking up values from those row offsets. That'd mean adding the `v` column like so:

| a | b | ord | v |
| -----:| --:| -----:| -------:|
| 2 | 14 | 2 | 23 |
| 1 | 11 | 0 | 21 |
| 3 | 11 | null | null |
| [1,2] | 11 | [0,3] | [21,24] |

And *that* is comparatively simple. Notice that I said this is the *Operator* side of a hash join. There's no planning or distributed execution involved. Yet. And a hash join is something you'd distribute. This `Operator` can run on a data node or a coordinating node. It doesn't care.
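For illustration only, here is a minimal, self-contained Java sketch of the key-to-row-offset resolution the tables above describe. The names are hypothetical, plain arrays and a `HashMap` stand in for `Block`s and the `BlockHash`, and multivalued keys are not handled:

    import java.util.HashMap;
    import java.util.Map;

    // "First half" of a hash join: resolve (a, b) keys to the row offset at which that
    // key first appeared on the build side, then use that offset to fetch v.
    public class HashLookupSketch {
        record Key(int a, int b) {}

        public static void main(String[] args) {
            // Build side: the first two columns of the page above, plus v for the lookup step.
            int[] buildA = { 1, 2, 2, 2 };
            int[] buildB = { 11, 12, 14, 11 };
            int[] buildV = { 21, 22, 23, 24 };
            Map<Key, Integer> keyToRow = new HashMap<>();
            for (int row = 0; row < buildA.length; row++) {
                keyToRow.putIfAbsent(new Key(buildA[row], buildB[row]), row); // first-seen offset wins
            }

            // Probe side: each incoming key resolves to a row offset, or null when absent.
            int[] probeA = { 2, 1, 3 };
            int[] probeB = { 14, 11, 11 };
            for (int row = 0; row < probeA.length; row++) {
                Integer ord = keyToRow.get(new Key(probeA[row], probeB[row]));
                // "Second half" of the join: use the resolved offset to fetch v.
                String v = ord == null ? "null" : String.valueOf(buildV[ord]);
                System.out.println(probeA[row] + "," + probeB[row] + " -> ord=" + ord + " v=" + v);
                // prints: 2,14 -> ord=2 v=23 / 1,11 -> ord=0 v=21 / 3,11 -> ord=null v=null
            }
        }
    }

The `ord` column in the tables is exactly this resolved offset; a multivalued key like `[1,2]` would simply resolve each of its values in turn.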
It just needs an input. --- .../org/elasticsearch/TransportVersions.java | 1 + .../aggregation/blockhash/BlockHash.java | 7 + .../blockhash/PackedValuesBlockHash.java | 78 +++-- .../org/elasticsearch/compute/data/Page.java | 32 ++ ...AbstractPageMappingToIteratorOperator.java | 291 ++++++++++++++++++ .../compute/operator/HashLookupOperator.java | 138 +++++++++ .../compute/operator/ProjectOperator.java | 26 +- .../elasticsearch/compute/OperatorTests.java | 70 +++++ ...eMappingToIteratorOperatorStatusTests.java | 60 ++++ .../operator/HashLookupOperatorTests.java | 48 +++ .../operator/IteratorAppendPageTests.java | 116 +++++++ .../operator/IteratorRemovePageTests.java | 118 +++++++ .../compute/operator/OperatorTestCase.java | 8 +- .../xpack/esql/plugin/EsqlPlugin.java | 2 + 14 files changed, 923 insertions(+), 72 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperator.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashLookupOperator.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperatorStatusTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashLookupOperatorTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorAppendPageTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorRemovePageTests.java diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 2bdb3368e1b5c..fc4323e418b72 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -183,6 +183,7 @@ static TransportVersion def(int id) { public static final TransportVersion INDEX_SEGMENTS_VECTOR_FORMATS = def(8_642_00_0); public static final TransportVersion ADD_RESOURCE_ALREADY_UPLOADED_EXCEPTION = def(8_643_00_0); public static final TransportVersion ESQL_MV_ORDERING_SORTED_ASCENDING = def(8_644_00_0); + public static final TransportVersion ESQL_PAGE_MAPPING_TO_ITERATOR = def(8_645_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java index 1e7ecebc16a62..431d8fe3bcd5d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java @@ -111,6 +111,13 @@ public static BlockHash build(List groups, BlockFactory blockFactory, return new PackedValuesBlockHash(groups, blockFactory, emitBatchSize); } + /** + * Temporary method to build a {@link PackedValuesBlockHash}. + */ + public static BlockHash buildPackedValuesBlockHash(List groups, BlockFactory blockFactory, int emitBatchSize) { + return new PackedValuesBlockHash(groups, blockFactory, emitBatchSize); + } + /** * Creates a specialized hash table that maps a {@link Block} of the given input element type to ids. 
*/ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/PackedValuesBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/PackedValuesBlockHash.java index 85c535faf3180..769155db5ecfa 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/PackedValuesBlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/PackedValuesBlockHash.java @@ -9,7 +9,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.BitArray; @@ -24,6 +23,7 @@ import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.operator.mvdedupe.BatchEncoder; import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupe; +import org.elasticsearch.core.Releasable; import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; @@ -65,14 +65,14 @@ final class PackedValuesBlockHash extends BlockHash { private final BytesRefHash bytesRefHash; private final int nullTrackingBytes; private final BytesRefBuilder bytes = new BytesRefBuilder(); - private final Group[] groups; + private final List specs; PackedValuesBlockHash(List specs, BlockFactory blockFactory, int emitBatchSize) { super(blockFactory); - this.groups = specs.stream().map(Group::new).toArray(Group[]::new); + this.specs = specs; this.emitBatchSize = emitBatchSize; this.bytesRefHash = new BytesRefHash(1, blockFactory.bigArrays()); - this.nullTrackingBytes = (groups.length + 7) / 8; + this.nullTrackingBytes = (specs.size() + 7) / 8; bytes.grow(nullTrackingBytes); } @@ -90,9 +90,9 @@ void add(Page page, GroupingAggregatorFunction.AddInput addInput, int batchSize) /** * The on-heap representation of a {@code for} loop for each group key. 
*/ - private static class Group { + private static class Group implements Releasable { final GroupSpec spec; - BatchEncoder encoder; + final BatchEncoder encoder; int positionOffset; int valueOffset; /** @@ -107,18 +107,25 @@ private static class Group { int valueCount; int bytesStart; - Group(GroupSpec spec) { + Group(GroupSpec spec, Page page, int batchSize) { this.spec = spec; + this.encoder = MultivalueDedupe.batchEncoder(page.getBlock(spec.channel()), batchSize, true); + } + + @Override + public void close() { + encoder.close(); } } class AddWork extends AbstractAddBlock { + final Group[] groups; final int positionCount; int position; AddWork(Page page, GroupingAggregatorFunction.AddInput addInput, int batchSize) { super(blockFactory, emitBatchSize, addInput); - initializeGroupsForPage(page, batchSize); + this.groups = specs.stream().map(s -> new Group(s, page, batchSize)).toArray(Group[]::new); this.positionCount = page.getPositionCount(); } @@ -129,7 +136,7 @@ class AddWork extends AbstractAddBlock { */ void add() { for (position = 0; position < positionCount; position++) { - boolean singleEntry = startPosition(); + boolean singleEntry = startPosition(groups); if (singleEntry) { addSingleEntry(); } else { @@ -140,7 +147,7 @@ void add() { } private void addSingleEntry() { - fillBytesSv(); + fillBytesSv(groups); ords.appendInt(Math.toIntExact(hashOrdToGroup(bytesRefHash.add(bytes.get())))); addedValue(position); } @@ -149,13 +156,13 @@ private void addMultipleEntries() { ords.beginPositionEntry(); int g = 0; do { - fillBytesMv(g); + fillBytesMv(groups, g); // emit ords ords.appendInt(Math.toIntExact(hashOrdToGroup(bytesRefHash.add(bytes.get())))); addedValueInMultivaluePosition(position); - g = rewindKeys(); + g = rewindKeys(groups); } while (g >= 0); ords.endPositionEntry(); for (Group group : groups) { @@ -165,10 +172,7 @@ private void addMultipleEntries() { @Override public void close() { - Releasables.closeExpectNoException( - super::close, - Releasables.wrap(() -> Iterators.map(Iterators.forArray(groups), g -> g.encoder)) - ); + Releasables.closeExpectNoException(super::close, Releasables.wrap(groups)); } } @@ -178,14 +182,15 @@ public ReleasableIterator lookup(Page page, ByteSizeValue targetBlockS } class LookupWork implements ReleasableIterator { + private final Group[] groups; private final long targetBytesSize; private final int positionCount; private int position; LookupWork(Page page, long targetBytesSize, int batchSize) { + this.groups = specs.stream().map(s -> new Group(s, page, batchSize)).toArray(Group[]::new); this.positionCount = page.getPositionCount(); this.targetBytesSize = targetBytesSize; - initializeGroupsForPage(page, batchSize); } @Override @@ -198,7 +203,7 @@ public IntBlock next() { int size = Math.toIntExact(Math.min(Integer.MAX_VALUE, targetBytesSize / Integer.BYTES / 2)); try (IntBlock.Builder ords = blockFactory.newIntBlockBuilder(size)) { while (position < positionCount && ords.estimatedBytes() < targetBytesSize) { - boolean singleEntry = startPosition(); + boolean singleEntry = startPosition(groups); if (singleEntry) { lookupSingleEntry(ords); } else { @@ -211,7 +216,7 @@ public IntBlock next() { } private void lookupSingleEntry(IntBlock.Builder ords) { - fillBytesSv(); + fillBytesSv(groups); long found = bytesRefHash.find(bytes.get()); if (found < 0) { ords.appendNull(); @@ -226,7 +231,7 @@ private void lookupMultipleEntries(IntBlock.Builder ords) { int g = 0; int count = 0; do { - fillBytesMv(g); + fillBytesMv(groups, g); // emit ords long found = 
bytesRefHash.find(bytes.get()); @@ -248,7 +253,7 @@ private void lookupMultipleEntries(IntBlock.Builder ords) { } } } - g = rewindKeys(); + g = rewindKeys(groups); } while (g >= 0); if (firstFound < 0) { ords.appendNull(); @@ -265,24 +270,17 @@ private void lookupMultipleEntries(IntBlock.Builder ords) { @Override public void close() { - Releasables.closeExpectNoException(Releasables.wrap(() -> Iterators.map(Iterators.forArray(groups), g -> g.encoder))); - } - } - - private void initializeGroupsForPage(Page page, int batchSize) { - for (Group group : groups) { - Block b = page.getBlock(group.spec.channel()); - group.encoder = MultivalueDedupe.batchEncoder(b, batchSize, true); + Releasables.closeExpectNoException(groups); } } /** - * Correctly position all {@link #groups}, clear the {@link #bytes}, + * Correctly position all {@code groups}, clear the {@link #bytes}, * and position it past the null tracking bytes. Call this before * encoding a new position. * @return true if this position has only a single ordinal */ - private boolean startPosition() { + private boolean startPosition(Group[] groups) { boolean singleEntry = true; for (Group g : groups) { /* @@ -304,7 +302,7 @@ private boolean startPosition() { return singleEntry; } - private void fillBytesSv() { + private void fillBytesSv(Group[] groups) { for (int g = 0; g < groups.length; g++) { Group group = groups[g]; assert group.writtenValues == 0; @@ -317,7 +315,7 @@ private void fillBytesSv() { } } - private void fillBytesMv(int startingGroup) { + private void fillBytesMv(Group[] groups, int startingGroup) { for (int g = startingGroup; g < groups.length; g++) { Group group = groups[g]; group.bytesStart = bytes.length(); @@ -331,7 +329,7 @@ private void fillBytesMv(int startingGroup) { } } - private int rewindKeys() { + private int rewindKeys(Group[] groups) { int g = groups.length - 1; Group group = groups[g]; bytes.setLength(group.bytesStart); @@ -350,11 +348,11 @@ private int rewindKeys() { @Override public Block[] getKeys() { int size = Math.toIntExact(bytesRefHash.size()); - BatchEncoder.Decoder[] decoders = new BatchEncoder.Decoder[groups.length]; - Block.Builder[] builders = new Block.Builder[groups.length]; + BatchEncoder.Decoder[] decoders = new BatchEncoder.Decoder[specs.size()]; + Block.Builder[] builders = new Block.Builder[specs.size()]; try { for (int g = 0; g < builders.length; g++) { - ElementType elementType = groups[g].spec.elementType(); + ElementType elementType = specs.get(g).elementType(); decoders[g] = BatchEncoder.decoder(elementType); builders[g] = elementType.newBlockBuilder(size, blockFactory); } @@ -424,12 +422,12 @@ public String toString() { StringBuilder b = new StringBuilder(); b.append("PackedValuesBlockHash{groups=["); boolean first = true; - for (int i = 0; i < groups.length; i++) { + for (int i = 0; i < specs.size(); i++) { if (i > 0) { b.append(", "); } - Group group = groups[i]; - b.append(group.spec.channel()).append(':').append(group.spec.elementType()); + GroupSpec spec = specs.get(i); + b.append(spec.channel()).append(':').append(spec.elementType()); } b.append("], entries=").append(bytesRefHash.size()); b.append(", size=").append(ByteSizeValue.ofBytes(bytesRefHash.ramBytesUsed())); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java index bb6e10c0595d8..4d41ab27312c3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java +++ 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java @@ -259,4 +259,36 @@ public Page shallowCopy() { } return new Page(blocks); } + + /** + * Returns a new page with blocks in the containing {@link Block}s + * shifted around or removed. The new {@link Page} will have as + * many blocks as the {@code length} of the provided array. Those + * blocks will be set to the block at the position of the + * value of each entry in the parameter. + */ + public Page projectBlocks(int[] blockMapping) { + if (blocksReleased) { + throw new IllegalStateException("can't read released page"); + } + Block[] mapped = new Block[blockMapping.length]; + try { + for (int b = 0; b < blockMapping.length; b++) { + if (blockMapping[b] >= blocks.length) { + throw new IllegalArgumentException( + "Cannot project block with index [" + blockMapping[b] + "] from a page with size [" + blocks.length + "]" + ); + } + mapped[b] = blocks[blockMapping[b]]; + mapped[b].incRef(); + } + Page result = new Page(false, getPositionCount(), mapped); + mapped = null; + return result; + } finally { + if (mapped != null) { + Releasables.close(mapped); + } + } + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperator.java new file mode 100644 index 0000000000000..4fb4053b0c0f4 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperator.java @@ -0,0 +1,291 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.ReleasableIterator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Objects; +import java.util.stream.IntStream; + +/** + * Maps a single {@link Page} into zero or more resulting pages. + */ +public abstract class AbstractPageMappingToIteratorOperator implements Operator { + private ReleasableIterator next; + + private boolean finished = false; + + /** + * Number of milliseconds this operation has spent receiving pages. + */ + private long processNanos; + + /** + * Count of pages that have been received by this operator. + */ + private int pagesReceived; + + /** + * Count of pages that have been emitted by this operator. + */ + private int pagesEmitted; + + /** + * Build and Iterator of results for a new page. + */ + protected abstract ReleasableIterator receive(Page page); + + /** + * Append an {@link Iterator} of {@link Block}s to a {@link Page}, one + * after the other. It's required that the iterator emit as many + * positions as there were in the page. 
+ */ + public static ReleasableIterator appendBlocks(Page page, ReleasableIterator toAdd) { + return new AppendBlocksIterator(page, toAdd); + } + + @Override + public abstract String toString(); + + @Override + public final boolean needsInput() { + return finished == false && (next == null || next.hasNext() == false); + } + + @Override + public final void addInput(Page page) { + if (next != null) { + assert next.hasNext() == false : "has pending input page"; + next.close(); + } + if (page.getPositionCount() == 0) { + return; + } + next = new RuntimeTrackingIterator(receive(page)); + pagesReceived++; + } + + @Override + public final void finish() { + finished = true; + } + + @Override + public final boolean isFinished() { + return finished && (next == null || next.hasNext() == false); + } + + @Override + public final Page getOutput() { + if (next == null || next.hasNext() == false) { + return null; + } + Page ret = next.next(); + pagesEmitted++; + return ret; + } + + @Override + public final AbstractPageMappingToIteratorOperator.Status status() { + return status(processNanos, pagesReceived, pagesEmitted); + } + + protected AbstractPageMappingToIteratorOperator.Status status(long processNanos, int pagesReceived, int pagesEmitted) { + return new AbstractPageMappingToIteratorOperator.Status(processNanos, pagesReceived, pagesEmitted); + } + + @Override + public void close() { + Releasables.closeExpectNoException(next); + } + + private class RuntimeTrackingIterator implements ReleasableIterator { + private final ReleasableIterator next; + + private RuntimeTrackingIterator(ReleasableIterator next) { + this.next = next; + } + + @Override + public boolean hasNext() { + return next.hasNext(); + } + + @Override + public Page next() { + long start = System.nanoTime(); + Page out = next.next(); + processNanos += System.nanoTime() - start; + return out; + } + + @Override + public void close() { + next.close(); + } + } + + public static class Status implements Operator.Status { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Operator.Status.class, + "page_mapping_to_iterator", + AbstractPageMappingOperator.Status::new + ); + + private final long processNanos; + private final int pagesReceived; + private final int pagesEmitted; + + public Status(long processNanos, int pagesProcessed, int pagesEmitted) { + this.processNanos = processNanos; + this.pagesReceived = pagesProcessed; + this.pagesEmitted = pagesEmitted; + } + + protected Status(StreamInput in) throws IOException { + processNanos = in.readVLong(); + pagesReceived = in.readVInt(); + pagesEmitted = in.readVInt(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(processNanos); + out.writeVInt(pagesReceived); + out.writeVInt(pagesEmitted); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public int pagesReceived() { + return pagesReceived; + } + + public int pagesEmitted() { + return pagesEmitted; + } + + public long processNanos() { + return processNanos; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + innerToXContent(builder); + return builder.endObject(); + } + + /** + * Render the body of the object for this status. Protected so subclasses + * can call it to render the "default" body. 
+ */ + protected final XContentBuilder innerToXContent(XContentBuilder builder) throws IOException { + builder.field("process_nanos", processNanos); + if (builder.humanReadable()) { + builder.field("process_time", TimeValue.timeValueNanos(processNanos)); + } + builder.field("pages_received", pagesReceived); + return builder.field("pages_emitted", pagesEmitted); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AbstractPageMappingToIteratorOperator.Status status = (AbstractPageMappingToIteratorOperator.Status) o; + return processNanos == status.processNanos && pagesReceived == status.pagesReceived && pagesEmitted == status.pagesEmitted; + } + + @Override + public int hashCode() { + return Objects.hash(processNanos, pagesReceived, pagesEmitted); + } + + @Override + public String toString() { + return Strings.toString(this); + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ESQL_PAGE_MAPPING_TO_ITERATOR; + } + } + + private static class AppendBlocksIterator implements ReleasableIterator { + private final Page page; + private final ReleasableIterator next; + + private int positionOffset; + + protected AppendBlocksIterator(Page page, ReleasableIterator next) { + this.page = page; + this.next = next; + } + + @Override + public final boolean hasNext() { + if (next.hasNext()) { + assert positionOffset < page.getPositionCount(); + return true; + } + assert positionOffset == page.getPositionCount(); + return false; + } + + @Override + public final Page next() { + Block read = next.next(); + int start = positionOffset; + positionOffset += read.getPositionCount(); + if (start == 0 && read.getPositionCount() == page.getPositionCount()) { + for (int b = 0; b < page.getBlockCount(); b++) { + page.getBlock(b).incRef(); + } + return page.appendBlock(read); + } + Block[] newBlocks = new Block[page.getBlockCount() + 1]; + newBlocks[page.getBlockCount()] = read; + try { + // TODO a way to filter with a range please. + int[] positions = IntStream.range(start, positionOffset).toArray(); + for (int b = 0; b < page.getBlockCount(); b++) { + newBlocks[b] = page.getBlock(b).filter(positions); + } + Page result = new Page(newBlocks); + Arrays.fill(newBlocks, null); + return result; + } finally { + Releasables.closeExpectNoException(newBlocks); + } + } + + @Override + public void close() { + Releasables.closeExpectNoException(page::releaseBlocks, next); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashLookupOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashLookupOperator.java new file mode 100644 index 0000000000000..2b77003f11a4f --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashLookupOperator.java @@ -0,0 +1,138 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; +import org.elasticsearch.compute.aggregation.blockhash.BlockHash; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.ReleasableIterator; +import org.elasticsearch.core.Releasables; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class HashLookupOperator extends AbstractPageMappingToIteratorOperator { + /** + * Factory for {@link HashLookupOperator}. It's received {@link Block}s + * are never closed, so we need to build them from a non-tracking factory. + */ + public static class Factory implements Operator.OperatorFactory { + private final Block[] keys; + private final int[] blockMapping; + + public Factory(Block[] keys, int[] blockMapping) { + this.keys = keys; + this.blockMapping = blockMapping; + } + + @Override + public Operator get(DriverContext driverContext) { + return new HashLookupOperator(driverContext.blockFactory(), keys, blockMapping); + } + + @Override + public String describe() { + StringBuilder b = new StringBuilder(); + b.append("HashLookup[keys=["); + for (int k = 0; k < keys.length; k++) { + Block key = keys[k]; + if (k != 0) { + b.append(", "); + } + b.append("{type=").append(key.elementType()); + b.append(", positions=").append(key.getPositionCount()); + b.append(", size=").append(ByteSizeValue.ofBytes(key.ramBytesUsed())).append("}"); + } + b.append("], mapping=").append(Arrays.toString(blockMapping)).append("]"); + return b.toString(); + } + } + + private final BlockHash hash; + private final int[] blockMapping; + + public HashLookupOperator(BlockFactory blockFactory, Block[] keys, int[] blockMapping) { + this.blockMapping = blockMapping; + List groups = new ArrayList<>(keys.length); + for (int k = 0; k < keys.length; k++) { + groups.add(new BlockHash.GroupSpec(k, keys[k].elementType())); + } + /* + * Force PackedValuesBlockHash because it assigned ordinals in order + * of arrival. We'll figure out how to adapt other block hashes to + * do that soon. Soon we must figure out how to map ordinals to rows. + * And, probably at the same time, handle multiple rows containing + * the same keys. 
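+         * For example, a keys block of [7, 14, 20] is assigned ordinals 0, 1 and 2 in arrival
+         * order, so a lookup ordinal doubles as the row index into the keys block; the AddInput
+         * callback below asserts exactly this.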
+ */ + this.hash = BlockHash.buildPackedValuesBlockHash( + groups, + blockFactory, + (int) BlockFactory.DEFAULT_MAX_BLOCK_PRIMITIVE_ARRAY_SIZE.getBytes() + ); + boolean success = false; + try { + final int[] lastOrd = new int[] { -1 }; + hash.add(new Page(keys), new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + // TODO support multiple rows with the same keys + for (int p = 0; p < groupIds.getPositionCount(); p++) { + int first = groupIds.getFirstValueIndex(p); + int end = groupIds.getValueCount(p) + first; + for (int i = first; i < end; i++) { + int ord = groupIds.getInt(i); + if (ord != lastOrd[0] + 1) { + throw new IllegalArgumentException("found a duplicate row"); + } + lastOrd[0] = ord; + } + } + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + for (int p = 0; p < groupIds.getPositionCount(); p++) { + int ord = groupIds.getInt(p); + if (ord != lastOrd[0] + 1) { + throw new IllegalArgumentException("found a duplicate row"); + } + lastOrd[0] = ord; + } + } + }); + success = true; + } finally { + if (success == false) { + close(); + } + } + } + + @Override + protected ReleasableIterator receive(Page page) { + Page mapped = page.projectBlocks(blockMapping); + page.releaseBlocks(); + return appendBlocks(mapped, hash.lookup(mapped, BlockFactory.DEFAULT_MAX_BLOCK_PRIMITIVE_ARRAY_SIZE)); + } + + @Override + public String toString() { + return "HashLookup[hash=" + hash + ", mapping=" + Arrays.toString(blockMapping) + "]"; + } + + @Override + public void close() { + Releasables.close(super::close, hash); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ProjectOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ProjectOperator.java index 9b4d9d8f11a31..18bbcde41eb6b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ProjectOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ProjectOperator.java @@ -7,9 +7,7 @@ package org.elasticsearch.compute.operator; -import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.core.Releasables; import java.util.Arrays; import java.util.List; @@ -31,7 +29,6 @@ public String describe() { } private final int[] projection; - private final Block[] blocks; /** * Creates an operator that applies the given projection, encoded as an integer list where @@ -42,7 +39,6 @@ public String describe() { */ public ProjectOperator(List projection) { this.projection = projection.stream().mapToInt(Integer::intValue).toArray(); - this.blocks = new Block[projection.size()]; } @Override @@ -51,29 +47,9 @@ protected Page process(Page page) { if (blockCount == 0) { return page; } - Page output = null; try { - int b = 0; - for (int source : projection) { - if (source >= blockCount) { - throw new IllegalArgumentException( - "Cannot project block with index [" + source + "] from a page with size [" + blockCount + "]" - ); - } - var block = page.getBlock(source); - blocks[b++] = block; - block.incRef(); - } - int positionCount = page.getPositionCount(); - // Use positionCount explicitly to avoid re-computing - also, if the projection is empty, there may be - // no more blocks left to determine the positionCount from. 
- output = new Page(positionCount, blocks); - return output; + return page.projectBlocks(projection); } finally { - if (output == null) { - Releasables.close(blocks); - } - Arrays.fill(blocks, null); page.releaseBlocks(); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java index ef17bea26a14b..805f26e9ef280 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java @@ -37,6 +37,7 @@ import org.elasticsearch.compute.aggregation.blockhash.BlockHash; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BlockTestUtils; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.DocBlock; import org.elasticsearch.compute.data.DocVector; @@ -54,6 +55,7 @@ import org.elasticsearch.compute.operator.Driver; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.HashAggregationOperator; +import org.elasticsearch.compute.operator.HashLookupOperator; import org.elasticsearch.compute.operator.LimitOperator; import org.elasticsearch.compute.operator.Operator; import org.elasticsearch.compute.operator.OperatorTestCase; @@ -71,12 +73,14 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import static org.elasticsearch.compute.aggregation.AggregatorMode.FINAL; import static org.elasticsearch.compute.aggregation.AggregatorMode.INITIAL; @@ -324,6 +328,72 @@ public ScoreMode scoreMode() { return docIds; } + public void testHashLookup() { + // TODO move this to an integration test once we've plugged in the lookup + DriverContext driverContext = driverContext(); + Map primeOrds = new TreeMap<>(); + Block primesBlock; + try (LongBlock.Builder primes = driverContext.blockFactory().newLongBlockBuilder(30)) { + boolean[] sieve = new boolean[100]; + Arrays.fill(sieve, true); + sieve[0] = false; + sieve[1] = false; + int prime = 2; + while (prime < 100) { + if (false == sieve[prime]) { + prime++; + continue; + } + primes.appendLong(prime); + primeOrds.put((long) prime, primeOrds.size()); + for (int m = prime + prime; m < sieve.length; m += prime) { + sieve[m] = false; + } + prime++; + } + primesBlock = primes.build(); + } + try { + List values = new ArrayList<>(); + List expectedValues = new ArrayList<>(); + List expectedPrimeOrds = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + long v = i % 10 == 0 ? 
randomFrom(primeOrds.keySet()) : randomLongBetween(0, 100); + values.add(v); + expectedValues.add(v); + expectedPrimeOrds.add(primeOrds.get(v)); + } + + var actualValues = new ArrayList<>(); + var actualPrimeOrds = new ArrayList<>(); + try ( + var driver = new Driver( + driverContext, + new SequenceLongBlockSourceOperator(driverContext.blockFactory(), values, 100), + List.of(new HashLookupOperator(driverContext.blockFactory(), new Block[] { primesBlock }, new int[] { 0 })), + new PageConsumerOperator(page -> { + try { + BlockTestUtils.readInto(actualValues, page.getBlock(0)); + BlockTestUtils.readInto(actualPrimeOrds, page.getBlock(1)); + } finally { + page.releaseBlocks(); + } + }), + () -> {} + ) + ) { + OperatorTestCase.runDriver(driver); + } + + assertThat(actualValues, equalTo(expectedValues)); + assertThat(actualPrimeOrds, equalTo(expectedPrimeOrds)); + assertDriverContext(driverContext); + } finally { + primesBlock.close(); + } + + } + /** * Creates a {@link BigArrays} that tracks releases but doesn't throw circuit breaking exceptions. */ diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperatorStatusTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperatorStatusTests.java new file mode 100644 index 0000000000000..41db82b9b4c8c --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/AbstractPageMappingToIteratorOperatorStatusTests.java @@ -0,0 +1,60 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class AbstractPageMappingToIteratorOperatorStatusTests extends AbstractWireSerializingTestCase< + AbstractPageMappingToIteratorOperator.Status> { + public static AbstractPageMappingToIteratorOperator.Status simple() { + return new AbstractPageMappingToIteratorOperator.Status(200012, 123, 204); + } + + public static String simpleToJson() { + return """ + { + "process_nanos" : 200012, + "process_time" : "200micros", + "pages_received" : 123, + "pages_emitted" : 204 + }"""; + } + + public void testToXContent() { + assertThat(Strings.toString(simple(), true, true), equalTo(simpleToJson())); + } + + @Override + protected Writeable.Reader instanceReader() { + return AbstractPageMappingToIteratorOperator.Status::new; + } + + @Override + public AbstractPageMappingToIteratorOperator.Status createTestInstance() { + return new AbstractPageMappingToIteratorOperator.Status(randomNonNegativeLong(), randomNonNegativeInt(), randomNonNegativeInt()); + } + + @Override + protected AbstractPageMappingToIteratorOperator.Status mutateInstance(AbstractPageMappingToIteratorOperator.Status instance) { + long processNanos = instance.processNanos(); + int pagesReceived = instance.pagesReceived(); + int pagesEmitted = instance.pagesEmitted(); + switch (between(0, 2)) { + case 0 -> processNanos = randomValueOtherThan(processNanos, ESTestCase::randomNonNegativeLong); + case 1 -> pagesReceived = randomValueOtherThan(pagesReceived, ESTestCase::randomNonNegativeInt); + case 2 -> pagesEmitted = randomValueOtherThan(pagesEmitted, ESTestCase::randomNonNegativeInt); + default -> throw new UnsupportedOperationException(); + } + return new AbstractPageMappingToIteratorOperator.Status(processNanos, pagesReceived, pagesEmitted); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashLookupOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashLookupOperatorTests.java new file mode 100644 index 0000000000000..ec69297718237 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashLookupOperatorTests.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.TestBlockFactory; + +import java.util.List; +import java.util.stream.LongStream; + +import static org.hamcrest.Matchers.equalTo; + +public class HashLookupOperatorTests extends OperatorTestCase { + @Override + protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new SequenceLongBlockSourceOperator(blockFactory, LongStream.range(0, size).map(l -> randomFrom(1, 7, 14, 20))); + } + + @Override + protected void assertSimpleOutput(List input, List results) { + assertThat(results.stream().mapToInt(Page::getPositionCount).sum(), equalTo(input.stream().mapToInt(Page::getPositionCount).sum())); + } + + @Override + protected Operator.OperatorFactory simple() { + return new HashLookupOperator.Factory( + new Block[] { TestBlockFactory.getNonBreakingInstance().newLongArrayVector(new long[] { 7, 14, 20 }, 3).asBlock() }, + new int[] { 0 } + ); + } + + @Override + protected String expectedDescriptionOfSimple() { + return "HashLookup[keys=[{type=LONG, positions=3, size=96b}], mapping=[0]]"; + } + + @Override + protected String expectedToStringOfSimple() { + return "HashLookup[hash=PackedValuesBlockHash{groups=[0:LONG], entries=3, size=536b}, mapping=[0]]"; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorAppendPageTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorAppendPageTests.java new file mode 100644 index 0000000000000..ca0ebc64f09a6 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorAppendPageTests.java @@ -0,0 +1,116 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.TestBlockFactory; +import org.elasticsearch.core.ReleasableIterator; + +import java.util.List; +import java.util.stream.LongStream; + +import static org.hamcrest.Matchers.equalTo; + +/** + * Tests {@link AbstractPageMappingToIteratorOperator} against a test + * subclass that appends {@code 1} and chunks the incoming {@link Page} + * at {@code 100} positions. 
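+ * For instance, an input page of 250 positions is expected to come back as three pages of
+ * 100, 100 and 50 positions, each with a constant {@code 1} block appended as the last column.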
+ */ +public class IteratorAppendPageTests extends OperatorTestCase { + private static final int ADDED_VALUE = 1; + private static final int CHUNK = 100; + + private static class IteratorAppendPage extends AbstractPageMappingToIteratorOperator { + private static class Factory implements Operator.OperatorFactory { + @Override + public Operator get(DriverContext driverContext) { + return new IteratorAppendPage(driverContext.blockFactory()); + } + + @Override + public String describe() { + return "IteratorAppendPage[]"; + } + } + + private final BlockFactory blockFactory; + + private IteratorAppendPage(BlockFactory blockFactory) { + this.blockFactory = blockFactory; + } + + @Override + protected ReleasableIterator receive(Page page) { + return appendBlocks(page, new ReleasableIterator<>() { + private int positionOffset; + + @Override + public boolean hasNext() { + return positionOffset < page.getPositionCount(); + } + + @Override + public Block next() { + if (hasNext() == false) { + throw new IllegalStateException(); + } + int positions = Math.min(page.getPositionCount() - positionOffset, CHUNK); + positionOffset += positions; + return blockFactory.newConstantIntBlockWith(ADDED_VALUE, positions); + } + + @Override + public void close() { + // Nothing to do, appendBlocks iterator closes the page for us. + } + }); + } + + @Override + public String toString() { + return "IteratorAppendPage[]"; + } + } + + @Override + protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new SequenceLongBlockSourceOperator(blockFactory, LongStream.range(0, size).map(l -> randomLong())); + } + + @Override + protected void assertSimpleOutput(List input, List results) { + int r = 0; + for (Page in : input) { + for (int offset = 0; offset < in.getPositionCount(); offset += CHUNK) { + Page resultPage = results.get(r++); + assertThat(resultPage.getPositionCount(), equalTo(Math.min(CHUNK, in.getPositionCount() - offset))); + assertThat( + resultPage.getBlock(1), + equalTo(TestBlockFactory.getNonBreakingInstance().newConstantIntBlockWith(ADDED_VALUE, resultPage.getPositionCount())) + ); + } + } + } + + @Override + protected Operator.OperatorFactory simple() { + return new IteratorAppendPage.Factory(); + } + + @Override + protected String expectedDescriptionOfSimple() { + return "IteratorAppendPage[]"; + } + + @Override + protected String expectedToStringOfSimple() { + return expectedDescriptionOfSimple(); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorRemovePageTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorRemovePageTests.java new file mode 100644 index 0000000000000..34943de834f9c --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/IteratorRemovePageTests.java @@ -0,0 +1,118 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.ReleasableIterator; + +import java.util.List; +import java.util.stream.LongStream; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +/** + * Tests {@link AbstractPageMappingToIteratorOperator} against a test + * subclass that removes every other page. + */ +public class IteratorRemovePageTests extends OperatorTestCase { + private static class IteratorRemovePage extends AbstractPageMappingToIteratorOperator { + private static class Factory implements OperatorFactory { + @Override + public Operator get(DriverContext driverContext) { + return new IteratorRemovePage(); + } + + @Override + public String describe() { + return "IteratorRemovePage[]"; + } + } + + private boolean keep = true; + + @Override + protected ReleasableIterator receive(Page page) { + if (keep) { + keep = false; + return new ReleasableIterator<>() { + Page p = page; + + @Override + public boolean hasNext() { + return p != null; + } + + @Override + public Page next() { + Page ret = p; + p = null; + return ret; + } + + @Override + public void close() { + if (p != null) { + p.releaseBlocks(); + } + } + }; + } + keep = true; + page.releaseBlocks(); + return new ReleasableIterator<>() { + @Override + public boolean hasNext() { + return false; + } + + @Override + public Page next() { + throw new UnsupportedOperationException(); + } + + @Override + public void close() {} + }; + } + + @Override + public String toString() { + return "IteratorRemovePage[]"; + } + } + + @Override + protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new SequenceLongBlockSourceOperator(blockFactory, LongStream.range(0, size).map(l -> randomLong())); + } + + @Override + protected void assertSimpleOutput(List input, List results) { + assertThat(results, hasSize((input.size() + 1) / 2)); + for (int i = 0; i < input.size(); i += 2) { + assertThat(input.get(i), equalTo(results.get(i / 2))); + } + } + + @Override + protected Operator.OperatorFactory simple() { + return new IteratorRemovePage.Factory(); + } + + @Override + protected String expectedDescriptionOfSimple() { + return "IteratorRemovePage[]"; + } + + @Override + protected String expectedToStringOfSimple() { + return expectedDescriptionOfSimple(); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/OperatorTestCase.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/OperatorTestCase.java index f8b53a9bcd3c0..eebcbc091d3ea 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/OperatorTestCase.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/OperatorTestCase.java @@ -201,19 +201,13 @@ protected final void assertSimple(DriverContext context, int size) { // Clone the input so that the operator can close it, then, later, we can read it again to build the assertion. 
        List origInput = BlockTestUtils.deepCopyOf(input, TestBlockFactory.getNonBreakingInstance());
-        BigArrays bigArrays = context.bigArrays().withCircuitBreaking();
         List results = drive(simple().get(context), input.iterator(), context);
         assertSimpleOutput(origInput, results);
-        assertThat(bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST).getUsed(), equalTo(0L));
+        assertThat(context.breaker().getUsed(), equalTo(0L));

-        List resultBlocks = new ArrayList<>();
         // Release all result blocks. After this, all input blocks should be released as well, otherwise we have a leak.
         for (Page p : results) {
-            for (int i = 0; i < p.getBlockCount(); i++) {
-                resultBlocks.add(p.getBlock(i));
-            }
-
             p.releaseBlocks();
         }

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
index 1e2557c040b06..043d07777ac4d 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
@@ -25,6 +25,7 @@
 import org.elasticsearch.compute.lucene.LuceneOperator;
 import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator;
 import org.elasticsearch.compute.operator.AbstractPageMappingOperator;
+import org.elasticsearch.compute.operator.AbstractPageMappingToIteratorOperator;
 import org.elasticsearch.compute.operator.AggregationOperator;
 import org.elasticsearch.compute.operator.AsyncOperator;
 import org.elasticsearch.compute.operator.DriverStatus;
@@ -175,6 +176,7 @@ public List getNamedWriteables() {
             List.of(
                 DriverStatus.ENTRY,
                 AbstractPageMappingOperator.Status.ENTRY,
+                AbstractPageMappingToIteratorOperator.Status.ENTRY,
                 AggregationOperator.Status.ENTRY,
                 ExchangeSinkOperator.Status.ENTRY,
                 ExchangeSourceOperator.Status.ENTRY,

From 1aec77ecfe09f1d1dba59f10315dacbcb96f87d5 Mon Sep 17 00:00:00 2001
From: Mary Gouseti
Date: Fri, 26 Apr 2024 16:09:31 +0300
Subject: [PATCH 05/14] Effective retention: improve passing params and add getters (#107943)

In order to be able to enrich the following responses with the `effective_retention`, we need to
ensure the `XContent.Params` are passed correctly and that we have all the getters needed to copy
the response.
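As a rough illustration of the params fix (simplified sketch; the enclosing class, the `items`
collection and its element type are placeholders rather than the exact production code): inside a
chunked response each chunk lambda receives serializer-supplied `params`, so request-level options
only reach nested objects when the captured `outerParams` are forwarded explicitly.

    @Override
    public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params outerParams) {
        return Iterators.map(items.iterator(), item -> (builder, params) ->
            // forward outerParams, not the per-chunk params, so nested toXContent calls
            // see the request-level rendering options (e.g. the effective retention flag)
            item.toXContent(builder, outerParams, rolloverConfiguration, globalRetention));
    }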
--- .../get/GetComposableIndexTemplateAction.java | 4 ++++ .../post/SimulateIndexTemplateResponse.java | 16 ++++++++++++++++ .../ExplainDataStreamLifecycleAction.java | 2 +- .../lifecycle/GetDataStreamLifecycleAction.java | 6 +++++- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/template/get/GetComposableIndexTemplateAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/template/get/GetComposableIndexTemplateAction.java index 240fdd2ae8199..f2fcbeff73c37 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/template/get/GetComposableIndexTemplateAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/template/get/GetComposableIndexTemplateAction.java @@ -157,6 +157,10 @@ public Map indexTemplates() { return indexTemplates; } + public RolloverConfiguration getRolloverConfiguration() { + return rolloverConfiguration; + } + public DataStreamGlobalRetention getGlobalRetention() { return globalRetention; } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/template/post/SimulateIndexTemplateResponse.java b/server/src/main/java/org/elasticsearch/action/admin/indices/template/post/SimulateIndexTemplateResponse.java index 4ff38222ccc99..52d40626f97ed 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/template/post/SimulateIndexTemplateResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/template/post/SimulateIndexTemplateResponse.java @@ -69,6 +69,22 @@ public SimulateIndexTemplateResponse( this.globalRetention = globalRetention; } + public Template getResolvedTemplate() { + return resolvedTemplate; + } + + public Map> getOverlappingTemplates() { + return overlappingTemplates; + } + + public RolloverConfiguration getRolloverConfiguration() { + return rolloverConfiguration; + } + + public DataStreamGlobalRetention getGlobalRetention() { + return globalRetention; + } + public SimulateIndexTemplateResponse(StreamInput in) throws IOException { super(in); resolvedTemplate = in.readOptionalWriteable(Template::new); diff --git a/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/ExplainDataStreamLifecycleAction.java b/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/ExplainDataStreamLifecycleAction.java index ee4f7fbaa9c59..17d33ae9167fd 100644 --- a/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/ExplainDataStreamLifecycleAction.java +++ b/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/ExplainDataStreamLifecycleAction.java @@ -212,7 +212,7 @@ public Iterator toXContentChunked(ToXContent.Params outerP return builder; }), Iterators.map(indices.iterator(), explainIndexDataLifecycle -> (builder, params) -> { builder.field(explainIndexDataLifecycle.getIndex()); - explainIndexDataLifecycle.toXContent(builder, params, rolloverConfiguration, globalRetention); + explainIndexDataLifecycle.toXContent(builder, outerParams, rolloverConfiguration, globalRetention); return builder; }), Iterators.single((builder, params) -> { builder.endObject(); diff --git a/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/GetDataStreamLifecycleAction.java b/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/GetDataStreamLifecycleAction.java index d0dd67b4b4db5..1c9dbb0575a1d 100644 --- a/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/GetDataStreamLifecycleAction.java +++ 
b/server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/GetDataStreamLifecycleAction.java @@ -220,6 +220,10 @@ public RolloverConfiguration getRolloverConfiguration() { return rolloverConfiguration; } + public DataStreamGlobalRetention getGlobalRetention() { + return globalRetention; + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeCollection(dataStreamLifecycles); @@ -240,7 +244,7 @@ public Iterator toXContentChunked(ToXContent.Params outerParams) { dataStreamLifecycles.iterator(), dataStreamLifecycle -> (builder, params) -> dataStreamLifecycle.toXContent( builder, - params, + outerParams, rolloverConfiguration, globalRetention ) From 98ed236f2bd58c0c3460a77493b231fd37a05cb9 Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Fri, 26 Apr 2024 15:16:29 +0200 Subject: [PATCH 06/14] [Connector API] Update docs filtering, configuration, delete, scheduling (#107842) --- .../apis/delete-connector-api.asciidoc | 14 +- ...pdate-connector-configuration-api.asciidoc | 367 +++++++++++------- .../update-connector-filtering-api.asciidoc | 229 ++++++----- .../update-connector-scheduling-api.asciidoc | 30 +- 4 files changed, 394 insertions(+), 246 deletions(-) diff --git a/docs/reference/connector/apis/delete-connector-api.asciidoc b/docs/reference/connector/apis/delete-connector-api.asciidoc index b338f1db2a256..2e7c7a3b60708 100644 --- a/docs/reference/connector/apis/delete-connector-api.asciidoc +++ b/docs/reference/connector/apis/delete-connector-api.asciidoc @@ -28,6 +28,9 @@ Note: this action doesn't delete any API key, ingest pipeline or data index asso ``:: (Required, string) +`delete_sync_jobs`:: +(Optional, boolean) A flag indicating if associated sync jobs should be also removed. Defaults to `false`. + [[delete-connector-api-response-codes]] ==== {api-response-codes-title} @@ -47,7 +50,12 @@ The following example deletes the connector with ID `my-connector`: -------------------------------------------------- PUT _connector/my-connector { - "index_name": "search-google-drive", + "name": "My Connector", + "service_type": "google_drive" +} + +PUT _connector/another-connector +{ "name": "My Connector", "service_type": "google_drive" } @@ -57,7 +65,7 @@ PUT _connector/my-connector [source,console] ---- -DELETE _connector/my-connector +DELETE _connector/another-connector?delete_sync_jobs=true ---- [source,console-result] @@ -66,3 +74,5 @@ DELETE _connector/my-connector "acknowledged": true } ---- + +The following example deletes the connector with ID `another-connector` and its associated sync jobs. diff --git a/docs/reference/connector/apis/update-connector-configuration-api.asciidoc b/docs/reference/connector/apis/update-connector-configuration-api.asciidoc index fea22eb8043b8..256621afb8fc5 100644 --- a/docs/reference/connector/apis/update-connector-configuration-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-configuration-api.asciidoc @@ -6,7 +6,7 @@ preview::[] -Updates a connector's `configuration`, allowing for complete schema modifications or individual value updates within a registered configuration schema. +Updates a connector's `configuration`, allowing for config value updates within a registered configuration schema. 
[[update-connector-configuration-api-request]] @@ -19,7 +19,8 @@ Updates a connector's `configuration`, allowing for complete schema modification * To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. * The `connector_id` parameter should reference an existing connector. -* The configuration fields definition must be compatible with the specific connector type being used. +* To update configuration `values`, the connector `configuration` schema must be first registered by a running instance of Elastic connector service. +* Make sure configuration fields are compatible with the configuration schema for the third-party data source. Refer to the individual {enterprise-search-ref}/connectors-references.html[connectors references] for details. [[update-connector-configuration-api-path-params]] ==== {api-path-parms-title} @@ -35,57 +36,7 @@ Updates a connector's `configuration`, allowing for complete schema modification (Optional, object) Configuration values for the connector, represented as a mapping of configuration fields to their respective values within a registered schema. `configuration`:: -(Optional, object) The configuration for the connector. The configuration field is a map where each key represents a specific configuration field name, and the value is a `ConnectorConfiguration` object. - -Each `ConnectorConfiguration` object contains the following attributes: - -* `category` (Optional, string) The category of the configuration field. This helps in grouping related configurations together in the user interface. - -* `default_value` (Required, string | number | bool) The default value for the configuration. This value is used if the value field is empty, applicable only for non-required fields. - -* `depends_on` (Required, array of `ConfigurationDependency`) An array of dependencies on other configurations. A field will not be enabled unless these dependencies are met. Each dependency specifies a field key and the required value for the dependency to be considered fulfilled. - -* `display` (Required, string) The display type for the UI element that represents this configuration. This defines how the field should be rendered in the user interface. Supported types are: `text`, `textbox`, `textarea`, `numeric`, `toggle` and `dropdown`. - -* `label` (Required, string) The display label for the configuration field. This label is shown in the user interface, adjacent to the field. - -* `options` (Required, array of `ConfigurationSelectOption`) An array of options for list-type fields. These options are used for inputs in the user interface, each having a label for display and a value. - -* `order` (Required, number) The order in which this configuration appears in the user interface. This helps in organizing fields logically. - -* `placeholder` (Required, string) Placeholder text for the configuration field. This text is displayed inside the field before a value is entered. - -* `required` (Required, boolean) Indicates whether the configuration is mandatory. If true, a value must be provided for the field. - -* `sensitive` (Required, boolean) Indicates whether the configuration contains sensitive information. Sensitive fields may be obfuscated in the user interface. - -* `tooltip` (Optional, string) Tooltip text providing additional information about the configuration. 
This text appears when the user hovers over the info icon next to the configuration field. - -* `type` (Required, string) The type of the configuration field, such as `str`, `int`, `bool`, `list`. This defines the data type and format of the field's value. - -* `ui_restrictions` (Required, array of strings) A list of UI restrictions. These restrictions define where in the user interface this field should be available or restricted. - -* `validations` (Required, array of `ConfigurationValidation`) An array of rules for validating the field's value. Each validation specifies a type and a constraint that the field's value must meet. - -* `value` (Required, string | number | bool) The current value of the configuration. This is the actual value set for the field and is used by the connector during its operations. - -`ConfigurationDependency` represents a dependency that a configuration field has on another field's value. It contains the following attributes: - -* `field` (Required, string) The name of the field in the configuration that this dependency relates to. - -* `value` (Required, string | number | bool) The required value of the specified field for this dependency to be met. - -`ConfigurationSelectOption` defines an option within a selectable configuration field. It contains the following attributes: - -* `label` (Required, string) The display label for the option. - -* `value` (Required, string) The actual value associated with the option. - -`ConfigurationValidation` specifies validation rules for configuration fields. Each ConfigurationValidation instance enforces a specific type of validation based on its type and constraint. It contains the following attributes: - -* `constraint` (Required, string | number) The validation constraint. The nature of this constraint depends on the validation type. It could be a numeric value, a list, a regular expression pattern. - -* `type` (Required, ConfigurationValidationType) The type of validation to be performed. Possible values include: `less_than`, `greater_than`, `list_type`, `included_in`, `regex` and `unset`. +(Optional, object) The configuration schema definition for the connector. The configuration field is a map where each key represents a specific configuration field name, and the value is a `ConnectorConfiguration` object. For connector management use `values` to pass config values. The `configuration` object is used by the Elastic connector service to register the connector configuration schema. [[update-connector-configuration-api-response-codes]] @@ -103,7 +54,7 @@ No connector matching `connector_id` could be found. [[update-connector-configuration-api-example]] ==== {api-examples-title} -The following example updates the `configuration` for the connector with ID `my-connector`: +The following example configures a `sharepoint_online` connector. Find the supported configuration options in the {enterprise-search-ref}/connectors-sharepoint-online.html[Sharepoint Online connector documentation] or by inspecting the schema in the connector's `configuration` field using the <>. 
//// [source, console] @@ -118,35 +69,227 @@ PUT _connector/my-spo-connector PUT _connector/my-spo-connector/_configuration { "configuration": { + "tenant_id": { + "default_value": null, + "depends_on": [], + "display": "textbox", + "label": "Tenant ID", + "options": [], + "order": 1, + "required": true, + "sensitive": false, + "tooltip": "", + "type": "str", + "ui_restrictions": [], + "validations": [], + "value": "" + }, + "tenant_name": { + "default_value": null, + "depends_on": [], + "display": "textbox", + "label": "Tenant name", + "options": [], + "order": 2, + "required": true, + "sensitive": false, + "tooltip": "", + "type": "str", + "ui_restrictions": [], + "validations": [], + "value": "" + }, "client_id": { - "default_value": null, - "depends_on": [], - "display": "text", - "label": "Client ID", - "options": [], - "order": 3, - "required": true, - "sensitive": false, - "tooltip": null, - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": null + "default_value": null, + "depends_on": [], + "display": "textbox", + "label": "Client ID", + "options": [], + "order": 3, + "required": true, + "sensitive": false, + "tooltip": "", + "type": "str", + "ui_restrictions": [], + "validations": [], + "value": "" }, "secret_value": { - "default_value": null, - "depends_on": [], - "display": "text", - "label": "Secret value", - "options": [], - "order": 4, - "required": true, - "sensitive": true, - "tooltip": null, - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": null + "default_value": null, + "depends_on": [], + "display": "textbox", + "label": "Secret value", + "options": [], + "order": 4, + "required": true, + "sensitive": true, + "tooltip": "", + "type": "str", + "ui_restrictions": [], + "validations": [], + "value": "" + }, + "site_collections": { + "default_value": null, + "depends_on": [], + "display": "textarea", + "label": "Comma-separated list of sites", + "options": [], + "order": 5, + "required": true, + "sensitive": false, + "tooltip": "A comma-separated list of sites to ingest data from. Use * to include all available sites.", + "type": "list", + "ui_restrictions": [], + "validations": [], + "value": "" + }, + "use_text_extraction_service": { + "default_value": false, + "depends_on": [], + "display": "toggle", + "label": "Use text extraction service", + "options": [], + "order": 6, + "required": true, + "sensitive": false, + "tooltip": "Requires a separate deployment of the Elastic Data Extraction Service. Also requires that pipeline settings disable text extraction.", + "type": "bool", + "ui_restrictions": [ + "advanced" + ], + "validations": [], + "value": false + }, + "use_document_level_security": { + "default_value": false, + "depends_on": [], + "display": "toggle", + "label": "Enable document level security", + "options": [], + "order": 7, + "required": true, + "sensitive": false, + "tooltip": "Document level security ensures identities and permissions set in Sharepoint Online are maintained in Elasticsearch. This metadata is added to your Elasticsearch documents, so you can control user and group read-access. 
Access control syncs ensure this metadata is kept up to date.", + "type": "bool", + "ui_restrictions": [], + "validations": [], + "value": false + }, + "fetch_drive_item_permissions": { + "default_value": true, + "depends_on": [ + { + "field": "use_document_level_security", + "value": true + } + ], + "display": "toggle", + "label": "Fetch drive item permissions", + "options": [], + "order": 8, + "required": true, + "sensitive": false, + "tooltip": "Enable this option to fetch drive item specific permissions. This setting can increase sync time.", + "type": "bool", + "ui_restrictions": [], + "validations": [], + "value": true + }, + "fetch_unique_page_permissions": { + "default_value": true, + "depends_on": [ + { + "field": "use_document_level_security", + "value": true + } + ], + "display": "toggle", + "label": "Fetch unique page permissions", + "options": [], + "order": 9, + "required": true, + "sensitive": false, + "tooltip": "Enable this option to fetch unique page permissions. This setting can increase sync time. If this setting is disabled a page will inherit permissions from its parent site.", + "type": "bool", + "ui_restrictions": [], + "validations": [], + "value": true + }, + "fetch_unique_list_permissions": { + "default_value": true, + "depends_on": [ + { + "field": "use_document_level_security", + "value": true + } + ], + "display": "toggle", + "label": "Fetch unique list permissions", + "options": [], + "order": 10, + "required": true, + "sensitive": false, + "tooltip": "Enable this option to fetch unique list permissions. This setting can increase sync time. If this setting is disabled a list will inherit permissions from its parent site.", + "type": "bool", + "ui_restrictions": [], + "validations": [], + "value": true + }, + "fetch_unique_list_item_permissions": { + "default_value": true, + "depends_on": [ + { + "field": "use_document_level_security", + "value": true + } + ], + "display": "toggle", + "label": "Fetch unique list item permissions", + "options": [], + "order": 11, + "required": true, + "sensitive": false, + "tooltip": "Enable this option to fetch unique list item permissions. This setting can increase sync time. If this setting is disabled a list item will inherit permissions from its parent site.", + "type": "bool", + "ui_restrictions": [], + "validations": [], + "value": true + }, + "enumerate_all_sites": { + "default_value": true, + "depends_on": [], + "display": "toggle", + "label": "Enumerate all sites?", + "options": [], + "order": 6, + "required": false, + "sensitive": false, + "tooltip": "If enabled, sites will be fetched in bulk, then filtered down to the configured list of sites. This is efficient when syncing many sites. If disabled, each configured site will be fetched with an individual request. This is efficient when syncing fewer sites.", + "type": "bool", + "ui_restrictions": [], + "validations": [], + "value": true + }, + "fetch_subsites": { + "default_value": false, + "depends_on": [ + { + "field": "enumerate_all_sites", + "value": false + } + ], + "display": "toggle", + "label": "Fetch sub-sites of configured sites?", + "options": [], + "order": 7, + "required": false, + "sensitive": false, + "tooltip": "Whether subsites of the configured site(s) should be automatically fetched.", + "type": "bool", + "ui_restrictions": [], + "validations": [], + "value": true } } } @@ -160,63 +303,16 @@ DELETE _connector/my-spo-connector // TEARDOWN //// -This example demonstrates how to register a `sharepoint_online` connector configuration schema. 
Note: The example does not cover all the necessary configuration fields for operating the Sharepoint Online connector. - -[source,console] ----- -PUT _connector/my-spo-connector/_configuration -{ - "configuration": { - "client_id": { - "default_value": null, - "depends_on": [], - "display": "text", - "label": "Client ID", - "options": [], - "order": 3, - "required": true, - "sensitive": false, - "tooltip": null, - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": null - }, - "secret_value": { - "default_value": null, - "depends_on": [], - "display": "text", - "label": "Secret value", - "options": [], - "order": 4, - "required": true, - "sensitive": true, - "tooltip": null, - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": null - } - } -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- - -An example to update configuration values for the `sharepoint_online` connector: - [source,console] ---- PUT _connector/my-spo-connector/_configuration { "values": { - "client_id": "my-client-id", - "secret_value": "super-secret-value" + "tenant_id": "my-tenant-id", + "tenant_name": "my-sharepoint-site", + "client_id": "foo", + "secret_value": "bar", + "site_collections": "*" } } ---- @@ -229,14 +325,17 @@ PUT _connector/my-spo-connector/_configuration ---- -An example to update single configuration field of the `sharepoint_online` connector. In this case other configuration values won't change: +When you're first setting up your connector you'll need to provide all required configuration details to start running syncs. +But you can also use this API to only update a subset of fields. +Here's an example that only updates the `secret_value` field for a `sharepoint_online` connector. +The other configuration values won't change. [source,console] ---- PUT _connector/my-spo-connector/_configuration { "values": { - "secret_value": "new-super-secret-value" + "secret_value": "foo-bar" } } ---- diff --git a/docs/reference/connector/apis/update-connector-filtering-api.asciidoc b/docs/reference/connector/apis/update-connector-filtering-api.asciidoc index 0f6bd442e78cb..c028eece2e168 100644 --- a/docs/reference/connector/apis/update-connector-filtering-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-filtering-api.asciidoc @@ -6,19 +6,22 @@ preview::[] +Updates the draft `filtering` configuration of a connector and marks the draft validation state as `edited`. The filtering configuration can be activated once validated by the Elastic connector service. -Updates the `filtering` configuration of a connector. Learn more about filtering in the {enterprise-search-ref}/sync-rules.html[sync rules] documentation. +The filtering property is used to configure sync rules (both basic and advanced) for a connector. Learn more in the {enterprise-search-ref}/sync-rules.html[sync rules documentation]. [[update-connector-filtering-api-request]] ==== {api-request-title} `PUT _connector//_filtering` +`PUT _connector//_filtering/_activate` [[update-connector-filtering-api-prereq]] ==== {api-prereq-title} * To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. * The `connector_id` parameter should reference an existing connector. +* To activate filtering rules, the `draft.validation.state` must be `valid`. 
[[update-connector-filtering-api-path-params]] ==== {api-path-parms-title} @@ -30,65 +33,42 @@ Updates the `filtering` configuration of a connector. Learn more about filtering [[update-connector-filtering-api-request-body]] ==== {api-request-body-title} -`filtering`:: -(Required, array) The filtering configuration for the connector. This configuration determines the set of rules applied for filtering data during syncs. - -Each entry in the `filtering` array represents a set of filtering rules for a specific data domain and includes the following attributes: - -- `domain` (Required, string) + -Specifies the data domain to which these filtering rules apply. - -- `active` (Required, object) + -Contains the set of rules that are actively used for sync jobs. The `active` object includes: - - * `rules` (Required, array of objects) + - An array of individual filtering rule objects, each with the following sub-attributes: - ** `id` (Required, string) + - A unique identifier for the rule. - ** `policy` (Required, string) + - Specifies the policy, such as "include" or "exclude". - ** `field` (Required, string) + - The field in the document to which this rule applies. - ** `rule` (Required, string) + - The type of rule, such as "regex", "starts_with", "ends_with", "contains", "equals", "<", ">", etc. - ** `value` (Required, string) + - The value to be used in conjunction with the rule for matching the contents of the document's field. - ** `order` (Required, number) + - The order in which the rules are applied. The first rule to match has its policy applied. - ** `created_at` (Required, datetime) + - The timestamp when the rule was added. - ** `updated_at` (Required, datetime) + - The timestamp when the rule was last edited. - - * `advanced_snippet` (Required, object) + - Used for {enterprise-search-ref}/sync-rules.html#sync-rules-advanced[advanced filtering] at query time, with the following sub-attributes: - ** `value` (Required, object) + - A JSON object passed directly to the connector for advanced filtering. - ** `created_at` (Required, datetime) + - The timestamp when this JSON object was created. - ** `updated_at` (Required, datetime) + - The timestamp when this JSON object was last edited. - - * `validation` (Required, object) + - Provides validation status for the rules, including: - ** `state` (Required, string) + - Indicates the validation state: "edited", "valid", or "invalid". - ** `errors` (Required, object) + - Contains details about any validation errors, with sub-attributes: - *** `ids` (Required, string) + - The ID(s) of any rules deemed invalid. - *** `messages` (Required, string) + - Messages explaining what is invalid about the rules. - -- `draft` (Required, object) + -An object identical in structure to the `active` object, but used for drafting and editing filtering rules before they become active. +`rules`:: +(Optional, array of objects) +An array of {enterprise-search-ref}/sync-rules.html#sync-rules-basic[basic sync rules], each with the following sub-attributes: +* `id` (Required, string) + +A unique identifier for the rule. +* `policy` (Required, string) + +Specifies the policy, such as `include` or `exclude`. +* `field` (Required, string) + +The field in the document to which this rule applies. +* `rule` (Required, string) + +The type of rule, such as `regex`, `starts_with`, `ends_with`, `contains`, `equals`, `<`, `>`, etc. +* `value` (Required, string) + +The value to be used in conjunction with the rule for matching the contents of the document's field. 
+* `order` (Required, number) + +The order in which the rules are applied. The first rule to match has its policy applied. +* `created_at` (Optional, datetime) + +The timestamp when the rule was added. Defaults to `now` UTC timestamp. +* `updated_at` (Optional, datetime) + +The timestamp when the rule was last edited. Defaults to `now` UTC timestamp. + +`advanced_snippet`:: +(Optional, object) +Used for {enterprise-search-ref}/sync-rules.html#sync-rules-advanced[advanced filtering] at query time, with the following sub-attributes: +* `value` (Required, object or array) + +A JSON object/array passed directly to the connector for advanced filtering. +* `created_at` (Optional, datetime) + +The timestamp when this JSON object was created. Defaults to `now` UTC timestamp. +* `updated_at` (Optional, datetime) + +The timestamp when this JSON object was last edited. Defaults to `now` UTC timestamp. [[update-connector-filtering-api-response-codes]] ==== {api-response-codes-title} `200`:: -Connector `filtering` field was successfully updated. +Connector draft filtering was successfully updated. `400`:: The `connector_id` was not provided or the request payload was malformed. @@ -99,80 +79,56 @@ No connector matching `connector_id` could be found. [[update-connector-filtering-api-example]] ==== {api-examples-title} -The following example updates the `filtering` property for the connector with ID `my-connector`: +The following example updates the draft {enterprise-search-ref}/sync-rules.html#sync-rules-basic[basic sync rules] for a Google Drive connector with ID `my-g-drive-connector`. All Google Drive files with `.txt` extension will be skipped: //// [source, console] -------------------------------------------------- -PUT _connector/my-connector +PUT _connector/my-g-drive-connector { "index_name": "search-google-drive", "name": "My Connector", "service_type": "google_drive" } + +PUT _connector/my-sql-connector +{ + "index_name": "search-sql", + "name": "My SQL Connector", + "service_type": "google_drive" +} + -------------------------------------------------- // TESTSETUP [source,console] -------------------------------------------------- -DELETE _connector/my-connector +DELETE _connector/my-g-drive-connector +DELETE _connector/my-sql-connector -------------------------------------------------- // TEARDOWN //// [source,console] ---- -PUT _connector/my-connector/_filtering +PUT _connector/my-g-drive-connector/_filtering { - "filtering": [ + "rules": [ + { + "field": "file_extension", + "id": "exclude-txt-files", + "order": 0, + "policy": "exclude", + "rule": "equals", + "value": "txt" + }, { - "active": { - "advanced_snippet": { - "created_at": "2023-11-09T15:13:08.231Z", - "updated_at": "2023-11-09T15:13:08.231Z", - "value": {} - }, - "rules": [ - { - "created_at": "2023-11-09T15:13:08.231Z", - "field": "_", - "id": "DEFAULT", - "order": 0, - "policy": "include", - "rule": "regex", - "updated_at": "2023-11-09T15:13:08.231Z", - "value": ".*" - } - ], - "validation": { - "errors": [], - "state": "valid" - } - }, - "domain": "DEFAULT", - "draft": { - "advanced_snippet": { - "created_at": "2023-11-09T15:13:08.231Z", - "updated_at": "2023-11-09T15:13:08.231Z", - "value": {} - }, - "rules": [ - { - "created_at": "2023-11-09T15:13:08.231Z", - "field": "_", - "id": "DEFAULT", - "order": 0, - "policy": "include", - "rule": "regex", - "updated_at": "2023-11-09T15:13:08.231Z", - "value": ".*" - } - ], - "validation": { - "errors": [], - "state": "valid" - } - } + "field": "_", + "id": "DEFAULT", + "order": 1, 
+ "policy": "include", + "rule": "regex", + "value": ".*" } ] } @@ -184,3 +140,64 @@ PUT _connector/my-connector/_filtering "result": "updated" } ---- + +The following example updates the draft advanced sync rules for a MySQL connector with id `my-sql-connector`. Advanced sync rules are specific to each connector type. Refer to the references for connectors that support {enterprise-search-ref}/sync-rules.html#sync-rules-advanced[advanced sync rules] for syntax and examples. + +[source,console] +---- +PUT _connector/my-sql-connector/_filtering +{ + "advanced_snippet": { + "value": [{ + "tables": [ + "users", + "orders" + ], + "query": "SELECT users.id AS id, orders.order_id AS order_id FROM users JOIN orders ON users.id = orders.user_id" + }] + } +} +---- + +[source,console-result] +---- +{ + "result": "updated" +} +---- + + +//// +[source, console] +-------------------------------------------------- +PUT _connector/my-sql-connector/_filtering/_validation +{ + "validation": { + "state": "valid", + "errors": [] + } +} +-------------------------------------------------- +// TEST[continued] +//// + + +Note, you can also update draft `rules` and `advanced_snippet` in a single request. + +Once the draft is updated, its validation state is set to `edited`. The connector service will then validate the rules and report the validation state as either `invalid` or `valid`. If the state is `valid`, the draft filtering can be activated with: + + +[source,console] +---- +PUT _connector/my-sql-connector/_filtering/_activate +---- +// TEST[continued] + +[source,console-result] +---- +{ + "result": "updated" +} +---- + +Once filtering rules are activated, they will be applied to all subsequent full or incremental syncs. diff --git a/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc b/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc index 1b9a2854649e4..df7a18ec6ad66 100644 --- a/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc @@ -32,13 +32,13 @@ Updates the `scheduling` configuration of a connector. `scheduling`:: (Required, object) The scheduling configuration for the connector. This configuration determines frequency of synchronization operations for the connector. -The scheduling configuration includes the following attributes, each represented as a `ScheduleConfig` object: +The scheduling configuration includes the following attributes, each represented as a `ScheduleConfig` object. If the `scheduling` object does not include all schedule types, only those provided will be updated; the others will remain unchanged. -- `access_control` (Required, `ScheduleConfig` object) Defines the schedule for synchronizing access control settings of the connector. +- `access_control` (Optional, `ScheduleConfig` object) Defines the schedule for synchronizing access control settings of the connector. -- `full` (Required, `ScheduleConfig` object) Defines the schedule for a full content syncs. +- `full` (Optional, `ScheduleConfig` object) Defines the schedule for a full content syncs. -- `incremental` (Required, `ScheduleConfig` object) Defines the schedule for incremental content syncs. +- `incremental` (Optional, `ScheduleConfig` object) Defines the schedule for incremental content syncs. 
Each `ScheduleConfig` object includes the following sub-attributes: @@ -110,3 +110,25 @@ PUT _connector/my-connector/_scheduling "result": "updated" } ---- + +The following example updates `full` sync schedule only, other schedule types remain unchanged: + +[source,console] +---- +PUT _connector/my-connector/_scheduling +{ + "scheduling": { + "full": { + "enabled": true, + "interval": "0 10 0 * * ?" + } + } +} +---- + +[source,console-result] +---- +{ + "result": "updated" +} +---- From 0c41cb7e71ac31fde65bcad8971e67467f0a279a Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Fri, 26 Apr 2024 16:24:20 +0300 Subject: [PATCH 07/14] [TEST] simplify synthetic source yaml test (#107949) --- .../indices.create/20_synthetic_source.yml | 22 +++---------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 39787366c0cc9..874778f9bdb5c 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -68,25 +68,18 @@ object with unmapped fields: body: - '{ "create": { } }' - '{ "name": "aaaa", "some_string": "AaAa", "some_int": 1000, "some_double": 123.456789, "some_bool": true, "a.very.deeply.nested.field": "AAAA" }' - - '{ "create": { } }' - - '{ "name": "bbbb", "some_string": "BbBb", "some_int": 2000, "some_double": 321.987654, "some_bool": false, "a.very.deeply.nested.field": "BBBB" }' - do: search: index: test - - match: { hits.total.value: 2 } + - match: { hits.total.value: 1 } - match: { hits.hits.0._source.name: aaaa } - match: { hits.hits.0._source.some_string: AaAa } - match: { hits.hits.0._source.some_int: 1000 } - match: { hits.hits.0._source.some_double: 123.456789 } - match: { hits.hits.0._source.a.very.deeply.nested.field: AAAA } - match: { hits.hits.0._source.some_bool: true } - - match: { hits.hits.1._source.name: bbbb } - - match: { hits.hits.1._source.some_string: BbBb } - - match: { hits.hits.1._source.some_int: 2000 } - - match: { hits.hits.1._source.some_double: 321.987654 } - - match: { hits.hits.1._source.a.very.deeply.nested.field: BBBB } --- @@ -124,20 +117,15 @@ nested object with unmapped fields: body: - '{ "create": { } }' - '{ "path.to.name": "aaaa", "path.to.surname": "AaAa", "path.some.other.name": "AaAaAa" }' - - '{ "create": { } }' - - '{ "path.to.name": "bbbb", "path.to.surname": "BbBb", "path.some.other.name": "BbBbBb" }' - do: search: index: test - - match: { hits.total.value: 2 } + - match: { hits.total.value: 1 } - match: { hits.hits.0._source.path.to.name: aaaa } - match: { hits.hits.0._source.path.to.surname: AaAa } - match: { hits.hits.0._source.path.some.other.name: AaAaAa } - - match: { hits.hits.1._source.path.to.name: bbbb } - - match: { hits.hits.1._source.path.to.surname: BbBb } - - match: { hits.hits.1._source.path.some.other.name: BbBbBb } --- @@ -175,15 +163,11 @@ empty object with unmapped fields: body: - '{ "create": { } }' - '{ "path.to.surname": "AaAa", "path.some.other.name": "AaAaAa" }' - - '{ "create": { } }' - - '{ "path.to.surname": "BbBb", "path.some.other.name": "BbBbBb" }' - do: search: index: test - - match: { hits.total.value: 2 } + - match: { hits.total.value: 1 } - match: { hits.hits.0._source.path.to.surname: AaAa 
} - match: { hits.hits.0._source.path.some.other.name: AaAaAa } - - match: { hits.hits.1._source.path.to.surname: BbBb } - - match: { hits.hits.1._source.path.some.other.name: BbBbBb } From 3ed42f38c3363b5108d0ed07c49af186d438694e Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Fri, 26 Apr 2024 08:24:13 -0600 Subject: [PATCH 08/14] Add data-stream auto-sharding APM metrics (#107593) Add APM metrics to monitor data stream auto-sharding events. The new metrics are: - es.auto_sharding.increase_shards.total - es.auto_sharding.decrease_shards.total - es.auto_sharding.cooldown_prevented_increase.total - es.auto_sharding.cooldown_prevented_decrease.total The first two track situations where the shards increase or decrease during a rollover. The latter two events track when the auto-sharding logic recommends an increase or decrease but the shard change did not take place because we are in a cooldown period due to a recent increase or decrease auto-sharding event. --- docs/changelog/107593.yaml | 5 + .../datastreams/DataStreamAutoshardingIT.java | 62 +++++++- .../DataStreamGetWriteIndexTests.java | 5 +- ...etadataDataStreamRolloverServiceTests.java | 25 +++- .../rollover/MetadataRolloverService.java | 33 ++++- .../elasticsearch/cluster/ClusterModule.java | 5 + ...adataRolloverServiceAutoShardingTests.java | 140 +++++++++++++++++- .../MetadataRolloverServiceTests.java | 26 +++- .../TransportRolloverActionTests.java | 5 +- .../metadata/DataStreamTestHelper.java | 7 +- 10 files changed, 291 insertions(+), 22 deletions(-) create mode 100644 docs/changelog/107593.yaml diff --git a/docs/changelog/107593.yaml b/docs/changelog/107593.yaml new file mode 100644 index 0000000000000..2e3d2cbc80119 --- /dev/null +++ b/docs/changelog/107593.yaml @@ -0,0 +1,5 @@ +pr: 107593 +summary: Add auto-sharding APM metrics +area: Infra/Metrics +type: enhancement +issues: [] diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java index f7743ebac9caf..a4c9a9d3e1c67 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.admin.indices.rollover.Condition; import org.elasticsearch.action.admin.indices.rollover.MaxDocsCondition; +import org.elasticsearch.action.admin.indices.rollover.MetadataRolloverService; import org.elasticsearch.action.admin.indices.rollover.OptimalShardCountCondition; import org.elasticsearch.action.admin.indices.rollover.RolloverConditions; import org.elasticsearch.action.admin.indices.rollover.RolloverInfo; @@ -25,6 +26,7 @@ import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.datastreams.CreateDataStreamAction; +import org.elasticsearch.action.datastreams.autosharding.AutoShardingType; import org.elasticsearch.action.datastreams.autosharding.DataStreamAutoShardingService; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.cluster.ClusterState; @@ -49,7 +51,11 @@ import org.elasticsearch.index.shard.ShardPath; import org.elasticsearch.index.store.StoreStats; import org.elasticsearch.plugins.Plugin; +import 
org.elasticsearch.plugins.PluginsService; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.telemetry.InstrumentType; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.xcontent.XContentType; @@ -60,6 +66,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -67,7 +74,9 @@ import static org.elasticsearch.action.datastreams.autosharding.DataStreamAutoShardingService.DATA_STREAMS_AUTO_SHARDING_ENABLED; import static org.elasticsearch.cluster.metadata.MetadataIndexTemplateService.DEFAULT_TIMESTAMP_FIELD; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; @@ -77,7 +86,12 @@ public class DataStreamAutoshardingIT extends ESIntegTestCase { @Override protected Collection> nodePlugins() { - return List.of(DataStreamsPlugin.class, MockTransportService.TestPlugin.class, TestAutoshardingPlugin.class); + return List.of( + DataStreamsPlugin.class, + MockTransportService.TestPlugin.class, + TestAutoshardingPlugin.class, + TestTelemetryPlugin.class + ); } @Before @@ -109,6 +123,7 @@ public void testRolloverOnAutoShardCondition() throws Exception { indexDocs(dataStreamName, randomIntBetween(100, 200)); { + resetTelemetry(); ClusterState clusterStateBeforeRollover = internalCluster().getCurrentMasterNodeInstance(ClusterService.class).state(); DataStream dataStreamBeforeRollover = clusterStateBeforeRollover.getMetadata().dataStreams().get(dataStreamName); String assignedShardNodeId = clusterStateBeforeRollover.routingTable() @@ -152,11 +167,14 @@ public void testRolloverOnAutoShardCondition() throws Exception { assertThat(metConditions.get(0).value(), instanceOf(Integer.class)); int autoShardingRolloverInfo = (int) metConditions.get(0).value(); assertThat(autoShardingRolloverInfo, is(5)); + + assertTelemetry(MetadataRolloverService.AUTO_SHARDING_METRIC_NAMES.get(AutoShardingType.INCREASE_SHARDS)); } // let's do another rollover now that will not increase the number of shards because the increase shards cooldown has not lapsed, // however the rollover will use the existing/previous auto shard configuration and the new generation index will have 5 shards { + resetTelemetry(); ClusterState clusterStateBeforeRollover = internalCluster().getCurrentMasterNodeInstance(ClusterService.class).state(); DataStream dataStreamBeforeRollover = clusterStateBeforeRollover.getMetadata().dataStreams().get(dataStreamName); String assignedShardNodeId = clusterStateBeforeRollover.routingTable() @@ -193,6 +211,8 @@ public void testRolloverOnAutoShardCondition() throws Exception { // we remained on 5 shards due to the increase shards cooldown assertThat(thirdGenerationMeta.getNumberOfShards(), is(5)); + + assertTelemetry(MetadataRolloverService.AUTO_SHARDING_METRIC_NAMES.get(AutoShardingType.COOLDOWN_PREVENTED_INCREASE)); } { @@ -566,4 +586,44 @@ private static void mockStatsForIndex( } } } + + private static void resetTelemetry() { + for (PluginsService pluginsService : 
internalCluster().getInstances(PluginsService.class)) { + final TestTelemetryPlugin telemetryPlugin = pluginsService.filterPlugins(TestTelemetryPlugin.class).findFirst().orElseThrow(); + telemetryPlugin.resetMeter(); + } + } + + private static void assertTelemetry(String expectedEmittedMetric) { + Map> measurements = new HashMap<>(); + for (PluginsService pluginsService : internalCluster().getInstances(PluginsService.class)) { + final TestTelemetryPlugin telemetryPlugin = pluginsService.filterPlugins(TestTelemetryPlugin.class).findFirst().orElseThrow(); + + telemetryPlugin.collect(); + + List autoShardingMetrics = telemetryPlugin.getRegisteredMetrics(InstrumentType.LONG_COUNTER) + .stream() + .filter(metric -> metric.startsWith("es.auto_sharding.")) + .sorted() + .toList(); + + assertEquals(autoShardingMetrics, MetadataRolloverService.AUTO_SHARDING_METRIC_NAMES.values().stream().sorted().toList()); + + for (String metricName : MetadataRolloverService.AUTO_SHARDING_METRIC_NAMES.values()) { + measurements.computeIfAbsent(metricName, n -> new ArrayList<>()) + .addAll(telemetryPlugin.getLongCounterMeasurement(metricName)); + } + } + + // assert other metrics not emitted + MetadataRolloverService.AUTO_SHARDING_METRIC_NAMES.values() + .stream() + .filter(metric -> metric.equals(expectedEmittedMetric) == false) + .forEach(metric -> assertThat(measurements.get(metric), empty())); + + assertThat(measurements.get(expectedEmittedMetric), hasSize(1)); + Measurement measurement = measurements.get(expectedEmittedMetric).get(0); + assertThat(measurement.getLong(), is(1L)); + assertFalse(measurement.isDouble()); + } } diff --git a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamGetWriteIndexTests.java b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamGetWriteIndexTests.java index 111a46bb7098b..ccb8abbb9efab 100644 --- a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamGetWriteIndexTests.java +++ b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamGetWriteIndexTests.java @@ -49,6 +49,7 @@ import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.ShardLimitValidator; import org.elasticsearch.script.ScriptCompiler; +import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; @@ -272,13 +273,15 @@ public void setup() throws Exception { indicesService, xContentRegistry() ); + TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); rolloverService = new MetadataRolloverService( testThreadPool, createIndexService, indexAliasesService, EmptySystemIndices.INSTANCE, WriteLoadForecaster.DEFAULT, - clusterService + clusterService, + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); } diff --git a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/MetadataDataStreamRolloverServiceTests.java b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/MetadataDataStreamRolloverServiceTests.java index 2185f8f50a93f..86f6dea220e84 100644 --- a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/MetadataDataStreamRolloverServiceTests.java +++ b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/MetadataDataStreamRolloverServiceTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.MapperTestUtils; 
+import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; @@ -88,6 +89,7 @@ public void testRolloverClusterStateForDataStream() throws Exception { ); builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { @@ -95,7 +97,8 @@ public void testRolloverClusterStateForDataStream() throws Exception { dataStream, testThreadPool, Set.of(createSettingsProvider(xContentRegistry())), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); List> metConditions = Collections.singletonList(condition); @@ -184,6 +187,7 @@ public void testRolloverAndMigrateDataStream() throws Exception { ); builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { @@ -191,7 +195,8 @@ public void testRolloverAndMigrateDataStream() throws Exception { dataStream, testThreadPool, Set.of(createSettingsProvider(xContentRegistry())), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); List> metConditions = Collections.singletonList(condition); @@ -271,14 +276,15 @@ public void testChangingIndexModeFromTimeSeriesToSomethingElseNoEffectOnExisting ); builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); - + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { MetadataRolloverService rolloverService = DataStreamTestHelper.getMetadataRolloverService( dataStream, testThreadPool, Set.of(createSettingsProvider(xContentRegistry())), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); List> metConditions = Collections.singletonList(condition); @@ -336,14 +342,16 @@ public void testRolloverClusterStateWithBrokenOlderTsdbDataStream() throws Excep int numberOfBackingIndices = randomIntBetween(1, 3); ClusterState clusterState = createClusterState(dataStreamName, numberOfBackingIndices, now, true); DataStream dataStream = clusterState.metadata().dataStreams().get(dataStreamName); - ThreadPool testThreadPool = new TestThreadPool(getTestName()); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); + try { MetadataRolloverService rolloverService = DataStreamTestHelper.getMetadataRolloverService( dataStream, testThreadPool, Set.of(createSettingsProvider(xContentRegistry())), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); List> metConditions = Collections.singletonList(condition); @@ -417,14 +425,15 @@ public void testRolloverClusterStateWithBrokenTsdbDataStream() throws Exception int numberOfBackingIndices = 
randomIntBetween(1, 3); ClusterState clusterState = createClusterState(dataStreamName, numberOfBackingIndices, now, false); DataStream dataStream = clusterState.metadata().dataStreams().get(dataStreamName); - + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { MetadataRolloverService rolloverService = DataStreamTestHelper.getMetadataRolloverService( dataStream, testThreadPool, Set.of(createSettingsProvider(xContentRegistry())), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); List> metConditions = Collections.singletonList(condition); diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java b/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java index 45368c185fb77..4284d860d85c0 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java @@ -13,6 +13,7 @@ import org.elasticsearch.action.admin.indices.create.CreateIndexClusterStateUpdateRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.datastreams.autosharding.AutoShardingResult; +import org.elasticsearch.action.datastreams.autosharding.AutoShardingType; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.AliasAction; @@ -46,6 +47,8 @@ import org.elasticsearch.indices.SystemIndices; import org.elasticsearch.snapshots.SnapshotInProgressException; import org.elasticsearch.snapshots.SnapshotsService; +import org.elasticsearch.telemetry.TelemetryProvider; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.threadpool.ThreadPool; import java.time.Instant; @@ -70,8 +73,17 @@ public class MetadataRolloverService { private static final Logger logger = LogManager.getLogger(MetadataRolloverService.class); private static final Pattern INDEX_NAME_PATTERN = Pattern.compile("^.*-\\d+$"); private static final List VALID_ROLLOVER_TARGETS = List.of(ALIAS, DATA_STREAM); - public static final Settings HIDDEN_INDEX_SETTINGS = Settings.builder().put(IndexMetadata.SETTING_INDEX_HIDDEN, true).build(); + public static final Map AUTO_SHARDING_METRIC_NAMES = Map.of( + AutoShardingType.INCREASE_SHARDS, + "es.auto_sharding.increase_shards.total", + AutoShardingType.DECREASE_SHARDS, + "es.auto_sharding.decrease_shards.total", + AutoShardingType.COOLDOWN_PREVENTED_INCREASE, + "es.auto_sharding.cooldown_prevented_increase.total", + AutoShardingType.COOLDOWN_PREVENTED_DECREASE, + "es.auto_sharding.cooldown_prevented_decrease.total" + ); private final ThreadPool threadPool; private final MetadataCreateIndexService createIndexService; @@ -79,6 +91,7 @@ public class MetadataRolloverService { private final SystemIndices systemIndices; private final WriteLoadForecaster writeLoadForecaster; private final ClusterService clusterService; + private final MeterRegistry meterRegistry; @Inject public MetadataRolloverService( @@ -87,7 +100,8 @@ public MetadataRolloverService( MetadataIndexAliasesService indexAliasesService, SystemIndices systemIndices, WriteLoadForecaster writeLoadForecaster, - ClusterService clusterService + 
ClusterService clusterService, + TelemetryProvider telemetryProvider ) { this.threadPool = threadPool; this.createIndexService = createIndexService; @@ -95,6 +109,14 @@ public MetadataRolloverService( this.systemIndices = systemIndices; this.writeLoadForecaster = writeLoadForecaster; this.clusterService = clusterService; + this.meterRegistry = telemetryProvider.getMeterRegistry(); + + for (var entry : AUTO_SHARDING_METRIC_NAMES.entrySet()) { + final AutoShardingType type = entry.getKey(); + final String metricName = entry.getValue(); + final String description = String.format(Locale.ROOT, "auto-sharding %s counter", type.name().toLowerCase(Locale.ROOT)); + meterRegistry.registerLongCounter(metricName, description, "unit"); + } } public record RolloverResult(String rolloverIndexName, String sourceIndexName, ClusterState clusterState) { @@ -330,6 +352,13 @@ private RolloverResult rolloverDataStream( (builder, indexMetadata) -> builder.put(dataStream.rolloverFailureStore(indexMetadata.getIndex(), newGeneration)) ); } else { + if (autoShardingResult != null) { + final String metricName = AUTO_SHARDING_METRIC_NAMES.get(autoShardingResult.type()); + if (metricName != null) { + meterRegistry.getLongCounter(metricName).increment(); + } + } + DataStreamAutoShardingEvent dataStreamAutoShardingEvent = autoShardingResult == null ? dataStream.getAutoShardingEvent() : switch (autoShardingResult.type()) { diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 809e069b0028b..60140e2a08714 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -8,6 +8,7 @@ package org.elasticsearch.cluster; +import org.elasticsearch.action.admin.indices.rollover.MetadataRolloverService; import org.elasticsearch.cluster.action.index.MappingUpdatedAction; import org.elasticsearch.cluster.action.shard.ShardStateAction; import org.elasticsearch.cluster.metadata.ComponentTemplateMetadata; @@ -120,6 +121,7 @@ public class ClusterModule extends AbstractModule { final ShardsAllocator shardsAllocator; private final ShardRoutingRoleStrategy shardRoutingRoleStrategy; private final AllocationStatsService allocationStatsService; + private final TelemetryProvider telemetryProvider; public ClusterModule( Settings settings, @@ -157,6 +159,7 @@ public ClusterModule( ); this.metadataDeleteIndexService = new MetadataDeleteIndexService(settings, clusterService, allocationService); this.allocationStatsService = new AllocationStatsService(clusterService, clusterInfoService, shardsAllocator, writeLoadForecaster); + this.telemetryProvider = telemetryProvider; } static ShardRoutingRoleStrategy getShardRoutingRoleStrategy(List clusterPlugins) { @@ -444,6 +447,8 @@ protected void configure() { bind(ShardsAllocator.class).toInstance(shardsAllocator); bind(ShardRoutingRoleStrategy.class).toInstance(shardRoutingRoleStrategy); bind(AllocationStatsService.class).toInstance(allocationStatsService); + bind(TelemetryProvider.class).toInstance(telemetryProvider); + bind(MetadataRolloverService.class).asEagerSingleton(); } public void setExistingShardsAllocators(GatewayAllocator gatewayAllocator) { diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceAutoShardingTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceAutoShardingTests.java index 
906b2434f7d39..41176276a42c0 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceAutoShardingTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceAutoShardingTests.java @@ -25,6 +25,8 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; @@ -41,6 +43,7 @@ import static org.elasticsearch.action.datastreams.autosharding.AutoShardingType.NOT_APPLICABLE; import static org.elasticsearch.action.datastreams.autosharding.AutoShardingType.NO_CHANGE_REQUIRED; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_UUID; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasItem; @@ -82,17 +85,20 @@ public void testRolloverDataStreamWithoutExistingAutosharding() throws Exception builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { MetadataRolloverService rolloverService = DataStreamTestHelper.getMetadataRolloverService( dataStream, testThreadPool, Set.of(), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); // let's rollover the data stream using all the possible autosharding recommendations for (AutoShardingType type : AutoShardingType.values()) { + telemetryPlugin.resetMeter(); long before = testThreadPool.absoluteTimeInMillis(); switch (type) { case INCREASE_SHARDS -> { @@ -111,6 +117,15 @@ public void testRolloverDataStreamWithoutExistingAutosharding() throws Exception false ); assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), metConditions, 5); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.increase_shards.total", + List.of( + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } case DECREASE_SHARDS -> { { @@ -138,6 +153,15 @@ public void testRolloverDataStreamWithoutExistingAutosharding() throws Exception metConditions, 1 ); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.decrease_shards.total", + List.of( + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } { @@ -190,6 +214,15 @@ public void testRolloverDataStreamWithoutExistingAutosharding() throws Exception ); // the expected number of shards remains 3 for the data stream due to the remaining cooldown assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), List.of(), 3); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.cooldown_prevented_increase.total", + List.of( + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } case COOLDOWN_PREVENTED_DECREASE -> { 
MetadataRolloverService.RolloverResult rolloverResult = rolloverService.rolloverClusterState( @@ -207,6 +240,15 @@ public void testRolloverDataStreamWithoutExistingAutosharding() throws Exception ); // the expected number of shards remains 3 for the data stream due to the remaining cooldown assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), List.of(), 3); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.cooldown_prevented_decrease.total", + List.of( + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total" + ) + ); } case NO_CHANGE_REQUIRED -> { List> metConditions = List.of(new MaxDocsCondition(randomNonNegativeLong())); @@ -224,6 +266,16 @@ public void testRolloverDataStreamWithoutExistingAutosharding() throws Exception false ); assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), metConditions, 3); + assertTelemetry( + telemetryPlugin, + null, + List.of( + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } case NOT_APPLICABLE -> { List> metConditions = List.of(new MaxDocsCondition(randomNonNegativeLong())); @@ -241,6 +293,16 @@ public void testRolloverDataStreamWithoutExistingAutosharding() throws Exception false ); assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), metConditions, 3); + assertTelemetry( + telemetryPlugin, + null, + List.of( + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } } } @@ -285,17 +347,20 @@ public void testRolloverDataStreamWithExistingAutoShardEvent() throws Exception builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { MetadataRolloverService rolloverService = DataStreamTestHelper.getMetadataRolloverService( dataStream, testThreadPool, Set.of(), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); // let's rollover the data stream using all the possible autosharding recommendations for (AutoShardingType type : AutoShardingType.values()) { + telemetryPlugin.resetMeter(); long before = testThreadPool.absoluteTimeInMillis(); switch (type) { case INCREASE_SHARDS -> { @@ -314,6 +379,15 @@ public void testRolloverDataStreamWithExistingAutoShardEvent() throws Exception false ); assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), metConditions, 5); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.increase_shards.total", + List.of( + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } case DECREASE_SHARDS -> { { @@ -341,6 +415,15 @@ public void testRolloverDataStreamWithExistingAutoShardEvent() throws Exception metConditions, 1 ); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.decrease_shards.total", + List.of( + "es.auto_sharding.increase_shards.total", + 
"es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } { @@ -386,6 +469,15 @@ public void testRolloverDataStreamWithExistingAutoShardEvent() throws Exception ); // the expected number of shards remains 3 for the data stream due to the remaining cooldown assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), List.of(), 3); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.cooldown_prevented_increase.total", + List.of( + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } case COOLDOWN_PREVENTED_DECREASE -> { MetadataRolloverService.RolloverResult rolloverResult = rolloverService.rolloverClusterState( @@ -403,6 +495,15 @@ public void testRolloverDataStreamWithExistingAutoShardEvent() throws Exception ); // the expected number of shards remains 3 for the data stream due to the remaining cooldown assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), List.of(), 3); + assertTelemetry( + telemetryPlugin, + "es.auto_sharding.cooldown_prevented_decrease.total", + List.of( + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total" + ) + ); } case NO_CHANGE_REQUIRED -> { List> metConditions = List.of(new MaxDocsCondition(randomNonNegativeLong())); @@ -420,6 +521,16 @@ public void testRolloverDataStreamWithExistingAutoShardEvent() throws Exception false ); assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), metConditions, 3); + assertTelemetry( + telemetryPlugin, + null, + List.of( + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } case NOT_APPLICABLE -> { List> metConditions = List.of(new MaxDocsCondition(randomNonNegativeLong())); @@ -438,6 +549,16 @@ public void testRolloverDataStreamWithExistingAutoShardEvent() throws Exception ); // if the auto sharding is not applicable we just use whatever's in the index template (1 shard in this case) assertRolloverResult(dataStream, rolloverResult, before, testThreadPool.absoluteTimeInMillis(), metConditions, 1); + assertTelemetry( + telemetryPlugin, + null, + List.of( + "es.auto_sharding.decrease_shards.total", + "es.auto_sharding.increase_shards.total", + "es.auto_sharding.cooldown_prevented_increase.total", + "es.auto_sharding.cooldown_prevented_decrease.total" + ) + ); } } } @@ -500,4 +621,19 @@ private static IndexMetadata.Builder getIndexMetadataBuilderForIndex(Index index .numberOfShards(numberOfShards) .numberOfReplicas(1); } + + private static void assertTelemetry(TestTelemetryPlugin telemetryPlugin, String presentMetric, List missingMetrics) { + if (presentMetric != null) { + final List measurements = telemetryPlugin.getLongCounterMeasurement(presentMetric); + assertThat(measurements, hasSize(1)); + Measurement measurement = measurements.get(0); + assertThat(measurement.getLong(), is(1L)); + assertFalse(measurement.isDouble()); + } + + for (String metric : missingMetrics) { + final List measurements = telemetryPlugin.getLongCounterMeasurement(metric); + assertThat(measurements, empty()); + } + } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java 
b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java index 0bf92df006894..149752578e1ea 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverServiceTests.java @@ -37,6 +37,7 @@ import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.indices.EmptySystemIndices; +import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; @@ -52,6 +53,7 @@ import java.util.Set; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasItem; @@ -533,6 +535,7 @@ public void testRolloverClusterState() throws Exception { final ClusterState clusterState = ClusterState.builder(new ClusterName("test")) .metadata(Metadata.builder().put(indexMetadata)) .build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { @@ -540,7 +543,8 @@ public void testRolloverClusterState() throws Exception { null, testThreadPool, Set.of(), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); @@ -586,6 +590,10 @@ public void testRolloverClusterState() throws Exception { assertThat(info.getTime(), greaterThanOrEqualTo(before)); assertThat(info.getMetConditions(), hasSize(1)); assertThat(info.getMetConditions().get(0).value(), equalTo(condition.value())); + + for (String metric : MetadataRolloverService.AUTO_SHARDING_METRIC_NAMES.values()) { + assertThat(telemetryPlugin.getLongCounterMeasurement(metric), empty()); + } } finally { testThreadPool.shutdown(); } @@ -606,6 +614,7 @@ public void testRolloverClusterStateForDataStream() throws Exception { } builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { @@ -613,7 +622,8 @@ public void testRolloverClusterStateForDataStream() throws Exception { dataStream, testThreadPool, Set.of(), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); @@ -675,6 +685,7 @@ public void testRolloverClusterStateForDataStreamFailureStore() throws Exception dataStream.getFailureIndices().forEach(index -> builder.put(DataStreamTestHelper.getIndexMetadataBuilderForIndex(index))); builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = new TestThreadPool(getTestName()); try { @@ -682,7 +693,8 @@ public void testRolloverClusterStateForDataStreamFailureStore() throws Exception dataStream, testThreadPool, Set.of(), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition 
condition = new MaxDocsCondition(randomNonNegativeLong()); @@ -782,13 +794,15 @@ public void testValidation() throws Exception { MetadataCreateIndexService createIndexService = mock(MetadataCreateIndexService.class); MetadataIndexAliasesService metadataIndexAliasesService = mock(MetadataIndexAliasesService.class); ClusterService clusterService = mock(ClusterService.class); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); MetadataRolloverService rolloverService = new MetadataRolloverService( null, createIndexService, metadataIndexAliasesService, EmptySystemIndices.INSTANCE, WriteLoadForecaster.DEFAULT, - clusterService + clusterService, + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); String newIndexName = useDataStream == false && randomBoolean() ? "logs-index-9" : null; @@ -821,13 +835,15 @@ public void testRolloverClusterStateForDataStreamNoTemplate() throws Exception { } builder.put(dataStream); final ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(builder).build(); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); ThreadPool testThreadPool = mock(ThreadPool.class); MetadataRolloverService rolloverService = DataStreamTestHelper.getMetadataRolloverService( dataStream, testThreadPool, Set.of(), - xContentRegistry() + xContentRegistry(), + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); MaxDocsCondition condition = new MaxDocsCondition(randomNonNegativeLong()); diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverActionTests.java index 427d2769b7399..42c4dec3e219b 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverActionTests.java @@ -64,6 +64,7 @@ import org.elasticsearch.indices.EmptySystemIndices; import org.elasticsearch.search.suggest.completion.CompletionStats; import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -105,13 +106,15 @@ public class TransportRolloverActionTests extends ESTestCase { final MetadataDataStreamsService mockMetadataDataStreamService = mock(MetadataDataStreamsService.class); final Client mockClient = mock(Client.class); final AllocationService mockAllocationService = mock(AllocationService.class); + final TestTelemetryPlugin telemetryPlugin = new TestTelemetryPlugin(); final MetadataRolloverService rolloverService = new MetadataRolloverService( mockThreadPool, mockCreateIndexService, mdIndexAliasesService, EmptySystemIndices.INSTANCE, WriteLoadForecaster.DEFAULT, - mockClusterService + mockClusterService, + telemetryPlugin.getTelemetryProvider(Settings.EMPTY) ); final DataStreamAutoShardingService dataStreamAutoShardingService = new DataStreamAutoShardingService( diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java b/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java index 6c038470b158d..e6252e46a12a3 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java +++ 
b/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java @@ -47,6 +47,7 @@ import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.ShardLimitValidator; import org.elasticsearch.script.ScriptCompiler; +import org.elasticsearch.telemetry.TelemetryProvider; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -623,7 +624,8 @@ public static MetadataRolloverService getMetadataRolloverService( DataStream dataStream, ThreadPool testThreadPool, Set providers, - NamedXContentRegistry registry + NamedXContentRegistry registry, + TelemetryProvider telemetryProvider ) throws Exception { DateFieldMapper dateFieldMapper = new DateFieldMapper.Builder( "@timestamp", @@ -684,7 +686,8 @@ public static MetadataRolloverService getMetadataRolloverService( indexAliasesService, EmptySystemIndices.INSTANCE, WriteLoadForecaster.DEFAULT, - clusterService + clusterService, + telemetryProvider ); } From 658c01401696c4685e8f7acf18865e2061fa534e Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Fri, 26 Apr 2024 08:03:30 -0700 Subject: [PATCH 09/14] Block readiness on bad initial file settings (#107775) If file settings have an update that fails, existing applied file settings continue to work. But if the initial file settings fail to process, readiness should be blocked. This commit adjusts readiness to look for this special initialization case. relates #107738 --- .../java/org/elasticsearch/readiness/ReadinessClusterIT.java | 1 - .../elasticsearch/cluster/metadata/ReservedStateMetadata.java | 4 +++- .../java/org/elasticsearch/readiness/ReadinessService.java | 2 +- .../cluster/metadata/ReservedStateMetadataTests.java | 3 ++- .../cluster/metadata/ToAndFromJsonMetadataTests.java | 4 ++-- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java b/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java index 2ecdd06f379d2..1f8d55516d508 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java @@ -251,7 +251,6 @@ private void writeFileSettings(String json) throws Exception { logger.info("--> New file settings: [{}]", Strings.format(json, version)); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/107744") public void testNotReadyOnBadFileSettings() throws Exception { internalCluster().setBootstrapMasterNodeIndex(0); logger.info("--> start data node / non master node"); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/ReservedStateMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/ReservedStateMetadata.java index dd5ca03cf759a..ec8200bf2d701 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/ReservedStateMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/ReservedStateMetadata.java @@ -46,6 +46,8 @@ public record ReservedStateMetadata( ReservedStateErrorMetadata errorMetadata ) implements SimpleDiffable, ToXContentFragment { + public static final Long NO_VERSION = Long.MIN_VALUE; // use min long as sentinel for uninitialized version + private static final ParseField VERSION = new ParseField("version"); private static final ParseField HANDLERS = new ParseField("handlers"); private static final ParseField 
ERRORS_METADATA = new ParseField("errors"); @@ -209,7 +211,7 @@ public static class Builder { */ public Builder(String namespace) { this.namespace = namespace; - this.version = -1L; + this.version = NO_VERSION; this.handlers = new HashMap<>(); this.errorMetadata = null; } diff --git a/server/src/main/java/org/elasticsearch/readiness/ReadinessService.java b/server/src/main/java/org/elasticsearch/readiness/ReadinessService.java index 1cac133106403..61425250c19b4 100644 --- a/server/src/main/java/org/elasticsearch/readiness/ReadinessService.java +++ b/server/src/main/java/org/elasticsearch/readiness/ReadinessService.java @@ -254,7 +254,7 @@ public void clusterChanged(ClusterChangedEvent event) { // protected to allow mock service to override protected boolean areFileSettingsApplied(ClusterState clusterState) { ReservedStateMetadata fileSettingsMetadata = clusterState.metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE); - return fileSettingsMetadata != null; + return fileSettingsMetadata != null && fileSettingsMetadata.version().equals(ReservedStateMetadata.NO_VERSION) == false; } private void setReady(boolean ready) { diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/ReservedStateMetadataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/ReservedStateMetadataTests.java index 46be49ad7111f..5086813cc5c13 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/ReservedStateMetadataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/ReservedStateMetadataTests.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.List; +import static org.elasticsearch.cluster.metadata.ReservedStateMetadata.NO_VERSION; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; @@ -78,7 +79,7 @@ public void testXContent() throws IOException { public void testReservedStateVersionWithError() { final ReservedStateMetadata meta = createRandom(false, true); - assertEquals(-1L, meta.version().longValue()); + assertEquals(NO_VERSION.longValue(), meta.version().longValue()); } private static ReservedStateMetadata createRandom(boolean addHandlers, boolean addErrors) { diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetadataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetadataTests.java index aa9d0b9368fa6..3a522f3f5c06c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetadataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetadataTests.java @@ -776,7 +776,7 @@ public void testToXContentAPIReservedMetadata() throws IOException { }, "reserved_state" : { "namespace_one" : { - "version" : -1, + "version" : -9223372036854775808, "handlers" : { "one" : { "keys" : [ @@ -801,7 +801,7 @@ public void testToXContentAPIReservedMetadata() throws IOException { } }, "namespace_two" : { - "version" : -1, + "version" : -9223372036854775808, "handlers" : { "three" : { "keys" : [ From d7e524fcf9835f4b31369dd2cd0ef8da4994c9a3 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Fri, 26 Apr 2024 08:08:24 -0700 Subject: [PATCH 10/14] Make auto heap configuration configurable in server cli subclasses (#107919) This commit makes auto heap configuration extendible so that serverless can tweak the configuration based on project settings. 
--- .../server/cli/JvmOptionsParser.java | 20 +- .../server/cli/MachineDependentHeap.java | 228 ++++++++---------- .../elasticsearch/server/cli/ServerCli.java | 2 +- .../server/cli/MachineDependentHeapTests.java | 97 +++----- .../server/cli/NodeRoleParserTests.java | 103 -------- .../windows/service/WindowsServiceDaemon.java | 3 +- 6 files changed, 143 insertions(+), 310 deletions(-) delete mode 100644 distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/NodeRoleParserTests.java diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/JvmOptionsParser.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/JvmOptionsParser.java index 35f3f62122f0c..0bfa0f211807d 100644 --- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/JvmOptionsParser.java +++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/JvmOptionsParser.java @@ -72,13 +72,18 @@ SortedMap invalidLines() { * @param args the start-up arguments * @param processInfo information about the CLI process. * @param tmpDir the directory that should be passed to {@code -Djava.io.tmpdir} + * @param machineDependentHeap the heap configurator to use * @return the list of options to put on the Java command line * @throws InterruptedException if the java subprocess is interrupted * @throws IOException if there is a problem reading any of the files * @throws UserException if there is a problem parsing the `jvm.options` file or `jvm.options.d` files */ - public static List determineJvmOptions(ServerArgs args, ProcessInfo processInfo, Path tmpDir) throws InterruptedException, - IOException, UserException { + public static List determineJvmOptions( + ServerArgs args, + ProcessInfo processInfo, + Path tmpDir, + MachineDependentHeap machineDependentHeap + ) throws InterruptedException, IOException, UserException { final JvmOptionsParser parser = new JvmOptionsParser(); final Map substitutions = new HashMap<>(); @@ -89,7 +94,7 @@ public static List determineJvmOptions(ServerArgs args, ProcessInfo proc try { return Collections.unmodifiableList( - parser.jvmOptions(args, args.configDir(), tmpDir, envOptions, substitutions, processInfo.sysprops()) + parser.jvmOptions(args, args.configDir(), tmpDir, envOptions, substitutions, processInfo.sysprops(), machineDependentHeap) ); } catch (final JvmOptionsFileParserException e) { final String errorMessage = String.format( @@ -125,7 +130,8 @@ private List jvmOptions( Path tmpDir, final String esJavaOpts, final Map substitutions, - final Map cliSysprops + final Map cliSysprops, + final MachineDependentHeap machineDependentHeap ) throws InterruptedException, IOException, JvmOptionsFileParserException, UserException { final List jvmOptions = readJvmOptionsFiles(config); @@ -135,10 +141,8 @@ private List jvmOptions( } final List substitutedJvmOptions = substitutePlaceholders(jvmOptions, Collections.unmodifiableMap(substitutions)); - final MachineDependentHeap machineDependentHeap = new MachineDependentHeap( - new OverridableSystemMemoryInfo(substitutedJvmOptions, new DefaultSystemMemoryInfo()) - ); - substitutedJvmOptions.addAll(machineDependentHeap.determineHeapSettings(config, substitutedJvmOptions)); + final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo(substitutedJvmOptions, new DefaultSystemMemoryInfo()); + substitutedJvmOptions.addAll(machineDependentHeap.determineHeapSettings(args.nodeSettings(), memoryInfo, substitutedJvmOptions)); final List ergonomicJvmOptions = 
JvmErgonomics.choose(substitutedJvmOptions, args.nodeSettings()); final List systemJvmOptions = SystemJvmOptions.systemJvmOptions(args.nodeSettings(), cliSysprops); diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java index 87c4883ca3073..b7ef9e46a758d 100644 --- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java +++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java @@ -8,24 +8,22 @@ package org.elasticsearch.server.cli; -import org.elasticsearch.common.ParsingException; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.xcontent.XContentParserConfiguration; -import org.elasticsearch.xcontent.yaml.YamlXContent; +import org.elasticsearch.node.NodeRoleSettings; import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.function.Function; import static java.lang.Math.max; import static java.lang.Math.min; +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.MASTER_ROLE; +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.ML_ROLE; +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE; import static org.elasticsearch.server.cli.JvmOption.isInitialHeapSpecified; import static org.elasticsearch.server.cli.JvmOption.isMaxHeapSpecified; import static org.elasticsearch.server.cli.JvmOption.isMinHeapSpecified; @@ -33,28 +31,26 @@ /** * Determines optimal default heap settings based on available system memory and assigned node roles. */ -public final class MachineDependentHeap { +public class MachineDependentHeap { private static final long GB = 1024L * 1024L * 1024L; // 1GB private static final long MAX_HEAP_SIZE = GB * 31; // 31GB private static final long MIN_HEAP_SIZE = 1024 * 1024 * 128; // 128MB - private static final int DEFAULT_HEAP_SIZE_MB = 1024; - private static final String ELASTICSEARCH_YML = "elasticsearch.yml"; - private final SystemMemoryInfo systemMemoryInfo; - - public MachineDependentHeap(SystemMemoryInfo systemMemoryInfo) { - this.systemMemoryInfo = systemMemoryInfo; - } + public MachineDependentHeap() {} /** * Calculate heap options. 
* - * @param configDir path to config directory + * @param nodeSettings the settings for the node * @param userDefinedJvmOptions JVM arguments provided by the user * @return final heap options, or an empty collection if user provided heap options are to be used * @throws IOException if unable to load elasticsearch.yml */ - public List determineHeapSettings(Path configDir, List userDefinedJvmOptions) throws IOException, InterruptedException { + public final List determineHeapSettings( + Settings nodeSettings, + SystemMemoryInfo systemMemoryInfo, + List userDefinedJvmOptions + ) throws IOException, InterruptedException { // TODO: this could be more efficient, to only parse final options once final Map finalJvmOptions = JvmOption.findFinalOptions(userDefinedJvmOptions); if (isMaxHeapSpecified(finalJvmOptions) || isMinHeapSpecified(finalJvmOptions) || isInitialHeapSpecified(finalJvmOptions)) { @@ -62,139 +58,103 @@ public List determineHeapSettings(Path configDir, List userDefin return Collections.emptyList(); } - Path config = configDir.resolve(ELASTICSEARCH_YML); - try (InputStream in = Files.newInputStream(config)) { - return determineHeapSettings(in); - } - } - - List determineHeapSettings(InputStream config) { - MachineNodeRole nodeRole = NodeRoleParser.parse(config); + List roles = NodeRoleSettings.NODE_ROLES_SETTING.get(nodeSettings); long availableSystemMemory = systemMemoryInfo.availableSystemMemory(); - return options(nodeRole.heap(availableSystemMemory)); + MachineNodeRole nodeRole = mapNodeRole(roles); + return options(getHeapSizeMb(nodeSettings, nodeRole, availableSystemMemory)); } - private static List options(int heapSize) { - return List.of("-Xms" + heapSize + "m", "-Xmx" + heapSize + "m"); - } - - /** - * Parses role information from elasticsearch.yml and determines machine node role. - */ - static class NodeRoleParser { - - @SuppressWarnings("unchecked") - public static MachineNodeRole parse(InputStream config) { - final Settings settings; - try (var parser = YamlXContent.yamlXContent.createParser(XContentParserConfiguration.EMPTY, config)) { - if (parser.currentToken() == null && parser.nextToken() == null) { - settings = null; + protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long availableMemory) { + return switch (role) { + /* + * Master-only node. + * + *

<p>Heap is computed as 60% of total system memory up to a maximum of 31 gigabytes. + */ + case MASTER_ONLY -> mb(min((long) (availableMemory * .6), MAX_HEAP_SIZE)); + /* + * Machine learning only node. + * + * <p>Heap is computed as: + * <ul> + *     <li>40% of total system memory when total system memory 16 gigabytes or less.</li> + *     <li>40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.</li> + *     <li>The absolute maximum heap size is 31 gigabytes.</li> + * </ul>
+ * + * In all cases the result is rounded down to the next whole multiple of 4 megabytes. + * The reason for doing this is that Java will round requested heap sizes to a multiple + * of 4 megabytes (certainly versions 11 to 18 do this), so by doing this ourselves we + * are more likely to actually get the amount we request. This is worthwhile for ML where + * the ML autoscaling code needs to be able to calculate the JVM size for different sizes + * of ML node, and if Java is also rounding then this causes a discrepancy. It's possible + * that a future version of Java could round to an even bigger number of megabytes, which + * would cause a discrepancy for people using that version of Java. But there's no harm + * in a bit of extra rounding here - it can only reduce discrepancies. + * + * If this formula is changed then corresponding changes must be made to the {@code NativeMemoryCalculator} and + * {@code MlAutoscalingDeciderServiceTests} classes in the ML plugin code. Failure to keep the logic synchronized + * could result in repeated autoscaling up and down. + */ + case ML_ONLY -> { + if (availableMemory <= (GB * 16)) { + yield mb((long) (availableMemory * .4), 4); } else { - settings = Settings.fromXContent(parser); + yield mb((long) min((GB * 16) * .4 + (availableMemory - GB * 16) * .1, MAX_HEAP_SIZE), 4); } - } catch (IOException | ParsingException ex) { - // Strangely formatted config, so just return defaults and let startup settings validation catch the problem - return MachineNodeRole.UNKNOWN; } - - if (settings != null && settings.isEmpty() == false) { - List roles = settings.getAsList("node.roles"); - - if (roles.isEmpty()) { - // If roles are missing or empty (coordinating node) assume defaults and consider this a data node - return MachineNodeRole.DATA; - } else if (containsOnly(roles, "master")) { - return MachineNodeRole.MASTER_ONLY; - } else if (roles.contains("ml") && containsOnly(roles, "ml", "remote_cluster_client")) { - return MachineNodeRole.ML_ONLY; + /* + * Data node. Essentially any node that isn't a master or ML only node. + * + *

<p>Heap is computed as: + * <ul> + *     <li>40% of total system memory when less than 1 gigabyte with a minimum of 128 megabytes.</li> + *     <li>50% of total system memory when greater than 1 gigabyte up to a maximum of 31 gigabytes.</li> + * </ul>
+ */ + case DATA -> { + if (availableMemory < GB) { + yield mb(max((long) (availableMemory * .4), MIN_HEAP_SIZE)); } else { - return MachineNodeRole.DATA; + yield mb(min((long) (availableMemory * .5), MAX_HEAP_SIZE)); } - } else { // if the config is completely empty, then assume defaults and consider this a data node - return MachineNodeRole.DATA; } - } - - @SuppressWarnings("unchecked") - private static boolean containsOnly(Collection collection, T... items) { - return Arrays.asList(items).containsAll(collection); - } + }; } - enum MachineNodeRole { - /** - * Master-only node. - * - *

<p>Heap is computed as 60% of total system memory up to a maximum of 31 gigabytes. - */ - MASTER_ONLY(m -> mb(min((long) (m * .6), MAX_HEAP_SIZE))), - - /** - * Machine learning only node. - * - * <p>Heap is computed as: - * <ul> - *     <li>40% of total system memory when total system memory 16 gigabytes or less.</li> - *     <li>40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.</li> - *     <li>The absolute maximum heap size is 31 gigabytes.</li> - * </ul>
- * - * In all cases the result is rounded down to the next whole multiple of 4 megabytes. - * The reason for doing this is that Java will round requested heap sizes to a multiple - * of 4 megabytes (certainly versions 11 to 18 do this), so by doing this ourselves we - * are more likely to actually get the amount we request. This is worthwhile for ML where - * the ML autoscaling code needs to be able to calculate the JVM size for different sizes - * of ML node, and if Java is also rounding then this causes a discrepancy. It's possible - * that a future version of Java could round to an even bigger number of megabytes, which - * would cause a discrepancy for people using that version of Java. But there's no harm - * in a bit of extra rounding here - it can only reduce discrepancies. - * - * If this formula is changed then corresponding changes must be made to the {@code NativeMemoryCalculator} and - * {@code MlAutoscalingDeciderServiceTests} classes in the ML plugin code. Failure to keep the logic synchronized - * could result in repeated autoscaling up and down. - */ - ML_ONLY(m -> mb(m <= (GB * 16) ? (long) (m * .4) : (long) min((GB * 16) * .4 + (m - GB * 16) * .1, MAX_HEAP_SIZE), 4)), - - /** - * Data node. Essentially any node that isn't a master or ML only node. - * - *

<p>Heap is computed as: - * <ul> - *     <li>40% of total system memory when less than 1 gigabyte with a minimum of 128 megabytes.</li> - *     <li>50% of total system memory when greater than 1 gigabyte up to a maximum of 31 gigabytes.</li> - * </ul> - */
- */ - DATA(m -> mb(m < GB ? max((long) (m * .4), MIN_HEAP_SIZE) : min((long) (m * .5), MAX_HEAP_SIZE))), - - /** - * Unknown role node. - * - *

Hard-code heap to a default of 1 gigabyte. - */ - UNKNOWN(m -> DEFAULT_HEAP_SIZE_MB); + protected static int mb(long bytes) { + return (int) (bytes / (1024 * 1024)); + } - private final Function formula; + protected static int mb(long bytes, int toLowerMultipleOfMb) { + return toLowerMultipleOfMb * (int) (bytes / (1024 * 1024 * toLowerMultipleOfMb)); + } - MachineNodeRole(Function formula) { - this.formula = formula; + private static MachineNodeRole mapNodeRole(List roles) { + if (roles.isEmpty()) { + // If roles are missing or empty (coordinating node) assume defaults and consider this a data node + return MachineNodeRole.DATA; + } else if (containsOnly(roles, MASTER_ROLE)) { + return MachineNodeRole.MASTER_ONLY; + } else if (roles.contains(ML_ROLE) && containsOnly(roles, ML_ROLE, REMOTE_CLUSTER_CLIENT_ROLE)) { + return MachineNodeRole.ML_ONLY; + } else { + return MachineNodeRole.DATA; } + } - /** - * Determine the appropriate heap size for the given role and available system memory. - * - * @param systemMemory total available system memory in bytes - * @return recommended heap size in megabytes - */ - public int heap(long systemMemory) { - return formula.apply(systemMemory); - } + @SuppressWarnings("unchecked") + private static boolean containsOnly(Collection collection, T... items) { + return Arrays.asList(items).containsAll(collection); + } - private static int mb(long bytes) { - return (int) (bytes / (1024 * 1024)); - } + private static List options(int heapSize) { + return List.of("-Xms" + heapSize + "m", "-Xmx" + heapSize + "m"); + } - private static int mb(long bytes, int toLowerMultipleOfMb) { - return toLowerMultipleOfMb * (int) (bytes / (1024 * 1024 * toLowerMultipleOfMb)); - } + protected enum MachineNodeRole { + MASTER_ONLY, + ML_ONLY, + DATA; } } diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java index 6dbff2fbfff9c..0505ab86127cf 100644 --- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java +++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java @@ -250,7 +250,7 @@ protected Command loadTool(String toolname, String libs) { // protected to allow tests to override protected ServerProcess startServer(Terminal terminal, ProcessInfo processInfo, ServerArgs args) throws Exception { var tempDir = ServerProcessUtils.setupTempDir(processInfo); - var jvmOptions = JvmOptionsParser.determineJvmOptions(args, processInfo, tempDir); + var jvmOptions = JvmOptionsParser.determineJvmOptions(args, processInfo, tempDir, new MachineDependentHeap()); var serverProcessBuilder = new ServerProcessBuilder().withTerminal(terminal) .withProcessInfo(processInfo) .withServerArgs(args) diff --git a/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java b/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java index 5b30c2246c624..0774773cbfa0b 100644 --- a/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java +++ b/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java @@ -8,16 +8,13 @@ package org.elasticsearch.server.cli; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase.WithoutSecurityManager; +import 
org.hamcrest.Matcher; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; import java.net.URISyntaxException; import java.net.URL; -import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collections; @@ -25,95 +22,69 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.empty; -import static org.junit.Assert.assertThat; // TODO: rework these tests to mock jvm option finder so they can run with security manager, no forking needed @WithoutSecurityManager public class MachineDependentHeapTests extends ESTestCase { public void testDefaultHeapSize() throws Exception { - MachineDependentHeap heap = new MachineDependentHeap(systemMemoryInGigabytes(8)); - List options = heap.determineHeapSettings(configPath(), Collections.emptyList()); + MachineDependentHeap heap = new MachineDependentHeap(); + List options = heap.determineHeapSettings(Settings.EMPTY, systemMemoryInGigabytes(8), Collections.emptyList()); assertThat(options, containsInAnyOrder("-Xmx4096m", "-Xms4096m")); } public void testUserPassedHeapArgs() throws Exception { - MachineDependentHeap heap = new MachineDependentHeap(systemMemoryInGigabytes(8)); - List options = heap.determineHeapSettings(configPath(), List.of("-Xmx4g")); + var systemMemoryInfo = systemMemoryInGigabytes(8); + MachineDependentHeap heap = new MachineDependentHeap(); + List options = heap.determineHeapSettings(Settings.EMPTY, systemMemoryInfo, List.of("-Xmx4g")); assertThat(options, empty()); - options = heap.determineHeapSettings(configPath(), List.of("-Xms4g")); + options = heap.determineHeapSettings(Settings.EMPTY, systemMemoryInfo, List.of("-Xms4g")); assertThat(options, empty()); } // Explicitly test odd heap sizes // See: https://github.com/elastic/elasticsearch/issues/86431 public void testOddUserPassedHeapArgs() throws Exception { - MachineDependentHeap heap = new MachineDependentHeap(systemMemoryInGigabytes(8)); - List options = heap.determineHeapSettings(configPath(), List.of("-Xmx409m")); + var systemMemoryInfo = systemMemoryInGigabytes(8); + MachineDependentHeap heap = new MachineDependentHeap(); + List options = heap.determineHeapSettings(Settings.EMPTY, systemMemoryInfo, List.of("-Xmx409m")); assertThat(options, empty()); - options = heap.determineHeapSettings(configPath(), List.of("-Xms409m")); + options = heap.determineHeapSettings(Settings.EMPTY, systemMemoryInfo, List.of("-Xms409m")); assertThat(options, empty()); } - public void testMasterOnlyOptions() { - List options = calculateHeap(16, "master"); - assertThat(options, containsInAnyOrder("-Xmx9830m", "-Xms9830m")); - - options = calculateHeap(64, "master"); - assertThat(options, containsInAnyOrder("-Xmx31744m", "-Xms31744m")); + public void testMasterOnlyOptions() throws Exception { + assertHeapOptions(16, containsInAnyOrder("-Xmx9830m", "-Xms9830m"), "master"); + assertHeapOptions(64, containsInAnyOrder("-Xmx31744m", "-Xms31744m"), "master"); } - public void testMlOnlyOptions() { - List options = calculateHeap(1, "ml"); - assertThat(options, containsInAnyOrder("-Xmx408m", "-Xms408m")); - - options = calculateHeap(4, "ml"); - assertThat(options, containsInAnyOrder("-Xmx1636m", "-Xms1636m")); - - options = calculateHeap(32, "ml"); - assertThat(options, containsInAnyOrder("-Xmx8192m", "-Xms8192m")); - - options = calculateHeap(64, "ml"); - assertThat(options, containsInAnyOrder("-Xmx11468m", "-Xms11468m")); - + public void 
testMlOnlyOptions() throws Exception { + assertHeapOptions(1, containsInAnyOrder("-Xmx408m", "-Xms408m"), "ml"); + assertHeapOptions(4, containsInAnyOrder("-Xmx1636m", "-Xms1636m"), "ml"); + assertHeapOptions(32, containsInAnyOrder("-Xmx8192m", "-Xms8192m"), "ml"); + assertHeapOptions(64, containsInAnyOrder("-Xmx11468m", "-Xms11468m"), "ml"); // We'd never see a node this big in Cloud, but this assertion proves that the 31GB absolute maximum // eventually kicks in (because 0.4 * 16 + 0.1 * (263 - 16) > 31) - options = calculateHeap(263, "ml"); - assertThat(options, containsInAnyOrder("-Xmx31744m", "-Xms31744m")); - - } - - public void testDataNodeOptions() { - List options = calculateHeap(1, "data"); - assertThat(options, containsInAnyOrder("-Xmx512m", "-Xms512m")); - - options = calculateHeap(8, "data"); - assertThat(options, containsInAnyOrder("-Xmx4096m", "-Xms4096m")); - - options = calculateHeap(64, "data"); - assertThat(options, containsInAnyOrder("-Xmx31744m", "-Xms31744m")); - - options = calculateHeap(0.5, "data"); - assertThat(options, containsInAnyOrder("-Xmx204m", "-Xms204m")); - - options = calculateHeap(0.2, "data"); - assertThat(options, containsInAnyOrder("-Xmx128m", "-Xms128m")); + assertHeapOptions(263, containsInAnyOrder("-Xmx31744m", "-Xms31744m"), "ml"); } - private static List calculateHeap(double memoryInGigabytes, String... roles) { - MachineDependentHeap machineDependentHeap = new MachineDependentHeap(systemMemoryInGigabytes(memoryInGigabytes)); - String configYaml = "node.roles: [" + String.join(",", roles) + "]"; - return calculateHeap(machineDependentHeap, configYaml); + public void testDataNodeOptions() throws Exception { + assertHeapOptions(1, containsInAnyOrder("-Xmx512m", "-Xms512m"), "data"); + assertHeapOptions(8, containsInAnyOrder("-Xmx4096m", "-Xms4096m"), "data"); + assertHeapOptions(64, containsInAnyOrder("-Xmx31744m", "-Xms31744m"), "data"); + assertHeapOptions(0.5, containsInAnyOrder("-Xmx204m", "-Xms204m"), "data"); + assertHeapOptions(0.2, containsInAnyOrder("-Xmx128m", "-Xms128m"), "data"); } - private static List calculateHeap(MachineDependentHeap machineDependentHeap, String configYaml) { - try (InputStream in = new ByteArrayInputStream(configYaml.getBytes(StandardCharsets.UTF_8))) { - return machineDependentHeap.determineHeapSettings(in); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + private void assertHeapOptions(double memoryInGigabytes, Matcher> optionsMatcher, String... roles) + throws Exception { + SystemMemoryInfo systemMemoryInfo = systemMemoryInGigabytes(memoryInGigabytes); + MachineDependentHeap machineDependentHeap = new MachineDependentHeap(); + Settings nodeSettings = Settings.builder().putList("node.roles", roles).build(); + List heapOptions = machineDependentHeap.determineHeapSettings(nodeSettings, systemMemoryInfo, Collections.emptyList()); + assertThat(heapOptions, optionsMatcher); } private static SystemMemoryInfo systemMemoryInGigabytes(double gigabytes) { diff --git a/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/NodeRoleParserTests.java b/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/NodeRoleParserTests.java deleted file mode 100644 index 4d501c1116732..0000000000000 --- a/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/NodeRoleParserTests.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -package org.elasticsearch.server.cli; - -import org.elasticsearch.test.ESTestCase; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.function.Consumer; - -import static org.elasticsearch.server.cli.MachineDependentHeap.MachineNodeRole.DATA; -import static org.elasticsearch.server.cli.MachineDependentHeap.MachineNodeRole.MASTER_ONLY; -import static org.elasticsearch.server.cli.MachineDependentHeap.MachineNodeRole.ML_ONLY; -import static org.elasticsearch.server.cli.MachineDependentHeap.MachineNodeRole.UNKNOWN; -import static org.hamcrest.CoreMatchers.not; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; - -public class NodeRoleParserTests extends ESTestCase { - - public void testMasterOnlyNode() throws IOException { - MachineDependentHeap.MachineNodeRole nodeRole = parseConfig(sb -> sb.append("node.roles: [master]")); - assertThat(nodeRole, equalTo(MASTER_ONLY)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [master, some_other_role]")); - assertThat(nodeRole, not(equalTo(MASTER_ONLY))); - } - - public void testMlOnlyNode() throws IOException { - MachineDependentHeap.MachineNodeRole nodeRole = parseConfig(sb -> sb.append("node.roles: [ml]")); - assertThat(nodeRole, equalTo(ML_ONLY)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [ml, remote_cluster_client]")); - assertThat(nodeRole, equalTo(ML_ONLY)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [remote_cluster_client, ml]")); - assertThat(nodeRole, equalTo(ML_ONLY)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [remote_cluster_client]")); - assertThat(nodeRole, not(equalTo(ML_ONLY))); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [ml, some_other_role]")); - assertThat(nodeRole, not(equalTo(ML_ONLY))); - } - - public void testDataNode() throws IOException { - MachineDependentHeap.MachineNodeRole nodeRole = parseConfig(sb -> {}); - assertThat(nodeRole, equalTo(DATA)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: []")); - assertThat(nodeRole, equalTo(DATA)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [some_unknown_role]")); - assertThat(nodeRole, equalTo(DATA)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [master, ingest]")); - assertThat(nodeRole, equalTo(DATA)); - - nodeRole = parseConfig(sb -> sb.append("node.roles: [ml, master]")); - assertThat(nodeRole, equalTo(DATA)); - } - - public void testYamlSyntax() throws IOException { - MachineDependentHeap.MachineNodeRole nodeRole = parseConfig(sb -> sb.append(""" - node: - roles: - - master""")); - assertThat(nodeRole, equalTo(MASTER_ONLY)); - - nodeRole = parseConfig(sb -> sb.append(""" - node: - roles: [ml]""")); - assertThat(nodeRole, equalTo(ML_ONLY)); - } - - public void testInvalidYaml() throws IOException { - MachineDependentHeap.MachineNodeRole nodeRole = parseConfig(sb -> sb.append("notyaml")); - assertThat(nodeRole, equalTo(UNKNOWN)); - } - - public void testInvalidRoleSyntax() throws IOException { - MachineDependentHeap.MachineNodeRole nodeRole = parseConfig(sb -> sb.append("node.roles: foo")); - // roles we don't know about are considered data, but will fail validation when ES starts up - assertThat(nodeRole, 
equalTo(DATA)); - } - - private static MachineDependentHeap.MachineNodeRole parseConfig(Consumer action) throws IOException { - StringBuilder sb = new StringBuilder(); - action.accept(sb); - - try (InputStream config = new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8))) { - return MachineDependentHeap.NodeRoleParser.parse(config); - } - } -} diff --git a/distribution/tools/windows-service-cli/src/main/java/org/elasticsearch/windows/service/WindowsServiceDaemon.java b/distribution/tools/windows-service-cli/src/main/java/org/elasticsearch/windows/service/WindowsServiceDaemon.java index 2c42dcf5cb2f5..22474e63ab0df 100644 --- a/distribution/tools/windows-service-cli/src/main/java/org/elasticsearch/windows/service/WindowsServiceDaemon.java +++ b/distribution/tools/windows-service-cli/src/main/java/org/elasticsearch/windows/service/WindowsServiceDaemon.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.env.Environment; import org.elasticsearch.server.cli.JvmOptionsParser; +import org.elasticsearch.server.cli.MachineDependentHeap; import org.elasticsearch.server.cli.ServerProcess; import org.elasticsearch.server.cli.ServerProcessBuilder; import org.elasticsearch.server.cli.ServerProcessUtils; @@ -42,7 +43,7 @@ public void execute(Terminal terminal, OptionSet options, Environment env, Proce try (var loadedSecrets = KeyStoreWrapper.bootstrap(env.configFile(), () -> new SecureString(new char[0]))) { var args = new ServerArgs(false, true, null, loadedSecrets, env.settings(), env.configFile(), env.logsFile()); var tempDir = ServerProcessUtils.setupTempDir(processInfo); - var jvmOptions = JvmOptionsParser.determineJvmOptions(args, processInfo, tempDir); + var jvmOptions = JvmOptionsParser.determineJvmOptions(args, processInfo, tempDir, new MachineDependentHeap()); var serverProcessBuilder = new ServerProcessBuilder().withTerminal(terminal) .withProcessInfo(processInfo) .withServerArgs(args) From 01cc967ce9306b5f21939efadc3771a6d0f323db Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Fri, 26 Apr 2024 09:34:11 -0700 Subject: [PATCH 11/14] Mute synthetic source YAML tests (#107958) Relates #107567 --- .../indices.create/20_synthetic_source.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 874778f9bdb5c..9376f3598d6f1 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -39,9 +39,9 @@ nested is disabled: --- object with unmapped fields: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored values + - skip: + version: " - " + reason: "mapper.track_ignored_source" - do: indices.create: @@ -84,9 +84,9 @@ object with unmapped fields: --- nested object with unmapped fields: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored values + - skip: + version: " - " + reason: "mapper.track_ignored_source" - do: indices.create: @@ -130,9 +130,9 @@ nested object with unmapped fields: --- empty object with unmapped fields: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored values + - skip: + 
version: " - " + reason: "mapper.track_ignored_source" - do: indices.create: From ca513b17527adb7e9f1a97872e1ab12474cca391 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Fri, 26 Apr 2024 10:12:17 -0700 Subject: [PATCH 12/14] Annotated text fields are stored by default with synthetic source (#107922) This change follows existing implementation for text field. Closes #107734. --- docs/changelog/107922.yaml | 6 ++ .../AnnotatedTextFieldMapper.java | 30 ++++++--- .../AnnotatedTextFieldMapperTests.java | 61 +++++++++++++++++++ .../AnnotatedTextFieldTypeTests.java | 9 ++- 4 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 docs/changelog/107922.yaml diff --git a/docs/changelog/107922.yaml b/docs/changelog/107922.yaml new file mode 100644 index 0000000000000..e28d0f6262af4 --- /dev/null +++ b/docs/changelog/107922.yaml @@ -0,0 +1,6 @@ +pr: 107922 +summary: Feature/annotated text store defaults +area: Mapping +type: enhancement +issues: + - 107734 diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 6d2b83185d5b7..e5e396888e168 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -83,11 +83,7 @@ private static NamedAnalyzer wrapAnalyzer(NamedAnalyzer in) { public static class Builder extends FieldMapper.Builder { - private final Parameter store = Parameter.storeParam(m -> builder(m).store.getValue(), false); - - final TextParams.Analyzers analyzers; final Parameter similarity = TextParams.similarity(m -> builder(m).similarity.getValue()); - final Parameter indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue()); final Parameter norms = TextParams.norms(true, m -> builder(m).norms.getValue()); final Parameter termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue()); @@ -95,8 +91,16 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); private final IndexVersion indexCreatedVersion; + private final TextParams.Analyzers analyzers; + private final boolean isSyntheticSourceEnabledViaIndexMode; + private final Parameter store; - public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + public Builder( + String name, + IndexVersion indexCreatedVersion, + IndexAnalyzers indexAnalyzers, + boolean isSyntheticSourceEnabledViaIndexMode + ) { super(name); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( @@ -105,6 +109,11 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind m -> builder(m).analyzers.positionIncrementGap.getValue(), indexCreatedVersion ); + this.isSyntheticSourceEnabledViaIndexMode = isSyntheticSourceEnabledViaIndexMode; + this.store = Parameter.storeParam( + m -> builder(m).store.getValue(), + () -> isSyntheticSourceEnabledViaIndexMode && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false + ); } @Override @@ -164,7 +173,9 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) { } } - public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + 
public static TypeParser PARSER = new TypeParser( + (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.getIndexSettings().getMode().isSyntheticSourceEnabled()) + ); /** * Parses markdown-like syntax into plain text and AnnotationTokens with offsets for @@ -552,7 +563,12 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this); + return new Builder( + simpleName(), + builder.indexCreatedVersion, + builder.analyzers.indexAnalyzers, + builder.isSyntheticSourceEnabledViaIndexMode + ).init(this); } @Override diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java index 3b27cdb132851..4e3a53d64a841 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java @@ -24,7 +24,9 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.Strings; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.CharFilterFactory; @@ -42,6 +44,7 @@ import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.TextFieldFamilySyntheticSourceTestSetup; import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -288,6 +291,64 @@ public void testEnableStore() throws IOException { assertTrue(fields.get(0).fieldType().stored()); } + public void testStoreParameterDefaults() throws IOException { + var timeSeriesIndexMode = randomBoolean(); + var isStored = randomBoolean(); + var hasKeywordFieldForSyntheticSource = randomBoolean(); + + var indexSettingsBuilder = getIndexSettingsBuilder(); + if (timeSeriesIndexMode) { + indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES) + .putList(IndexMetadata.INDEX_ROUTING_PATH.getKey(), "dimension") + .put(IndexSettings.TIME_SERIES_START_TIME.getKey(), "2000-01-08T23:40:53.384Z") + .put(IndexSettings.TIME_SERIES_END_TIME.getKey(), "2106-01-08T23:40:53.384Z"); + } + var indexSettings = indexSettingsBuilder.build(); + + var mapping = mapping(b -> { + b.startObject("field"); + b.field("type", "annotated_text"); + if (isStored) { + b.field("store", isStored); + } + if (hasKeywordFieldForSyntheticSource) { + b.startObject("fields"); + b.startObject("keyword"); + b.field("type", "keyword"); + b.endObject(); + b.endObject(); + } + b.endObject(); + + if (timeSeriesIndexMode) { + b.startObject("@timestamp"); + b.field("type", "date"); + b.endObject(); + b.startObject("dimension"); + b.field("type", "keyword"); + b.field("time_series_dimension", "true"); + b.endObject(); + } + }); + DocumentMapper mapper = createMapperService(getVersion(), indexSettings, () -> true, 
mapping).documentMapper(); + + var source = source(TimeSeriesRoutingHashFieldMapper.DUMMY_ENCODED_VALUE, b -> { + b.field("field", "1234"); + if (timeSeriesIndexMode) { + b.field("@timestamp", randomMillisUpToYear9999()); + b.field("dimension", "dimension1"); + } + }, null); + ParsedDocument doc = mapper.parse(source); + List fields = doc.rootDoc().getFields("field"); + IndexableFieldType fieldType = fields.get(0).fieldType(); + if (isStored || (timeSeriesIndexMode && hasKeywordFieldForSyntheticSource == false)) { + assertTrue(fieldType.stored()); + } else { + assertFalse(fieldType.stored()); + } + } + public void testDisableNorms() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> { diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java index 1b9f3b9447378..2a78699c8a4a9 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java @@ -29,9 +29,12 @@ public void testIntervals() throws IOException { } public void testFetchSourceValue() throws IOException { - MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", IndexVersion.current(), createDefaultIndexAnalyzers()) - .build(MapperBuilderContext.root(false, false)) - .fieldType(); + MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder( + "field", + IndexVersion.current(), + createDefaultIndexAnalyzers(), + false + ).build(MapperBuilderContext.root(false, false)).fieldType(); assertEquals(List.of("value"), fetchSourceValue(fieldType, "value")); assertEquals(List.of("42"), fetchSourceValue(fieldType, 42L)); From 22aad7b2010c7797eb5cbc562cecea28dbfe375b Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Fri, 26 Apr 2024 12:15:48 -0700 Subject: [PATCH 13/14] Support metrics counter types in ESQL (#107877) This commit adds support for numeric metrics counter fields in ES|QL. These counter types, including counter_long, counter_integer, and counter_double, are different from their parent types. Users will have limited interaction with these counter types, restricted to: - Retrieving values without any processing - Casting to their root type (e.g., to_long(a_long_counter)) - Using them in the metrics rate aggregation These restrictions are intentional to prevent misuse. If users want to use them as numeric values, explicit casting to their root types is required. 
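As a rough sketch of the casting rule described above (illustrative only: the index name `test` is assumed, while `metricset` and `network.bytes_in` come from the tsdb-mapping.json fixture added in this patch), a counter_long column can be retrieved as-is, but it must be converted to its root type before it is used as a regular numeric value:

    FROM test
    | EVAL bytes_in = to_long(network.bytes_in)
    | STATS max_bytes_in = max(bytes_in) BY metricset

Without the explicit to_long(...) conversion, a numeric aggregation such as max(...) rejects the counter field, since numeric aggregations now resolve only to "numeric except unsigned_long or counter types".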
--- docs/changelog/107877.yaml | 5 ++ .../kibana/definition/to_double.json | 36 +++++++++ .../kibana/definition/to_integer.json | 12 +++ .../functions/kibana/definition/to_long.json | 24 ++++++ .../esql/functions/types/to_double.asciidoc | 3 + .../esql/functions/types/to_integer.asciidoc | 1 + .../esql/functions/types/to_long.asciidoc | 2 + .../index/mapper/NumberFieldMapper.java | 4 - .../src/main/resources/meta.csv-spec | 20 ++--- .../src/main/resources/tsdb-mapping.json | 30 ++++++++ .../xpack/esql/action/PositionToXContent.java | 6 +- .../xpack/esql/action/ResponseValueUtils.java | 12 +-- .../xpack/esql/analysis/Verifier.java | 4 + .../expression/function/aggregate/Avg.java | 2 +- .../expression/function/aggregate/Count.java | 5 +- .../function/aggregate/CountDistinct.java | 2 +- .../expression/function/aggregate/Median.java | 2 +- .../function/aggregate/NumericAggregate.java | 4 +- .../function/scalar/convert/ToDouble.java | 19 ++++- .../function/scalar/convert/ToInteger.java | 6 +- .../function/scalar/convert/ToLong.java | 17 ++++- .../esql/planner/LocalExecutionPlanner.java | 3 +- .../xpack/esql/planner/PlannerUtils.java | 9 ++- .../xpack/esql/plugin/EsqlFeatures.java | 8 +- .../xpack/esql/type/EsqlDataTypeRegistry.java | 7 +- .../xpack/esql/type/EsqlDataTypes.java | 27 ++++++- .../esql/action/EsqlQueryResponseTests.java | 6 +- .../esql/analysis/AnalyzerTestUtils.java | 4 + .../xpack/esql/analysis/AnalyzerTests.java | 76 +++++++++---------- .../xpack/esql/analysis/VerifierTests.java | 41 +++++++++- .../function/AbstractFunctionTestCase.java | 57 +++++++++++++- .../scalar/convert/ToDoubleTests.java | 27 +++++++ .../scalar/convert/ToIntegerTests.java | 10 +++ .../function/scalar/convert/ToLongTests.java | 18 +++++ .../esql/type/EsqlDataTypeRegistryTests.java | 6 +- .../elasticsearch/xpack/ql/type/Types.java | 8 +- .../xpack/ql/type/TypesTests.java | 3 +- .../rest-api-spec/test/esql/40_tsdb.yml | 60 +++++++++++---- 38 files changed, 476 insertions(+), 110 deletions(-) create mode 100644 docs/changelog/107877.yaml create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/tsdb-mapping.json diff --git a/docs/changelog/107877.yaml b/docs/changelog/107877.yaml new file mode 100644 index 0000000000000..cf458b3aa3a42 --- /dev/null +++ b/docs/changelog/107877.yaml @@ -0,0 +1,5 @@ +pr: 107877 +summary: Support metrics counter types in ESQL +area: "ES|QL" +type: enhancement +issues: [] diff --git a/docs/reference/esql/functions/kibana/definition/to_double.json b/docs/reference/esql/functions/kibana/definition/to_double.json index 4a466e76562e9..f4e414068db61 100644 --- a/docs/reference/esql/functions/kibana/definition/to_double.json +++ b/docs/reference/esql/functions/kibana/definition/to_double.json @@ -16,6 +16,42 @@ "variadic" : false, "returnType" : "double" }, + { + "params" : [ + { + "name" : "field", + "type" : "counter_double", + "optional" : false, + "description" : "Input value. The input can be a single- or multi-valued column or an expression." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "field", + "type" : "counter_integer", + "optional" : false, + "description" : "Input value. The input can be a single- or multi-valued column or an expression." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "field", + "type" : "counter_long", + "optional" : false, + "description" : "Input value. The input can be a single- or multi-valued column or an expression." 
+ } + ], + "variadic" : false, + "returnType" : "double" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/kibana/definition/to_integer.json b/docs/reference/esql/functions/kibana/definition/to_integer.json index 4284265c4f93c..2776d8b29c412 100644 --- a/docs/reference/esql/functions/kibana/definition/to_integer.json +++ b/docs/reference/esql/functions/kibana/definition/to_integer.json @@ -16,6 +16,18 @@ "variadic" : false, "returnType" : "integer" }, + { + "params" : [ + { + "name" : "field", + "type" : "counter_integer", + "optional" : false, + "description" : "Input value. The input can be a single- or multi-valued column or an expression." + } + ], + "variadic" : false, + "returnType" : "integer" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/kibana/definition/to_long.json b/docs/reference/esql/functions/kibana/definition/to_long.json index 25e7f82f18547..e3218eba9642a 100644 --- a/docs/reference/esql/functions/kibana/definition/to_long.json +++ b/docs/reference/esql/functions/kibana/definition/to_long.json @@ -16,6 +16,30 @@ "variadic" : false, "returnType" : "long" }, + { + "params" : [ + { + "name" : "field", + "type" : "counter_integer", + "optional" : false, + "description" : "Input value. The input can be a single- or multi-valued column or an expression." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "counter_long", + "optional" : false, + "description" : "Input value. The input can be a single- or multi-valued column or an expression." + } + ], + "variadic" : false, + "returnType" : "long" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/types/to_double.asciidoc b/docs/reference/esql/functions/types/to_double.asciidoc index c78c3974af5a5..cff686c7bc4ca 100644 --- a/docs/reference/esql/functions/types/to_double.asciidoc +++ b/docs/reference/esql/functions/types/to_double.asciidoc @@ -6,6 +6,9 @@ |=== field | result boolean | double +counter_double | double +counter_integer | double +counter_long | double datetime | double double | double integer | double diff --git a/docs/reference/esql/functions/types/to_integer.asciidoc b/docs/reference/esql/functions/types/to_integer.asciidoc index 11fd7914c5b0f..974f3c9c82d88 100644 --- a/docs/reference/esql/functions/types/to_integer.asciidoc +++ b/docs/reference/esql/functions/types/to_integer.asciidoc @@ -6,6 +6,7 @@ |=== field | result boolean | integer +counter_integer | integer datetime | integer double | integer integer | integer diff --git a/docs/reference/esql/functions/types/to_long.asciidoc b/docs/reference/esql/functions/types/to_long.asciidoc index 4bc927fd94697..b3959c5444e34 100644 --- a/docs/reference/esql/functions/types/to_long.asciidoc +++ b/docs/reference/esql/functions/types/to_long.asciidoc @@ -6,6 +6,8 @@ |=== field | result boolean | long +counter_integer | long +counter_long | long datetime | long double | long integer | long diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index ebb6672cbab18..493d09a047a53 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -1709,10 +1709,6 @@ public Function pointReaderIfPossible() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - if (indexMode == IndexMode.TIME_SERIES && metricType == 
TimeSeriesParams.MetricType.COUNTER) { - // Counters are not supported by ESQL so we load them in null - return BlockLoader.CONSTANT_NULLS; - } if (hasDocValues()) { return type.blockLoaderFromDocValues(name()); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index 93ecb003685bc..f038e9e54c9a6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -85,16 +85,16 @@ double tau() "cartesian_point to_cartesianpoint(field:cartesian_point|keyword|text)" "cartesian_shape to_cartesianshape(field:cartesian_point|cartesian_shape|keyword|text)" "date to_datetime(field:date|keyword|text|double|long|unsigned_long|integer)" -"double to_dbl(field:boolean|date|keyword|text|double|long|unsigned_long|integer)" +"double to_dbl(field:boolean|date|keyword|text|double|long|unsigned_long|integer|counter_double|counter_integer|counter_long)" "double to_degrees(number:double|integer|long|unsigned_long)" -"double to_double(field:boolean|date|keyword|text|double|long|unsigned_long|integer)" +"double to_double(field:boolean|date|keyword|text|double|long|unsigned_long|integer|counter_double|counter_integer|counter_long)" "date to_dt(field:date|keyword|text|double|long|unsigned_long|integer)" "geo_point to_geopoint(field:geo_point|keyword|text)" "geo_shape to_geoshape(field:geo_point|geo_shape|keyword|text)" -"integer to_int(field:boolean|date|keyword|text|double|long|unsigned_long|integer)" -"integer to_integer(field:boolean|date|keyword|text|double|long|unsigned_long|integer)" +"integer to_int(field:boolean|date|keyword|text|double|long|unsigned_long|integer|counter_integer)" +"integer to_integer(field:boolean|date|keyword|text|double|long|unsigned_long|integer|counter_integer)" "ip to_ip(field:ip|keyword|text)" -"long to_long(field:boolean|date|keyword|text|double|long|unsigned_long|integer)" +"long to_long(field:boolean|date|keyword|text|double|long|unsigned_long|integer|counter_integer|counter_long)" "keyword|text to_lower(str:keyword|text)" "double to_radians(number:double|integer|long|unsigned_long)" "keyword to_str(field:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)" @@ -198,16 +198,16 @@ to_boolean |field |"boolean|keyword|text|double to_cartesianpo|field |"cartesian_point|keyword|text" |Input value. The input can be a single- or multi-valued column or an expression. to_cartesiansh|field |"cartesian_point|cartesian_shape|keyword|text" |Input value. The input can be a single- or multi-valued column or an expression. to_datetime |field |"date|keyword|text|double|long|unsigned_long|integer" |Input value. The input can be a single- or multi-valued column or an expression. -to_dbl |field |"boolean|date|keyword|text|double|long|unsigned_long|integer" |Input value. The input can be a single- or multi-valued column or an expression. +to_dbl |field |"boolean|date|keyword|text|double|long|unsigned_long|integer|counter_double|counter_integer|counter_long" |Input value. The input can be a single- or multi-valued column or an expression. to_degrees |number |"double|integer|long|unsigned_long" |Input value. The input can be a single- or multi-valued column or an expression. -to_double |field |"boolean|date|keyword|text|double|long|unsigned_long|integer" |Input value. The input can be a single- or multi-valued column or an expression. 
+to_double |field |"boolean|date|keyword|text|double|long|unsigned_long|integer|counter_double|counter_integer|counter_long" |Input value. The input can be a single- or multi-valued column or an expression. to_dt |field |"date|keyword|text|double|long|unsigned_long|integer" |Input value. The input can be a single- or multi-valued column or an expression. to_geopoint |field |"geo_point|keyword|text" |Input value. The input can be a single- or multi-valued column or an expression. to_geoshape |field |"geo_point|geo_shape|keyword|text" |Input value. The input can be a single- or multi-valued column or an expression. -to_int |field |"boolean|date|keyword|text|double|long|unsigned_long|integer" |Input value. The input can be a single- or multi-valued column or an expression. -to_integer |field |"boolean|date|keyword|text|double|long|unsigned_long|integer" |Input value. The input can be a single- or multi-valued column or an expression. +to_int |field |"boolean|date|keyword|text|double|long|unsigned_long|integer|counter_integer" |Input value. The input can be a single- or multi-valued column or an expression. +to_integer |field |"boolean|date|keyword|text|double|long|unsigned_long|integer|counter_integer" |Input value. The input can be a single- or multi-valued column or an expression. to_ip |field |"ip|keyword|text" |Input value. The input can be a single- or multi-valued column or an expression. -to_long |field |"boolean|date|keyword|text|double|long|unsigned_long|integer" |Input value. The input can be a single- or multi-valued column or an expression. +to_long |field |"boolean|date|keyword|text|double|long|unsigned_long|integer|counter_integer|counter_long" |Input value. The input can be a single- or multi-valued column or an expression. to_lower |str |"keyword|text" |String expression. If `null`, the function returns `null`. to_radians |number |"double|integer|long|unsigned_long" |Input value. The input can be a single- or multi-valued column or an expression. to_str |field |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version" |Input value. The input can be a single- or multi-valued column or an expression. 
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tsdb-mapping.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tsdb-mapping.json new file mode 100644 index 0000000000000..c3bba9724602b --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tsdb-mapping.json @@ -0,0 +1,30 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "metricset": { + "type": "keyword", + "time_series_dimension": true + }, + "name": { + "type": "keyword" + }, + "network": { + "properties": { + "connections": { + "type": "long", + "time_series_metric": "gauge" + }, + "bytes_in": { + "type": "long", + "time_series_metric": "counter" + }, + "bytes_out": { + "type": "long", + "time_series_metric": "counter" + } + } + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java index 5488efda7834f..7e54bf94ac263 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java @@ -61,21 +61,21 @@ protected abstract XContentBuilder valueToXContent(XContentBuilder builder, ToXC public static PositionToXContent positionToXContent(ColumnInfo columnInfo, Block block, BytesRef scratch) { return switch (columnInfo.type()) { - case "long" -> new PositionToXContent(block) { + case "long", "counter_long" -> new PositionToXContent(block) { @Override protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Params params, int valueIndex) throws IOException { return builder.value(((LongBlock) block).getLong(valueIndex)); } }; - case "integer" -> new PositionToXContent(block) { + case "integer", "counter_integer" -> new PositionToXContent(block) { @Override protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Params params, int valueIndex) throws IOException { return builder.value(((IntBlock) block).getInt(valueIndex)); } }; - case "double" -> new PositionToXContent(block) { + case "double", "counter_double" -> new PositionToXContent(block) { @Override protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Params params, int valueIndex) throws IOException { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java index f467512fd6c0b..ba9aafe03143f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java @@ -123,9 +123,9 @@ static Object valueAtPosition(Block block, int position, String dataType, BytesR private static Object valueAt(String dataType, Block block, int offset, BytesRef scratch) { return switch (dataType) { case "unsigned_long" -> unsignedLongAsNumber(((LongBlock) block).getLong(offset)); - case "long" -> ((LongBlock) block).getLong(offset); - case "integer" -> ((IntBlock) block).getInt(offset); - case "double" -> ((DoubleBlock) block).getDouble(offset); + case "long", "counter_long" -> ((LongBlock) block).getLong(offset); + case "integer", "counter_integer" -> ((IntBlock) block).getInt(offset); + case "double", "counter_double" -> ((DoubleBlock) block).getDouble(offset); case "keyword", "text" -> ((BytesRefBlock) 
block).getBytesRef(offset, scratch).utf8ToString(); case "ip" -> { BytesRef val = ((BytesRefBlock) block).getBytesRef(offset, scratch); @@ -174,9 +174,9 @@ static Page valuesToPage(BlockFactory blockFactory, List columns, Li case "unsigned_long" -> ((LongBlock.Builder) builder).appendLong( longToUnsignedLong(((Number) value).longValue(), true) ); - case "long" -> ((LongBlock.Builder) builder).appendLong(((Number) value).longValue()); - case "integer" -> ((IntBlock.Builder) builder).appendInt(((Number) value).intValue()); - case "double" -> ((DoubleBlock.Builder) builder).appendDouble(((Number) value).doubleValue()); + case "long", "counter_long" -> ((LongBlock.Builder) builder).appendLong(((Number) value).longValue()); + case "integer", "counter_integer" -> ((IntBlock.Builder) builder).appendInt(((Number) value).intValue()); + case "double", "counter_double" -> ((DoubleBlock.Builder) builder).appendDouble(((Number) value).doubleValue()); case "keyword", "text", "unsupported" -> ((BytesRefBlock.Builder) builder).appendBytesRef( new BytesRef(value.toString()) ); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 2267125304da7..b318e7ed99bc0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.ql.expression.AttributeSet; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.expression.Expressions; +import org.elasticsearch.xpack.ql.expression.FieldAttribute; import org.elasticsearch.xpack.ql.expression.NamedExpression; import org.elasticsearch.xpack.ql.expression.TypeResolutions; import org.elasticsearch.xpack.ql.expression.function.aggregate.AggregateFunction; @@ -193,6 +194,9 @@ private static void checkAggregate(LogicalPlan p, Set failures) { if (attr != null) { groupRefs.add(attr); } + if (e instanceof FieldAttribute f && EsqlDataTypes.isCounterType(f.dataType())) { + failures.add(fail(e, "cannot group by on [{}] type for grouping [{}]", f.dataType().typeName(), e.sourceText())); + } }); // check aggregates - accept only aggregate functions or expressions over grouping diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java index 3ea0721d52c00..c62551a8aa1f6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java @@ -38,7 +38,7 @@ protected Expression.TypeResolution resolveType() { dt -> dt.isNumeric() && dt != DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT, - "numeric except unsigned_long" + "numeric except unsigned_long or counter types" ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java index 7ce655bf59962..957f83453cac3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java @@ -9,7 +9,6 @@ import org.elasticsearch.compute.aggregation.AggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.CountAggregatorFunction; -import org.elasticsearch.xpack.esql.expression.EsqlTypeResolutions; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; @@ -17,6 +16,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul; import org.elasticsearch.xpack.esql.planner.ToAggregator; +import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.expression.Literal; import org.elasticsearch.xpack.ql.expression.Nullability; @@ -31,6 +31,7 @@ import java.util.List; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.DEFAULT; +import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType; public class Count extends AggregateFunction implements EnclosedAgg, ToAggregator, SurrogateExpression { @@ -91,7 +92,7 @@ public Nullability nullable() { @Override protected TypeResolution resolveType() { - return EsqlTypeResolutions.isExact(field(), sourceText(), DEFAULT); + return isType(field(), dt -> EsqlDataTypes.isCounterType(dt) == false, sourceText(), DEFAULT, "any type except counter types"); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 5e62102aceeaf..b63c070a90ec8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -90,7 +90,7 @@ protected TypeResolution resolveType() { dt -> resolved && dt != DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT, - "any exact type except unsigned_long" + "any exact type except unsigned_long or counter types" ); if (resolution.unresolved() || precision == null) { return resolution; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java index eb602df21d9a0..8ca3889352e40 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java @@ -44,7 +44,7 @@ protected Expression.TypeResolution resolveType() { dt -> dt.isNumeric() && dt != DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT, - "numeric except unsigned_long" + "numeric except unsigned_long or counter types" ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java index 8e1e38441e9a6..799ec58a18a5d 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java @@ -40,7 +40,7 @@ protected TypeResolution resolveType() { sourceText(), DEFAULT, "datetime", - "numeric except unsigned_long" + "numeric except unsigned_long or counter types" ); } return isType( @@ -48,7 +48,7 @@ protected TypeResolution resolveType() { dt -> dt.isNumeric() && dt != DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT, - "numeric except unsigned_long" + "numeric except unsigned_long or counter types" ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDouble.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDouble.java index 74cf0c4c1deea..20cb46def4d8b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDouble.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDouble.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.InvalidArgumentException; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.tree.NodeInfo; @@ -42,7 +43,10 @@ public class ToDouble extends AbstractConvertFunction { Map.entry(TEXT, ToDoubleFromStringEvaluator.Factory::new), Map.entry(UNSIGNED_LONG, ToDoubleFromUnsignedLongEvaluator.Factory::new), Map.entry(LONG, ToDoubleFromLongEvaluator.Factory::new), // CastLongToDoubleEvaluator would be a candidate, but not MV'd - Map.entry(INTEGER, ToDoubleFromIntEvaluator.Factory::new) // CastIntToDoubleEvaluator would be a candidate, but not MV'd + Map.entry(INTEGER, ToDoubleFromIntEvaluator.Factory::new), // CastIntToDoubleEvaluator would be a candidate, but not MV'd + Map.entry(EsqlDataTypes.COUNTER_DOUBLE, (field, source) -> field), + Map.entry(EsqlDataTypes.COUNTER_INTEGER, ToDoubleFromIntEvaluator.Factory::new), + Map.entry(EsqlDataTypes.COUNTER_LONG, ToDoubleFromLongEvaluator.Factory::new) ); @FunctionInfo( @@ -65,7 +69,18 @@ public ToDouble( Source source, @Param( name = "field", - type = { "boolean", "date", "keyword", "text", "double", "long", "unsigned_long", "integer" }, + type = { + "boolean", + "date", + "keyword", + "text", + "double", + "long", + "unsigned_long", + "integer", + "counter_double", + "counter_integer", + "counter_long" }, description = "Input value. The input can be a single- or multi-valued column or an expression." 
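+            // Note: the counter_double, counter_integer and counter_long entries in the type list above correspond to
+            // the EsqlDataTypes.COUNTER_* entries added to the evaluator map: counter_double is simply re-labelled in
+            // place ((field, source) -> field), while counter_integer and counter_long reuse the existing int/long to
+            // double evaluators. For example (field names taken from the tsdb fixture in this patch's YAML tests):
+            //   from test | eval tx = to_double(k8s.pod.network.tx) | stats avg(tx)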
) Expression field ) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToInteger.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToInteger.java index d50f1f613b589..32e3b8a77695c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToInteger.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToInteger.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.InvalidArgumentException; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.tree.NodeInfo; @@ -43,7 +44,8 @@ public class ToInteger extends AbstractConvertFunction { Map.entry(TEXT, ToIntegerFromStringEvaluator.Factory::new), Map.entry(DOUBLE, ToIntegerFromDoubleEvaluator.Factory::new), Map.entry(UNSIGNED_LONG, ToIntegerFromUnsignedLongEvaluator.Factory::new), - Map.entry(LONG, ToIntegerFromLongEvaluator.Factory::new) + Map.entry(LONG, ToIntegerFromLongEvaluator.Factory::new), + Map.entry(EsqlDataTypes.COUNTER_INTEGER, (fieldEval, source) -> fieldEval) ); @FunctionInfo( @@ -68,7 +70,7 @@ public ToInteger( Source source, @Param( name = "field", - type = { "boolean", "date", "keyword", "text", "double", "long", "unsigned_long", "integer" }, + type = { "boolean", "date", "keyword", "text", "double", "long", "unsigned_long", "integer", "counter_integer" }, description = "Input value. The input can be a single- or multi-valued column or an expression." 
) Expression field ) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLong.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLong.java index 77973ec49b7e3..c7b77a3c7f2c6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLong.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLong.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.InvalidArgumentException; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.tree.NodeInfo; @@ -43,7 +44,9 @@ public class ToLong extends AbstractConvertFunction { Map.entry(TEXT, ToLongFromStringEvaluator.Factory::new), Map.entry(DOUBLE, ToLongFromDoubleEvaluator.Factory::new), Map.entry(UNSIGNED_LONG, ToLongFromUnsignedLongEvaluator.Factory::new), - Map.entry(INTEGER, ToLongFromIntEvaluator.Factory::new) // CastIntToLongEvaluator would be a candidate, but not MV'd + Map.entry(INTEGER, ToLongFromIntEvaluator.Factory::new), // CastIntToLongEvaluator would be a candidate, but not MV'd + Map.entry(EsqlDataTypes.COUNTER_LONG, (field, source) -> field), + Map.entry(EsqlDataTypes.COUNTER_INTEGER, ToLongFromIntEvaluator.Factory::new) ); @FunctionInfo( @@ -67,7 +70,17 @@ public ToLong( Source source, @Param( name = "field", - type = { "boolean", "date", "keyword", "text", "double", "long", "unsigned_long", "integer" }, + type = { + "boolean", + "date", + "keyword", + "text", + "double", + "long", + "unsigned_long", + "integer", + "counter_integer", + "counter_long" }, description = "Input value. The input can be a single- or multi-valued column or an expression." 
) Expression field ) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 3ea3bd54da135..e7285bae32408 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -344,7 +344,8 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte case "version" -> TopNEncoder.VERSION; case "boolean", "null", "byte", "short", "integer", "long", "double", "float", "half_float", "datetime", "date_period", "time_duration", "object", "nested", "scaled_float", "unsigned_long", "_doc" -> TopNEncoder.DEFAULT_SORTABLE; - case "geo_point", "cartesian_point", "geo_shape", "cartesian_shape" -> TopNEncoder.DEFAULT_UNSORTABLE; + case "geo_point", "cartesian_point", "geo_shape", "cartesian_shape", "counter_long", "counter_integer", "counter_double" -> + TopNEncoder.DEFAULT_UNSORTABLE; // unsupported fields are encoded as BytesRef, we'll use the same encoder; all values should be null at this point case "unsupported" -> TopNEncoder.UNSUPPORTED; default -> throw new EsqlIllegalArgumentException("No TopN sorting encoder for type " + inverse.get(channel).type()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index 98bf932ce3af8..26c57f13e16c4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -251,13 +251,16 @@ public static ElementType toElementType(DataType dataType) { * For example, spatial types can be extracted into doc-values under specific conditions, otherwise they extract as BytesRef. 
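+     * Counter fields are mapped to the element type of their parent numeric type below (counter_long to LONG,
+     * counter_integer to INT, counter_double to DOUBLE), which is why casting a counter to its parent type, for
+     * example {@code from test | eval tx = to_long(k8s.pod.network.tx)} in the TSDB YAML tests, can reuse the
+     * loaded Block as-is.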
*/ public static ElementType toElementType(DataType dataType, MappedFieldType.FieldExtractPreference fieldExtractPreference) { - if (dataType == DataTypes.LONG || dataType == DataTypes.DATETIME || dataType == DataTypes.UNSIGNED_LONG) { + if (dataType == DataTypes.LONG + || dataType == DataTypes.DATETIME + || dataType == DataTypes.UNSIGNED_LONG + || dataType == EsqlDataTypes.COUNTER_LONG) { return ElementType.LONG; } - if (dataType == DataTypes.INTEGER) { + if (dataType == DataTypes.INTEGER || dataType == EsqlDataTypes.COUNTER_INTEGER) { return ElementType.INT; } - if (dataType == DataTypes.DOUBLE) { + if (dataType == DataTypes.DOUBLE || dataType == EsqlDataTypes.COUNTER_DOUBLE) { return ElementType.DOUBLE; } // unsupported fields are passed through as a BytesRef diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java index 89c7455baf885..b508e9a4f040c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java @@ -121,6 +121,11 @@ public class EsqlFeatures implements FeatureSpecification { */ public static final NodeFeature MV_ORDERING_SORTED_ASCENDING = new NodeFeature("esql.mv_ordering_sorted_ascending"); + /** + * Support for metrics counter fields + */ + public static final NodeFeature METRICS_COUNTER_FIELDS = new NodeFeature("esql.metrics_counter_fields"); + @Override public Set getFeatures() { return Set.of( @@ -139,7 +144,8 @@ public Set getFeatures() { ST_DISJOINT, STRING_LITERAL_AUTO_CASTING, CASTING_OPERATOR, - MV_ORDERING_SORTED_ASCENDING + MV_ORDERING_SORTED_ASCENDING, + METRICS_COUNTER_FIELDS ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java index 2910a690bf8a0..e763d54a2dcf4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java @@ -10,7 +10,6 @@ import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.xpack.ql.type.DataType; import org.elasticsearch.xpack.ql.type.DataTypeRegistry; -import org.elasticsearch.xpack.ql.type.DataTypes; import java.util.Collection; @@ -37,10 +36,10 @@ public Collection dataTypes() { @Override public DataType fromEs(String typeName, TimeSeriesParams.MetricType metricType) { if (metricType == TimeSeriesParams.MetricType.COUNTER) { - // Counter fields will be a counter type, for now they are unsupported - return DataTypes.UNSUPPORTED; + return EsqlDataTypes.getCounterType(typeName); + } else { + return EsqlDataTypes.fromName(typeName); } - return EsqlDataTypes.fromName(typeName); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java index 468ffcc2cba2a..912c17dae0865 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java @@ -51,6 +51,17 @@ public final class EsqlDataTypes { public static final DataType GEO_SHAPE = new DataType("geo_shape", Integer.MAX_VALUE, false, false, true); public 
static final DataType CARTESIAN_SHAPE = new DataType("cartesian_shape", Integer.MAX_VALUE, false, false, true); + /** + * These are numeric fields labeled as metric counters in time-series indices. Although stored + * internally as numeric fields, they represent cumulative metrics and must not be treated as regular + * numeric fields. Therefore, we define them differently and separately from their parent numeric field. + * These fields are strictly for use in retrieval from indices, rate aggregation, and casting to their + * parent numeric type. + */ + public static final DataType COUNTER_LONG = new DataType("counter_long", LONG.size(), false, false, LONG.hasDocValues()); + public static final DataType COUNTER_INTEGER = new DataType("counter_integer", INTEGER.size(), false, false, INTEGER.hasDocValues()); + public static final DataType COUNTER_DOUBLE = new DataType("counter_double", DOUBLE.size(), false, false, DOUBLE.hasDocValues()); + private static final Collection TYPES = Stream.of( BOOLEAN, UNSUPPORTED, @@ -77,7 +88,10 @@ public final class EsqlDataTypes { GEO_POINT, CARTESIAN_POINT, CARTESIAN_SHAPE, - GEO_SHAPE + GEO_SHAPE, + COUNTER_LONG, + COUNTER_INTEGER, + COUNTER_DOUBLE ).sorted(Comparator.comparing(DataType::typeName)).toList(); private static final Map NAME_TO_TYPE = TYPES.stream().collect(toUnmodifiableMap(DataType::typeName, t -> t)); @@ -212,7 +226,8 @@ public static boolean isRepresentable(DataType t) { && t != FLOAT && t != SCALED_FLOAT && t != SOURCE - && t != HALF_FLOAT; + && t != HALF_FLOAT + && isCounterType(t) == false; } public static boolean areCompatible(DataType left, DataType right) { @@ -232,4 +247,12 @@ public static DataType widenSmallNumericTypes(DataType type) { } return type; } + + public static DataType getCounterType(String typeName) { + return fromTypeName("counter_" + typeName); + } + + public static boolean isCounterType(DataType dt) { + return dt == COUNTER_LONG || dt == COUNTER_INTEGER || dt == COUNTER_DOUBLE; + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java index 0e2886d099916..79939365181aa 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java @@ -143,9 +143,9 @@ private Page randomPage(List columns) { return new Page(columns.stream().map(c -> { Block.Builder builder = PlannerUtils.toElementType(EsqlDataTypes.fromName(c.type())).newBlockBuilder(1, blockFactory); switch (c.type()) { - case "unsigned_long", "long" -> ((LongBlock.Builder) builder).appendLong(randomLong()); - case "integer" -> ((IntBlock.Builder) builder).appendInt(randomInt()); - case "double" -> ((DoubleBlock.Builder) builder).appendDouble(randomDouble()); + case "unsigned_long", "long", "counter_long" -> ((LongBlock.Builder) builder).appendLong(randomLong()); + case "integer", "counter_integer" -> ((IntBlock.Builder) builder).appendInt(randomInt()); + case "double", "counter_double" -> ((DoubleBlock.Builder) builder).appendDouble(randomDouble()); case "keyword" -> ((BytesRefBlock.Builder) builder).appendBytesRef(new BytesRef(randomAlphaOfLength(10))); case "text" -> ((BytesRefBlock.Builder) builder).appendBytesRef(new BytesRef(randomAlphaOfLength(10000))); case "ip" -> ((BytesRefBlock.Builder) builder).appendBytesRef( diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java index ad8cb1003eeaa..a94cba52f8f0a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java @@ -134,4 +134,8 @@ public static void loadEnrichPolicyResolution( public static void loadEnrichPolicyResolution(EnrichResolution enrich, String policy, String field, String index, String mapping) { loadEnrichPolicyResolution(enrich, EnrichPolicy.MATCH_TYPE, policy, field, index, mapping); } + + public static IndexResolution tsdbIndexResolution() { + return loadMapping("tsdb-mapping.json", "test"); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 8f474e6cb6a83..1f32a5a76f3e8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -1777,45 +1777,45 @@ public void testDeferredGroupingInStats() { } public void testUnsupportedTypesInStats() { - verifyUnsupported( - """ - row x = to_unsigned_long(\"10\") - | stats avg(x), count_distinct(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x) - """, - "Found 8 problems\n" - + "line 2:12: argument of [avg(x)] must be [numeric except unsigned_long], found value [x] type [unsigned_long]\n" - + "line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long], " - + "found value [x] type [unsigned_long]\n" - + "line 2:39: argument of [max(x)] must be [datetime or numeric except unsigned_long], " - + "found value [max(x)] type [unsigned_long]\n" - + "line 2:47: argument of [median(x)] must be [numeric except unsigned_long], found value [x] type [unsigned_long]\n" - + "line 2:58: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long], " - + "found value [x] type [unsigned_long]\n" - + "line 2:88: argument of [min(x)] must be [datetime or numeric except unsigned_long], " - + "found value [min(x)] type [unsigned_long]\n" - + "line 2:96: first argument of [percentile(x, 10)] must be [numeric except unsigned_long], " - + "found value [x] type [unsigned_long]\n" - + "line 2:115: argument of [sum(x)] must be [numeric except unsigned_long], found value [x] type [unsigned_long]" - ); + verifyUnsupported(""" + row x = to_unsigned_long(\"10\") + | stats avg(x), count_distinct(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x) + """, """ + Found 8 problems + line 2:12: argument of [avg(x)] must be [numeric except unsigned_long or counter types],\ + found value [x] type [unsigned_long] + line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long or counter types],\ + found value [x] type [unsigned_long] + line 2:39: argument of [max(x)] must be [datetime or numeric except unsigned_long or counter types],\ + found value [max(x)] type [unsigned_long] + line 2:47: argument of [median(x)] must be [numeric except unsigned_long or counter types],\ + found value [x] type [unsigned_long] + line 2:58: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long or counter types],\ + 
found value [x] type [unsigned_long] + line 2:88: argument of [min(x)] must be [datetime or numeric except unsigned_long or counter types],\ + found value [min(x)] type [unsigned_long] + line 2:96: first argument of [percentile(x, 10)] must be [numeric except unsigned_long],\ + found value [x] type [unsigned_long] + line 2:115: argument of [sum(x)] must be [numeric except unsigned_long or counter types],\ + found value [x] type [unsigned_long]"""); - verifyUnsupported( - """ - row x = to_version("1.2") - | stats avg(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x) - """, - "Found 7 problems\n" - + "line 2:10: argument of [avg(x)] must be [numeric except unsigned_long], found value [x] type [version]\n" - + "line 2:18: argument of [max(x)] must be [datetime or numeric except unsigned_long], " - + "found value [max(x)] type [version]\n" - + "line 2:26: argument of [median(x)] must be [numeric except unsigned_long], found value [x] type [version]\n" - + "line 2:37: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long], " - + "found value [x] type [version]\n" - + "line 2:67: argument of [min(x)] must be [datetime or numeric except unsigned_long], " - + "found value [min(x)] type [version]\n" - + "line 2:75: first argument of [percentile(x, 10)] must be [numeric except unsigned_long], " - + "found value [x] type [version]\n" - + "line 2:94: argument of [sum(x)] must be [numeric except unsigned_long], found value [x] type [version]" - ); + verifyUnsupported(""" + row x = to_version("1.2") + | stats avg(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x) + """, """ + Found 7 problems + line 2:10: argument of [avg(x)] must be [numeric except unsigned_long or counter types],\ + found value [x] type [version] + line 2:18: argument of [max(x)] must be [datetime or numeric except unsigned_long or counter types],\ + found value [max(x)] type [version] + line 2:26: argument of [median(x)] must be [numeric except unsigned_long or counter types],\ + found value [x] type [version] + line 2:37: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long or counter types],\ + found value [x] type [version] + line 2:67: argument of [min(x)] must be [datetime or numeric except unsigned_long or counter types],\ + found value [min(x)] type [version] + line 2:75: first argument of [percentile(x, 10)] must be [numeric except unsigned_long], found value [x] type [version] + line 2:94: argument of [sum(x)] must be [numeric except unsigned_long or counter types], found value [x] type [version]"""); } public void testInOnText() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 8d9140cdda5f4..f563e1a6cb25c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -21,12 +21,14 @@ import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; import static org.elasticsearch.xpack.ql.type.DataTypes.UNSIGNED_LONG; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; //@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") public class VerifierTests extends ESTestCase { private static final 
EsqlParser parser = new EsqlParser(); private final Analyzer defaultAnalyzer = AnalyzerTestUtils.expandedDefaultAnalyzer(); + private final Analyzer tsdb = AnalyzerTestUtils.analyzer(AnalyzerTestUtils.tsdbIndexResolution()); public void testIncompatibleTypesInMathOperation() { assertEquals( @@ -72,7 +74,8 @@ public void testAggsExpressionsInStatsAggs() { error("from test | stats max(max(salary)) by first_name") ); assertEquals( - "1:25: argument of [avg(first_name)] must be [numeric except unsigned_long], found value [first_name] type [keyword]", + "1:25: argument of [avg(first_name)] must be [numeric except unsigned_long or counter types]," + + " found value [first_name] type [keyword]", error("from test | stats count(avg(first_name)) by first_name") ); assertEquals( @@ -378,7 +381,8 @@ public void testUnsignedLongNegation() { public void testSumOnDate() { assertEquals( - "1:19: argument of [sum(hire_date)] must be [numeric except unsigned_long], found value [hire_date] type [datetime]", + "1:19: argument of [sum(hire_date)] must be [numeric except unsigned_long or counter types]," + + " found value [hire_date] type [datetime]", error("from test | stats sum(hire_date)") ); } @@ -480,6 +484,39 @@ public void testInlineImpossibleConvert() { assertEquals("1:5: argument of [false::ip] must be [ip or string], found value [false] type [boolean]", error("ROW false::ip")); } + public void testAggregateOnCounter() { + assertThat( + error("FROM tests | STATS min(network.bytes_in)", tsdb), + equalTo( + "1:20: argument of [min(network.bytes_in)] must be [datetime or numeric except unsigned_long or counter types]," + + " found value [min(network.bytes_in)] type [counter_long]" + ) + ); + + assertThat( + error("FROM tests | STATS max(network.bytes_in)", tsdb), + equalTo( + "1:20: argument of [max(network.bytes_in)] must be [datetime or numeric except unsigned_long or counter types]," + + " found value [max(network.bytes_in)] type [counter_long]" + ) + ); + + assertThat( + error("FROM tests | STATS count(network.bytes_out)", tsdb), + equalTo( + "1:20: argument of [count(network.bytes_out)] must be [any type except counter types]," + + " found value [network.bytes_out] type [counter_long]" + ) + ); + } + + public void testGroupByCounter() { + assertThat( + error("FROM tests | STATS count(*) BY network.bytes_in", tsdb), + equalTo("1:32: cannot group by on [counter_long] type for grouping [network.bytes_in]") + ); + } + private String error(String query) { return error(query, defaultAnalyzer); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java index 3e1fbaa2940eb..772dea0ef4557 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java @@ -121,11 +121,11 @@ public static Literal randomLiteral(DataType type) { case "boolean" -> randomBoolean(); case "byte" -> randomByte(); case "short" -> randomShort(); - case "integer" -> randomInt(); - case "unsigned_long", "long" -> randomLong(); + case "integer", "counter_integer" -> randomInt(); + case "unsigned_long", "long", "counter_long" -> randomLong(); case "date_period" -> Period.of(randomIntBetween(-1000, 1000), randomIntBetween(-13, 13), randomIntBetween(-32, 32)); case "datetime" -> 
randomMillisUpToYear9999(); - case "double", "scaled_float" -> randomDouble(); + case "double", "scaled_float", "counter_double" -> randomDouble(); case "float" -> randomFloat(); case "half_float" -> HalfFloatPoint.sortableShortToHalfFloat(HalfFloatPoint.halfFloatToSortableShort(randomFloat())); case "keyword" -> new BytesRef(randomAlphaOfLength(5)); @@ -946,6 +946,57 @@ protected static String typeErrorMessage(boolean includeOrdinal, List parameters() { List.of() ); + TestCaseSupplier.unary( + suppliers, + "Attribute[channel=0]", + List.of(new TestCaseSupplier.TypedDataSupplier("counter", ESTestCase::randomDouble, EsqlDataTypes.COUNTER_DOUBLE)), + DataTypes.DOUBLE, + l -> l, + List.of() + ); + TestCaseSupplier.unary( + suppliers, + evaluatorName.apply("Integer"), + List.of(new TestCaseSupplier.TypedDataSupplier("counter", () -> randomInt(1000), EsqlDataTypes.COUNTER_INTEGER)), + DataTypes.DOUBLE, + l -> ((Integer) l).doubleValue(), + List.of() + ); + TestCaseSupplier.unary( + suppliers, + evaluatorName.apply("Long"), + List.of(new TestCaseSupplier.TypedDataSupplier("counter", () -> randomLongBetween(1, 1000), EsqlDataTypes.COUNTER_LONG)), + DataTypes.DOUBLE, + l -> ((Long) l).doubleValue(), + List.of() + ); + return parameterSuppliersFromTypedData(errorsForCasesWithoutExamples(anyNullIsNull(true, suppliers))); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIntegerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIntegerTests.java index e6f6cb7e978f7..bc27ded5a6dae 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIntegerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIntegerTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.tree.Source; import org.elasticsearch.xpack.ql.type.DataTypes; @@ -257,6 +258,15 @@ public static Iterable parameters() { ) ); + TestCaseSupplier.unary( + suppliers, + "Attribute[channel=0]", + List.of(new TestCaseSupplier.TypedDataSupplier("counter", ESTestCase::randomInt, EsqlDataTypes.COUNTER_INTEGER)), + DataTypes.INTEGER, + l -> l, + List.of() + ); + return parameterSuppliersFromTypedData(errorsForCasesWithoutExamples(anyNullIsNull(true, suppliers))); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLongTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLongTests.java index 1879b7ce97ea8..3b123344b4b11 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLongTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToLongTests.java @@ -11,8 +11,10 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; 
+import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.tree.Source; import org.elasticsearch.xpack.ql.type.DataTypes; @@ -208,6 +210,22 @@ public static Iterable parameters() { ) ); + TestCaseSupplier.unary( + suppliers, + "Attribute[channel=0]", + List.of(new TestCaseSupplier.TypedDataSupplier("counter", ESTestCase::randomNonNegativeLong, EsqlDataTypes.COUNTER_LONG)), + DataTypes.LONG, + l -> l, + List.of() + ); + TestCaseSupplier.unary( + suppliers, + evaluatorName.apply("Integer"), + List.of(new TestCaseSupplier.TypedDataSupplier("counter", ESTestCase::randomInt, EsqlDataTypes.COUNTER_INTEGER)), + DataTypes.LONG, + l -> ((Integer) l).longValue(), + List.of() + ); return parameterSuppliersFromTypedData(errorsForCasesWithoutExamples(anyNullIsNull(true, suppliers))); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java index 93f58398d267f..23d2f8da488e1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java @@ -23,8 +23,12 @@ import static org.hamcrest.Matchers.equalTo; public class EsqlDataTypeRegistryTests extends ESTestCase { + public void testCounter() { - resolve("long", TimeSeriesParams.MetricType.COUNTER, DataTypes.UNSUPPORTED); + resolve("long", TimeSeriesParams.MetricType.COUNTER, EsqlDataTypes.COUNTER_LONG); + resolve("integer", TimeSeriesParams.MetricType.COUNTER, EsqlDataTypes.COUNTER_INTEGER); + resolve("double", TimeSeriesParams.MetricType.COUNTER, EsqlDataTypes.COUNTER_DOUBLE); + } public void testGauge() { diff --git a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/type/Types.java b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/type/Types.java index a19f4c634f77c..00f776db29fb6 100644 --- a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/type/Types.java +++ b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/type/Types.java @@ -53,7 +53,13 @@ private static DataType getType(DataTypeRegistry typeRegistry, Map loadMapping(DataTypeRegistry registry, String private static Map loadMapping(DataTypeRegistry registry, InputStream stream, Boolean ordered) { boolean order = ordered != null ? 
ordered.booleanValue() : randomBoolean(); try (InputStream in = stream) { - return Types.fromEs(registry, XContentHelper.convertToMap(JsonXContent.jsonXContent, in, order)); + Map map = XContentHelper.convertToMap(JsonXContent.jsonXContent, in, order); + return Types.fromEs(registry, map); } catch (IOException ex) { throw new RuntimeException(ex); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml index 30b81860f014f..c09bc17ab9a5c 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml @@ -1,7 +1,7 @@ setup: - requires: - cluster_features: ["gte_v8.11.0"] - reason: "ESQL is available in 8.11+" + cluster_features: ["esql.metrics_counter_fields"] + reason: "require metrics counter fields" test_runner_features: allowed_warnings_regex - do: indices.create: @@ -38,7 +38,7 @@ setup: type: long time_series_metric: counter rx: - type: long + type: integer time_series_metric: counter - do: bulk: @@ -112,7 +112,6 @@ load everything: reason: "_source is available in 8.13+" - do: allowed_warnings_regex: - - "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null" - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: @@ -126,9 +125,9 @@ load everything: - match: {columns.2.name: "k8s.pod.name"} - match: {columns.2.type: "keyword"} - match: {columns.3.name: "k8s.pod.network.rx"} - - match: {columns.3.type: "unsupported"} + - match: {columns.3.type: "counter_integer"} - match: {columns.4.name: "k8s.pod.network.tx"} - - match: {columns.4.type: "unsupported"} + - match: {columns.4.type: "counter_long"} - match: {columns.5.name: "k8s.pod.uid"} - match: {columns.5.type: "keyword"} - match: {columns.6.name: "metricset"} @@ -139,7 +138,6 @@ load everything: load a document: - do: allowed_warnings_regex: - - "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null" - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: @@ -151,25 +149,60 @@ load a document: - match: {values.0.0: "2021-04-28T18:50:23.142Z"} - match: {values.0.1: "10.10.55.3"} - match: {values.0.2: "dog"} - - match: {values.0.3: null } - - match: {values.0.4: null } + - match: {values.0.3: 530600088 } + - match: {values.0.4: 1434577921 } - match: {values.0.5: "df3145b3-0563-4d3b-a0f7-897eb2876ea9"} - match: {values.0.6: "pod"} --- -filter on counter: +filter on counter without cast: - do: - catch: /Cannot use field \[k8s.pod.network.tx\] with unsupported type \[counter\]/ + catch: bad_request esql.query: body: query: 'from test | where k8s.pod.network.tx == 1434577921' version: 2024.04.01 +--- +cast counter then filter: + - do: + esql.query: + body: + query: 'from test | where k8s.pod.network.tx::long == 2005177954 and k8s.pod.network.rx::integer == 801479970 | sort @timestamp | limit 10' + version: 2024.04.01 + - length: {values: 1} + - length: {values.0: 7} + - match: {values.0.0: "2021-04-28T18:50:24.467Z"} + - match: {values.0.1: "10.10.55.1"} + - match: {values.0.2: "cat"} + - match: {values.0.3: 801479970 } + - match: {values.0.4: 2005177954 } + - match: {values.0.5: "947e4ced-1786-4e53-9e0c-5c447e959507"} + - match: {values.0.6: "pod"} + +--- +sort on counter without cast: + - do: + catch: bad_request + esql.query: + body: + query: 'from test | KEEP k8s.pod.network.tx | sort @k8s.pod.network.tx | limit 1' + 
version: 2024.04.01 + +--- +cast then sort on counter: + - do: + esql.query: + body: + query: 'from test | KEEP k8s.pod.network.tx | EVAL tx=to_long(k8s.pod.network.tx) | sort tx | limit 1' + version: 2024.04.01 + - length: {values: 1} + - match: {values.0.0: 1434521831 } + --- from doc with aggregate_metric_double: - do: allowed_warnings_regex: - - "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null" - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: @@ -201,7 +234,6 @@ stats on aggregate_metric_double: from index pattern unsupported counter: - do: allowed_warnings_regex: - - "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null" - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: @@ -219,7 +251,7 @@ from index pattern unsupported counter: - match: {columns.4.name: "k8s.pod.name"} - match: {columns.4.type: "keyword"} - match: {columns.5.name: "k8s.pod.network.rx"} - - match: {columns.5.type: "unsupported"} + - match: {columns.5.type: "counter_integer"} - match: {columns.6.name: "k8s.pod.network.tx"} - match: {columns.6.type: "unsupported"} - match: {columns.7.name: "k8s.pod.uid"} From 4664ced1bd096e66e3ae367b462272b416825d80 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Fri, 26 Apr 2024 16:09:07 -0400 Subject: [PATCH 14/14] [ESQL] Binary Comparison Serialization (#107921) Prior to this PR, serializing a binary comparison in ES|QL depended on the enum BinaryComparisonProcessor.BinaryComparisonOperator from the QL binary comparison code. That put some distance between the ESQL classes and their serialization logic, while also limiting our ability to make adjustments to that logic (since doing so would have ramifications for SQL and EQL) This PR introduces a new ESQL specific enum for binary comparisons, which has a Writer and a Reader built in, and which implements the standard Writable interface. This enum is constructed in such a way as to be wire-compatible with the existing enum, thus not requiring a transport version change (although any future changes to this probably will require a transport version change). A side effect of this change is removing Null Equals from ESQL serialization. We never actually implemented Null Equals, and the existing class is a stub. I infer that it was only created to allow use of the QL BinaryComparisonOperator enum, which specifies a Null Equals. I did not include it in the ESQL specific enum I just added, and as such removed it from places that reference that enum. There is also a "shim" mapping from the new ESQL specific enum to the general QL enum. This is necessary for passing up to the parent BinaryOperation class. Changing the argument for that to use an interface like ArithmeticOperation does would require some non-trivial changes to how QL does serialization, which would dramatically increase the surface area of this PR. Medium term, I would like to change EsqlBinaryComparison to inherit directly from BinaryOperator, which will remove the need for that shim. Unfortunately, doing so proved non-trivial, and so I'm saving that for follow up work. Follow up work: - Remove remaining references to Null Equals, and the ESQL Null Equals class. - Move PlanNamedTypes.writeBinComparison and PlanNamedTypes.readBinComparison into EsqlBinaryComparison, and make EsqlBinaryComparison Writable. This will finish putting the serialization logic next to the object being serialized, for binary comparisons. 
- Remove the "shim" by changing EsqlBinaryComparison to inherit directly from BinaryOperation --- .../predicate/operator/comparison/Equals.java | 12 ++- .../comparison/EsqlBinaryComparison.java | 73 +++++++++++++++++-- .../operator/comparison/GreaterThan.java | 10 +-- .../comparison/GreaterThanOrEqual.java | 10 +-- .../operator/comparison/LessThan.java | 12 +-- .../operator/comparison/LessThanOrEqual.java | 12 +-- .../operator/comparison/NotEquals.java | 12 ++- .../xpack/esql/io/stream/PlanNamedTypes.java | 40 ++++------ .../comparison/EsqlBinaryComparisonTests.java | 65 +++++++++++++++++ .../esql/io/stream/PlanNamedTypesTests.java | 32 +++----- 10 files changed, 191 insertions(+), 87 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/EsqlBinaryComparisonTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/Equals.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/Equals.java index 62eec13af008a..9cc10a555f288 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/Equals.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/Equals.java @@ -12,8 +12,6 @@ import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.expression.predicate.Negatable; -import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison; -import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparisonProcessor; import org.elasticsearch.xpack.ql.tree.NodeInfo; import org.elasticsearch.xpack.ql.tree.Source; import org.elasticsearch.xpack.ql.type.DataType; @@ -22,7 +20,7 @@ import java.time.ZoneId; import java.util.Map; -public class Equals extends EsqlBinaryComparison implements Negatable { +public class Equals extends EsqlBinaryComparison implements Negatable { private static final Map evaluatorMap = Map.ofEntries( Map.entry(DataTypes.BOOLEAN, EqualsBoolsEvaluator.Factory::new), Map.entry(DataTypes.INTEGER, EqualsIntsEvaluator.Factory::new), @@ -41,11 +39,11 @@ public class Equals extends EsqlBinaryComparison implements Negatable evaluatorMap; + private final BinaryComparisonOperation functionType; + + @FunctionalInterface + public interface BinaryOperatorConstructor { + EsqlBinaryComparison apply(Source source, Expression lhs, Expression rhs); + } + + public enum BinaryComparisonOperation implements Writeable { + + EQ(0, "==", BinaryComparisonProcessor.BinaryComparisonOperation.EQ, Equals::new), + // id 1 reserved for NullEquals + NEQ(2, "!=", BinaryComparisonProcessor.BinaryComparisonOperation.NEQ, NotEquals::new), + GT(3, ">", BinaryComparisonProcessor.BinaryComparisonOperation.GT, GreaterThan::new), + GTE(4, ">=", BinaryComparisonProcessor.BinaryComparisonOperation.GTE, GreaterThanOrEqual::new), + LT(5, "<", BinaryComparisonProcessor.BinaryComparisonOperation.LT, LessThan::new), + LTE(6, "<=", BinaryComparisonProcessor.BinaryComparisonOperation.LTE, LessThanOrEqual::new); + + private final int id; + private final String symbol; + // Temporary mapping to the old enum, to satisfy the superclass constructor signature. 
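+        // Note on the wire format: the ids assigned to the constants above intentionally mirror the ordinals of the
+        // QL BinaryComparisonProcessor.BinaryComparisonOperation enum, with 1 reserved for the unimplemented
+        // NullEquals, so writeTo/readFromStream below emit and consume the same bytes as the previous
+        // out.writeEnum(...) call and no transport version change is needed.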
+ private final BinaryComparisonProcessor.BinaryComparisonOperation shim; + private final BinaryOperatorConstructor constructor; + + BinaryComparisonOperation( + int id, + String symbol, + BinaryComparisonProcessor.BinaryComparisonOperation shim, + BinaryOperatorConstructor constructor + ) { + this.id = id; + this.symbol = symbol; + this.shim = shim; + this.constructor = constructor; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(id); + } + + public static BinaryComparisonOperation readFromStream(StreamInput in) throws IOException { + int id = in.readVInt(); + for (BinaryComparisonOperation op : values()) { + if (op.id == id) { + return op; + } + } + throw new IOException("No BinaryComparisonOperation found for id [" + id + "]"); + } + + public EsqlBinaryComparison buildNewInstance(Source source, Expression lhs, Expression rhs) { + return constructor.apply(source, lhs, rhs); + } + } + protected EsqlBinaryComparison( Source source, Expression left, Expression right, - /* TODO: BinaryComparisonOperator is an enum with a bunch of functionality we don't really want. We should extract an interface and - create a symbol only version like we did for BinaryArithmeticOperation. Ideally, they could be the same class. - */ - BinaryComparisonProcessor.BinaryComparisonOperation operation, + BinaryComparisonOperation operation, Map evaluatorMap ) { this(source, left, right, operation, null, evaluatorMap); @@ -49,13 +105,18 @@ protected EsqlBinaryComparison( Source source, Expression left, Expression right, - BinaryComparisonProcessor.BinaryComparisonOperation operation, + BinaryComparisonOperation operation, // TODO: We are definitely not doing the right thing with this zoneId ZoneId zoneId, Map evaluatorMap ) { - super(source, left, right, operation, zoneId); + super(source, left, right, operation.shim, zoneId); this.evaluatorMap = evaluatorMap; + this.functionType = operation; + } + + public BinaryComparisonOperation getFunctionType() { + return functionType; } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/GreaterThan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/GreaterThan.java index 3eca0e858acbf..09fb32add0f18 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/GreaterThan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/GreaterThan.java @@ -11,8 +11,6 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.expression.predicate.Negatable; -import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison; -import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparisonProcessor; import org.elasticsearch.xpack.ql.tree.NodeInfo; import org.elasticsearch.xpack.ql.tree.Source; import org.elasticsearch.xpack.ql.type.DataType; @@ -21,7 +19,7 @@ import java.time.ZoneId; import java.util.Map; -public class GreaterThan extends EsqlBinaryComparison implements Negatable { +public class GreaterThan extends EsqlBinaryComparison implements Negatable { private static final Map evaluatorMap = Map.ofEntries( Map.entry(DataTypes.INTEGER, GreaterThanIntsEvaluator.Factory::new), Map.entry(DataTypes.DOUBLE, 
GreaterThanDoublesEvaluator.Factory::new), @@ -35,11 +33,11 @@ public class GreaterThan extends EsqlBinaryComparison implements Negatable { +public class GreaterThanOrEqual extends EsqlBinaryComparison implements Negatable { private static final Map evaluatorMap = Map.ofEntries( Map.entry(DataTypes.INTEGER, GreaterThanOrEqualIntsEvaluator.Factory::new), Map.entry(DataTypes.DOUBLE, GreaterThanOrEqualDoublesEvaluator.Factory::new), @@ -35,11 +33,11 @@ public class GreaterThanOrEqual extends EsqlBinaryComparison implements Negatabl ); public GreaterThanOrEqual(Source source, Expression left, Expression right) { - super(source, left, right, BinaryComparisonProcessor.BinaryComparisonOperation.GTE, evaluatorMap); + super(source, left, right, BinaryComparisonOperation.GTE, evaluatorMap); } public GreaterThanOrEqual(Source source, Expression left, Expression right, ZoneId zoneId) { - super(source, left, right, BinaryComparisonProcessor.BinaryComparisonOperation.GTE, zoneId, evaluatorMap); + super(source, left, right, BinaryComparisonOperation.GTE, zoneId, evaluatorMap); } @Override @@ -63,7 +61,7 @@ public LessThan negate() { } @Override - public BinaryComparison reverse() { + public EsqlBinaryComparison reverse() { return new LessThanOrEqual(source(), left(), right(), zoneId()); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/LessThan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/LessThan.java index 6b82df1d67da6..1649706a643c3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/LessThan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/LessThan.java @@ -11,8 +11,6 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.expression.predicate.Negatable; -import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison; -import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparisonProcessor; import org.elasticsearch.xpack.ql.tree.NodeInfo; import org.elasticsearch.xpack.ql.tree.Source; import org.elasticsearch.xpack.ql.type.DataType; @@ -21,7 +19,7 @@ import java.time.ZoneId; import java.util.Map; -public class LessThan extends EsqlBinaryComparison implements Negatable { +public class LessThan extends EsqlBinaryComparison implements Negatable { private static final Map evaluatorMap = Map.ofEntries( Map.entry(DataTypes.INTEGER, LessThanIntsEvaluator.Factory::new), @@ -35,8 +33,12 @@ public class LessThan extends EsqlBinaryComparison implements Negatable { +public class LessThanOrEqual extends EsqlBinaryComparison implements Negatable { private static final Map evaluatorMap = Map.ofEntries( Map.entry(DataTypes.INTEGER, LessThanOrEqualIntsEvaluator.Factory::new), Map.entry(DataTypes.DOUBLE, LessThanOrEqualDoublesEvaluator.Factory::new), @@ -34,8 +32,12 @@ public class LessThanOrEqual extends EsqlBinaryComparison implements Negatable { +public class NotEquals extends EsqlBinaryComparison implements Negatable { private static final Map evaluatorMap = Map.ofEntries( Map.entry(DataTypes.BOOLEAN, NotEqualsBoolsEvaluator.Factory::new), Map.entry(DataTypes.INTEGER, NotEqualsIntsEvaluator.Factory::new), @@ -41,11 +39,11 @@ public class NotEquals extends 
EsqlBinaryComparison implements Negatable namedTypeEntries() { // NamedExpressions of(NamedExpression.class, Alias.class, PlanNamedTypes::writeAlias, PlanNamedTypes::readAlias), // BinaryComparison - of(BinaryComparison.class, Equals.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), - of(BinaryComparison.class, NullEquals.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), - of(BinaryComparison.class, NotEquals.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), - of(BinaryComparison.class, GreaterThan.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), - of(BinaryComparison.class, GreaterThanOrEqual.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), - of(BinaryComparison.class, LessThan.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), - of(BinaryComparison.class, LessThanOrEqual.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), + of(EsqlBinaryComparison.class, Equals.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), + of(EsqlBinaryComparison.class, NotEquals.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), + of(EsqlBinaryComparison.class, GreaterThan.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), + of(EsqlBinaryComparison.class, GreaterThanOrEqual.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), + of(EsqlBinaryComparison.class, LessThan.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), + of(EsqlBinaryComparison.class, LessThanOrEqual.class, PlanNamedTypes::writeBinComparison, PlanNamedTypes::readBinComparison), // InsensitiveEquals of( InsensitiveEquals.class, @@ -1199,26 +1196,19 @@ static void writeUnsupportedEsField(PlanStreamOutput out, UnsupportedEsField uns // -- BinaryComparison - static BinaryComparison readBinComparison(PlanStreamInput in, String name) throws IOException { + static EsqlBinaryComparison readBinComparison(PlanStreamInput in, String name) throws IOException { var source = in.readSource(); - var operation = in.readEnum(BinaryComparisonProcessor.BinaryComparisonOperation.class); + EsqlBinaryComparison.BinaryComparisonOperation operation = EsqlBinaryComparison.BinaryComparisonOperation.readFromStream(in); var left = in.readExpression(); var right = in.readExpression(); + // TODO: Remove zoneId entirely var zoneId = in.readOptionalZoneId(); - return switch (operation) { - case EQ -> new Equals(source, left, right, zoneId); - case NULLEQ -> new NullEquals(source, left, right, zoneId); - case NEQ -> new NotEquals(source, left, right, zoneId); - case GT -> new GreaterThan(source, left, right, zoneId); - case GTE -> new GreaterThanOrEqual(source, left, right, zoneId); - case LT -> new LessThan(source, left, right, zoneId); - case LTE -> new LessThanOrEqual(source, left, right, zoneId); - }; - } - - static void writeBinComparison(PlanStreamOutput out, BinaryComparison binaryComparison) throws IOException { + return operation.buildNewInstance(source, left, right); + } + + static void writeBinComparison(PlanStreamOutput out, EsqlBinaryComparison binaryComparison) throws IOException { out.writeSource(binaryComparison.source()); - out.writeEnum(binaryComparison.function()); + binaryComparison.getFunctionType().writeTo(out); out.writeExpression(binaryComparison.left()); out.writeExpression(binaryComparison.right()); 
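+        // The optional zoneId is still written here (and consumed in readBinComparison above) so the stream layout is
+        // unchanged, even though buildNewInstance(...) no longer passes it into the reconstructed comparison; the
+        // TODO above tracks removing it entirely.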
out.writeOptionalZoneId(binaryComparison.zoneId()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/EsqlBinaryComparisonTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/EsqlBinaryComparisonTests.java new file mode 100644 index 0000000000000..5e9e702ff8d12 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/evaluator/predicate/operator/comparison/EsqlBinaryComparisonTests.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation; +import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparisonProcessor; + +import java.io.IOException; +import java.util.List; + +public class EsqlBinaryComparisonTests extends ESTestCase { + + public void testSerializationOfBinaryComparisonOperation() throws IOException { + for (BinaryComparisonOperation op : BinaryComparisonOperation.values()) { + BinaryComparisonOperation newOp = copyWriteable( + op, + new NamedWriteableRegistry(List.of()), + BinaryComparisonOperation::readFromStream + ); + assertEquals(op, newOp); + } + } + + /** + * Test that a serialized + * {@link org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparisonProcessor.BinaryComparisonOperation} + * can be read back as a + * {@link BinaryComparisonOperation} + */ + public void testCompatibleWithQLBinaryComparisonOperation() throws IOException { + validateCompatibility(BinaryComparisonProcessor.BinaryComparisonOperation.EQ, BinaryComparisonOperation.EQ); + validateCompatibility(BinaryComparisonProcessor.BinaryComparisonOperation.NEQ, BinaryComparisonOperation.NEQ); + validateCompatibility(BinaryComparisonProcessor.BinaryComparisonOperation.GT, BinaryComparisonOperation.GT); + validateCompatibility(BinaryComparisonProcessor.BinaryComparisonOperation.GTE, BinaryComparisonOperation.GTE); + validateCompatibility(BinaryComparisonProcessor.BinaryComparisonOperation.LT, BinaryComparisonOperation.LT); + validateCompatibility(BinaryComparisonProcessor.BinaryComparisonOperation.LTE, BinaryComparisonOperation.LTE); + } + + private static void validateCompatibility( + BinaryComparisonProcessor.BinaryComparisonOperation original, + BinaryComparisonOperation expected + ) throws IOException { + try (BytesStreamOutput output = new BytesStreamOutput()) { + output.setTransportVersion(TransportVersion.current()); + output.writeEnum(original); + try (StreamInput in = new NamedWriteableAwareStreamInput(output.bytes().streamInput(), new NamedWriteableRegistry(List.of()))) { + in.setTransportVersion(TransportVersion.current()); + BinaryComparisonOperation newOp = BinaryComparisonOperation.readFromStream(in); + assertEquals(expected, newOp); + 
+            }
+        }
+    }
+
+}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java
index 57d86147a5bba..e22fa3c66384b 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java
@@ -18,6 +18,7 @@
 import org.elasticsearch.xpack.esql.EsqlTestUtils;
 import org.elasticsearch.xpack.esql.SerializationTestUtils;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.Equals;
+import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.EsqlBinaryComparison;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.GreaterThan;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.GreaterThanOrEqual;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.LessThan;
@@ -45,7 +46,6 @@
 import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mod;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Sub;
-import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.NullEquals;
 import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
 import org.elasticsearch.xpack.esql.plan.logical.Dissect;
 import org.elasticsearch.xpack.esql.plan.logical.Enrich;
@@ -86,7+86,6 @@
 import org.elasticsearch.xpack.ql.expression.function.Function;
 import org.elasticsearch.xpack.ql.expression.function.aggregate.AggregateFunction;
 import org.elasticsearch.xpack.ql.expression.predicate.operator.arithmetic.ArithmeticOperation;
-import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison;
 import org.elasticsearch.xpack.ql.index.EsIndex;
 import org.elasticsearch.xpack.ql.options.EsSourceOptions;
 import org.elasticsearch.xpack.ql.plan.logical.Filter;
@@ -103,10 +102,8 @@
 import org.elasticsearch.xpack.ql.type.KeywordEsField;
 import org.elasticsearch.xpack.ql.type.TextEsField;
 import org.elasticsearch.xpack.ql.type.UnsupportedEsField;
-import org.elasticsearch.xpack.ql.util.DateUtils;
 
 import java.io.IOException;
-import java.time.ZoneId;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -334,15 +331,15 @@ public void testBinComparisonSimple() throws IOException {
         var orig = new Equals(Source.EMPTY, field("foo", DataTypes.DOUBLE), field("bar", DataTypes.DOUBLE));
         BytesStreamOutput bso = new BytesStreamOutput();
         PlanStreamOutput out = new PlanStreamOutput(bso, planNameRegistry);
-        out.writeNamed(BinaryComparison.class, orig);
-        var deser = (Equals) planStreamInput(bso).readNamed(BinaryComparison.class);
+        out.writeNamed(EsqlBinaryComparison.class, orig);
+        var deser = (Equals) planStreamInput(bso).readNamed(EsqlBinaryComparison.class);
         EqualsHashCodeTestUtils.checkEqualsAndHashCode(orig, unused -> deser);
     }
 
     public void testBinComparison() {
         Stream.generate(PlanNamedTypesTests::randomBinaryComparison)
             .limit(100)
-            .forEach(obj -> assertNamedType(BinaryComparison.class, obj));
+            .forEach(obj -> assertNamedType(EsqlBinaryComparison.class, obj));
     }
 
     public void testAggFunctionSimple() throws IOException {
@@ -582,18 +579,17 @@ static InvalidMappedField randomInvalidMappedField() {
         );
     }
 
-    static BinaryComparison randomBinaryComparison() {
-        int v = randomIntBetween(0, 6);
+    static EsqlBinaryComparison randomBinaryComparison() {
+        int v = randomIntBetween(0, 5);
         var left = field(randomName(), randomDataType());
         var right = field(randomName(), randomDataType());
         return switch (v) {
-            case 0 -> new Equals(Source.EMPTY, left, right, zoneIdOrNull());
-            case 1 -> new NullEquals(Source.EMPTY, left, right, zoneIdOrNull());
-            case 2 -> new NotEquals(Source.EMPTY, left, right, zoneIdOrNull());
-            case 3 -> new GreaterThan(Source.EMPTY, left, right, zoneIdOrNull());
-            case 4 -> new GreaterThanOrEqual(Source.EMPTY, left, right, zoneIdOrNull());
-            case 5 -> new LessThan(Source.EMPTY, left, right, zoneIdOrNull());
-            case 6 -> new LessThanOrEqual(Source.EMPTY, left, right, zoneIdOrNull());
+            case 0 -> new Equals(Source.EMPTY, left, right);
+            case 1 -> new NotEquals(Source.EMPTY, left, right);
+            case 2 -> new GreaterThan(Source.EMPTY, left, right);
+            case 3 -> new GreaterThanOrEqual(Source.EMPTY, left, right);
+            case 4 -> new LessThan(Source.EMPTY, left, right);
+            case 5 -> new LessThanOrEqual(Source.EMPTY, left, right);
             default -> throw new AssertionError(v);
         };
     }
@@ -635,10 +631,6 @@ static NameId nameIdOrNull() {
         return randomBoolean() ? new NameId() : null;
     }
 
-    static ZoneId zoneIdOrNull() {
-        return randomBoolean() ? DateUtils.UTC : null;
-    }
-
     static Nullability randomNullability() {
         int i = randomInt(2);
         return switch (i) {