Skip to content

Commit

Permalink
Do not use global ordinals strategy if the leaf reader context cannot…
Browse files Browse the repository at this point in the history
… be obtained (#108459)
  • Loading branch information
przemekwitek committed May 10, 2024
1 parent ac102e5 commit 79032ec
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 20 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/108459.yaml
@@ -0,0 +1,6 @@
pr: 108459
summary: Do not use global ordinals strategy if the leaf reader context cannot be
obtained
area: Machine Learning
type: bug
issues: []
Expand Up @@ -177,7 +177,8 @@ FrequentItemSet toFrequentItemSet(List<Field> fields) throws IOException {
int pos = items.nextSetBit(0);
while (pos > 0) {
Tuple<Integer, Object> item = transactionStore.getItem(topItemIds.getItemIdAt(pos - 1));
assert item.v1() < fields.size() : "item id exceed number of given items, did you configure eclat correctly?";
assert item.v1() < fields.size()
: "eclat error: item id (" + item.v1() + ") exceeds the number of given items (" + fields.size() + ")";
final Field field = fields.get(item.v1());
Object formattedValue = field.formatValue(item.v2());
String fieldName = fields.get(item.v1()).getName();
Expand Down Expand Up @@ -252,19 +253,20 @@ public FrequentItemSetCollector(TransactionStore transactionStore, TopItemIds to
this.topItemIds = topItemIds;
this.size = size;
this.min = min;
queue = new FrequentItemSetPriorityQueue(size);
frequentItemsByCount = Maps.newMapWithExpectedSize(size / 10);
this.queue = new FrequentItemSetPriorityQueue(size);
this.frequentItemsByCount = Maps.newMapWithExpectedSize(size / 10);
}

public FrequentItemSet[] finalizeAndGetResults(List<Field> fields) throws IOException {
FrequentItemSet[] topFrequentItems = new FrequentItemSet[size()];
FrequentItemSet[] topFrequentItems = new FrequentItemSet[queue.size()];
for (int i = topFrequentItems.length - 1; i >= 0; i--) {
topFrequentItems[i] = queue.pop().toFrequentItemSet(fields);
}
return topFrequentItems;
}

public int size() {
// Visible for testing
int size() {
return queue.size();
}

Expand Down
Expand Up @@ -86,17 +86,15 @@ protected ItemSetMapReduceAggregator(

boolean rewriteBasedOnOrdinals = false;

if (ctx.isPresent()) {
for (var c : configsAndValueFilters) {
ItemSetMapReduceValueSource e = context.getValuesSourceRegistry()
.getAggregator(registryKey, c.v1())
.build(c.v1(), id++, c.v2(), ordinalOptimization, ctx.get());
if (e.getField().getName() != null) {
fields.add(e.getField());
valueSources.add(e);
}
rewriteBasedOnOrdinals |= e.usesOrdinals();
for (var c : configsAndValueFilters) {
ItemSetMapReduceValueSource e = context.getValuesSourceRegistry()
.getAggregator(registryKey, c.v1())
.build(c.v1(), id++, c.v2(), ordinalOptimization, ctx);
if (e.getField().getName() != null) {
fields.add(e.getField());
valueSources.add(e);
}
rewriteBasedOnOrdinals |= e.usesOrdinals();
}

this.rewriteBasedOnOrdinals = rewriteBasedOnOrdinals;
Expand Down
Expand Up @@ -37,6 +37,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

/**
* Interface to extract values from Lucene in order to feed it into the MapReducer.
Expand All @@ -53,7 +54,7 @@ ItemSetMapReduceValueSource build(
int id,
IncludeExclude includeExclude,
AbstractItemSetMapReducer.OrdinalOptimization ordinalOptimization,
LeafReaderContext ctx
Optional<LeafReaderContext> ctx
) throws IOException;
}

Expand Down Expand Up @@ -345,20 +346,21 @@ public KeywordValueSource(
int id,
IncludeExclude includeExclude,
AbstractItemSetMapReducer.OrdinalOptimization ordinalOptimization,
LeafReaderContext ctx
Optional<LeafReaderContext> ctx
) throws IOException {
super(config, id, ValueFormatter.BYTES_REF);

if (AbstractItemSetMapReducer.OrdinalOptimization.GLOBAL_ORDINALS.equals(ordinalOptimization)
&& config.getValuesSource() instanceof Bytes.WithOrdinals
&& ((Bytes.WithOrdinals) config.getValuesSource()).supportsGlobalOrdinalsMapping()) {
&& ((Bytes.WithOrdinals) config.getValuesSource()).supportsGlobalOrdinalsMapping()
&& ctx.isPresent()) {
logger.debug("Use ordinals for field [{}]", config.fieldContext().field());

this.executionStrategy = new GlobalOrdinalsStrategy(
getField(),
(Bytes.WithOrdinals) config.getValuesSource(),
includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(config.format()),
ctx
ctx.get()
);
} else {
this.executionStrategy = new MapStrategy(
Expand Down Expand Up @@ -394,7 +396,7 @@ public NumericValueSource(
int id,
IncludeExclude includeExclude,
AbstractItemSetMapReducer.OrdinalOptimization unusedOrdinalOptimization,
LeafReaderContext unusedCtx
Optional<LeafReaderContext> unusedCtx
) {
super(config, id, ValueFormatter.LONG);
this.source = (Numeric) config.getValuesSource();
Expand Down

0 comments on commit 79032ec

Please sign in to comment.