Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolves #1691: Lucene negative queries #1694

Merged
merged 3 commits into from Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -52,6 +52,16 @@ public LuceneBooleanQuery(@Nonnull List<LuceneQueryClause> children, @Nonnull Bo
this.occur = occur;
}

@Nonnull
protected List<LuceneQueryClause> getChildren() {
return children;
}

@Nonnull
protected BooleanClause.Occur getOccur() {
return occur;
}

@Override
public Query bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull EvaluationContext context) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Expand Down
@@ -0,0 +1,104 @@
/*
* LuceneNotQuery.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.PlanHashable;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreBase;
import com.apple.foundationdb.record.query.plan.cascades.explain.Attribute;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;

import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Binder for a negation of clauses.
* Because of the way Lucene {@link BooleanQuery} works, this actually represents set subtraction,
* with a set of positive and negative clauses. For the same reason, there is no disjunctive analogue.
*/
@API(API.Status.UNSTABLE)
public class LuceneNotQuery extends LuceneBooleanQuery {
@Nonnull
private final List<LuceneQueryClause> negatedChildren;

public LuceneNotQuery(@Nonnull List<LuceneQueryClause> children, @Nonnull List<LuceneQueryClause> negatedChildren) {
super(children, BooleanClause.Occur.MUST);
this.negatedChildren = negatedChildren;
}

public LuceneNotQuery(@Nonnull LuceneQueryClause negatedChild) {
this(Collections.emptyList(), Collections.singletonList(negatedChild));
}

@Nonnull
protected List<LuceneQueryClause> getNegatedChildren() {
return negatedChildren;
}

@Override
public Query bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull EvaluationContext context) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
if (getChildren().isEmpty()) {
// Lucene cannot handle all negated clauses.
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
} else {
for (LuceneQueryClause child : getChildren()) {
builder.add(child.bind(store, index, context), BooleanClause.Occur.MUST);
}
}
for (LuceneQueryClause child : negatedChildren) {
builder.add(child.bind(store, index, context), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
}

@Override
public void getPlannerGraphDetails(@Nonnull ImmutableList.Builder<String> detailsBuilder, @Nonnull ImmutableMap.Builder<String, Attribute> attributeMapBuilder) {
super.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder);
for (LuceneQueryClause child : negatedChildren) {
child.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder);
}
}

@Override
public int planHash(@Nonnull final PlanHashKind hashKind) {
return super.planHash() - PlanHashable.iterablePlanHash(hashKind, negatedChildren);
}

@Override
public String toString() {
return Stream.concat(
getChildren().stream().map(Objects::toString),
negatedChildren.stream().map(c -> "NOT " + c)
).collect(Collectors.joining(" AND "));
}
}
Expand Up @@ -37,6 +37,7 @@
import com.apple.foundationdb.record.query.expressions.ComponentWithSingleChild;
import com.apple.foundationdb.record.query.expressions.FieldWithComparison;
import com.apple.foundationdb.record.query.expressions.NestedField;
import com.apple.foundationdb.record.query.expressions.NotComponent;
import com.apple.foundationdb.record.query.expressions.OneOfThemWithComponent;
import com.apple.foundationdb.record.query.expressions.OrComponent;
import com.apple.foundationdb.record.query.expressions.QueryComponent;
Expand Down Expand Up @@ -100,6 +101,10 @@ private ScoredPlan planLucene(@Nonnull CandidateScan candidateScan,
if (!groupingMatch.getType().equals((QueryToKeyMatcher.MatchType.EQUALITY))) {
return null;
}
if (filterMask.allSatisfied()) {
// If filter is only group predicates, can skip trying to find non-trivial Lucene scan.
return null;
}
groupingComparisons = new ScanComparisons(groupingMatch.getEqualityComparisons(), Collections.emptySet());
} else {
groupingComparisons = ScanComparisons.EMPTY;
Expand Down Expand Up @@ -202,6 +207,8 @@ private LuceneQueryClause getQueryForFilter(@Nonnull LucenePlanState state, @Non
return getQueryForLuceneComponent(state, (LuceneQueryComponent)filter, filterMask);
} else if (filter instanceof AndOrComponent) {
return getQueryForAndOr(state, (AndOrComponent) filter, parentFieldName, filterMask);
} else if (filter instanceof NotComponent) {
return getQueryForNot(state, (NotComponent) filter, parentFieldName, filterMask);
} else if (filter instanceof FieldWithComparison) {
return getQueryForFieldWithComparison(state, (FieldWithComparison) filter, parentFieldName, filterMask);
} else if (filter instanceof OneOfThemWithComponent) {
Expand Down Expand Up @@ -240,6 +247,8 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn
final Iterator<FilterSatisfiedMask> subFilterMasks = filterMask != null ? filterMask.getChildren().iterator() : null;
final List<QueryComponent> filters = filter.getChildren();
final List<LuceneQueryClause> childClauses = new ArrayList<>(filters.size());
final List<LuceneQueryClause> negatedChildren = new ArrayList<>(0);
final BooleanClause.Occur occur = filter instanceof OrComponent ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
for (QueryComponent subFilter : filters) {
final FilterSatisfiedMask childMask = subFilterMasks != null ? subFilterMasks.next() : null;
LuceneQueryClause childClause = getQueryForFilter(state, subFilter, parentFieldName, childMask);
Expand All @@ -252,18 +261,81 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn
if (childMask != null) {
childMask.setSatisfied(true);
}
childClauses.add(childClause);
if (childClause instanceof LuceneBooleanQuery && ((LuceneBooleanQuery)childClause).getOccur() == occur) {
childClauses.addAll(((LuceneBooleanQuery)childClause).getChildren());
if (childClause instanceof LuceneNotQuery) {
negatedChildren.addAll(((LuceneNotQuery)childClause).getNegatedChildren());
}
} else {
childClauses.add(childClause);
}
}
if (filterMask != null && filterMask.getUnsatisfiedFilters().isEmpty()) {
filterMask.setSatisfied(true);
}
if (!negatedChildren.isEmpty()) {
return new LuceneNotQuery(childClauses, negatedChildren);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If someone did something like lucene_clause_a OR NOT lucene_clause_b, could this transformation lead to this being translated into something like (lucene_clause_a) -(lucene_clause_b) (effectively turning the OR to an AND)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think so. LuceneNotQuery always has occur = MUST, so it is not possible for negatedChildren to be added to in the SHOULD case. Note how flattening only happens when occur matches.

}
// Don't do Lucene scan if none are satisfied, though.
if (childClauses.isEmpty()) {
return null;
}
final BooleanClause.Occur occur = filter instanceof OrComponent ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
return new LuceneBooleanQuery(childClauses, occur);
}

@Nullable
private LuceneQueryClause getQueryForNot(@Nonnull LucenePlanState state, @Nonnull NotComponent filter,
@Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterMask) {
final LuceneQueryClause childClause = getQueryForFilter(state, filter.getChild(), parentFieldName, filterMask == null ? null : filterMask.getChildren().get(0));
if (childClause == null) {
return null;
}
if (filterMask != null) {
filterMask.setSatisfied(true);
}
return negate(childClause);
}

@Nonnull
private static LuceneQueryClause negate(@Nonnull LuceneQueryClause clause) {
if (clause instanceof LuceneBooleanQuery) {
final LuceneBooleanQuery booleanQuery = (LuceneBooleanQuery)clause;
switch (booleanQuery.getOccur()) {
case MUST:
List<LuceneQueryClause> clauses = new ArrayList<>();
for (LuceneQueryClause child : booleanQuery.getChildren()) {
clauses.add(negate(child));
}
if (clause instanceof LuceneNotQuery) {
LuceneNotQuery notQuery = (LuceneNotQuery)clause;
if (clauses.isEmpty() && notQuery.getNegatedChildren().size() == 1) {
return notQuery.getNegatedChildren().get(0);
}
clauses.addAll(notQuery.getNegatedChildren());
}
return new LuceneBooleanQuery(clauses, BooleanClause.Occur.SHOULD);
case SHOULD:
List<LuceneQueryClause> positive = new ArrayList<>();
List<LuceneQueryClause> negative = new ArrayList<>();
for (LuceneQueryClause child : booleanQuery.getChildren()) {
if (child instanceof LuceneBooleanQuery) {
positive.add(negate(child));
} else {
negative.add(child);
}
}
if (negative.isEmpty()) {
return new LuceneBooleanQuery(positive, BooleanClause.Occur.MUST);
} else {
return new LuceneNotQuery(positive, negative);
}
default:
throw new RecordCoreException("Unsupported boolean query occur: " + booleanQuery);
}
}
return new LuceneNotQuery(clause);
}

@Nullable
private LuceneQueryClause getQueryForFieldWithComparison(@Nonnull LucenePlanState state, @Nonnull FieldWithComparison filter,
@Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterSatisfiedMask) {
Expand Down
Expand Up @@ -20,6 +20,7 @@

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordMetaData;
import com.apple.foundationdb.record.RecordMetaDataBuilder;
Expand Down Expand Up @@ -55,6 +56,7 @@
import com.google.protobuf.Message;
import org.apache.commons.lang3.tuple.Pair;
import org.hamcrest.Matcher;
import org.hamcrest.Matchers;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
Expand Down Expand Up @@ -94,6 +96,7 @@
import static com.apple.foundationdb.record.query.plan.match.PlanMatchers.primaryKeyDistinct;
import static com.apple.foundationdb.record.query.plan.match.PlanMatchers.scan;
import static com.apple.foundationdb.record.query.plan.match.PlanMatchers.typeFilter;
import static com.apple.foundationdb.record.query.plan.match.PlanMatchers.unbounded;
import static com.apple.foundationdb.record.query.plan.match.PlanMatchers.unorderedUnion;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.aMapWithSize;
Expand Down Expand Up @@ -905,4 +908,115 @@ void covering() throws Exception {
}
}

@Test
void fullGroupScan() throws Exception {
try (FDBRecordContext context = openContext()) {
openRecordStore(context, md -> {
md.removeIndex(MAP_AND_FIELD_ON_LUCENE_INDEX.getName());
}, SIMPLE_TEXT_SUFFIXES);
QueryComponent groupFilter = Query.field("entry").oneOfThem().matches(Query.field("key").equalsValue("a"));
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(MAP_DOC)
.setFilter(groupFilter)
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = filter(groupFilter, typeFilter(equalTo(Collections.singleton(TextIndexTestUtils.MAP_DOC)), scan(unbounded())));
assertThat(plan, matcher);
}

try (FDBRecordContext context = openContext()) {
openRecordStore(context, md -> {
md.removeIndex(MAP_AND_FIELD_ON_LUCENE_INDEX.getName());
md.removeIndex(MAP_ON_LUCENE_INDEX.getName());
md.addIndex(MAP_DOC, new Index("GroupedMap", new GroupingKeyExpression(concat(field("group"), mainExpression), 1), LuceneIndexTypes.LUCENE));
}, SIMPLE_TEXT_SUFFIXES);
QueryComponent groupFilter = Query.and(
Query.field("group").equalsValue(1L),
Query.field("entry").oneOfThem().matches(Query.field("key").equalsValue("a")));
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(MAP_DOC)
.setFilter(groupFilter)
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = filter(groupFilter, typeFilter(equalTo(Collections.singleton(TextIndexTestUtils.MAP_DOC)), scan(unbounded())));
assertThat(plan, matcher);
}
}

@Test
void andNot() throws Exception {
initializeFlat();
try (FDBRecordContext context = openContext()) {
openRecordStore(context);
final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true);
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(TextIndexTestUtils.SIMPLE_DOC)
.setFilter(Query.and(filter1, Query.not(filter2)))
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = indexScan(allOf(indexScan("Complex$text_index"),
indexScanType(LuceneScanTypes.BY_LUCENE),
scanParams(query(hasToString("MULTI Verona AND NOT MULTI traffic")))));
assertThat(plan, matcher);
assertThat(getLuceneQuery(plan), Matchers.hasToString("+(text:verona) -(text:traffic)"));
RecordCursor<FDBQueriedRecord<Message>> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan);
RecordCursor<Tuple> map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey);
List<Long> primaryKeys = map.map(t -> t.getLong(0)).asList().get();
assertEquals(Set.of(2L), Set.copyOf(primaryKeys));
}
}

@Test
void justNot() throws Exception {
initializeFlat();
try (FDBRecordContext context = openContext()) {
openRecordStore(context);
final QueryComponent filter = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(TextIndexTestUtils.SIMPLE_DOC)
.setFilter(Query.not(filter))
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = indexScan(allOf(indexScan("Complex$text_index"),
indexScanType(LuceneScanTypes.BY_LUCENE),
scanParams(query(hasToString("NOT MULTI Verona")))));
assertThat(plan, matcher);
assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona)"));
RecordCursor<FDBQueriedRecord<Message>> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan);
RecordCursor<Tuple> map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey);
List<Long> primaryKeys = map.map(t -> t.getLong(0)).asList().get();
assertEquals(Set.of(0L, 1L, 3L, 5L), Set.copyOf(primaryKeys));
}
}

@Test
void notOr() throws Exception {
initializeFlat();
try (FDBRecordContext context = openContext()) {
openRecordStore(context);
final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true);
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(TextIndexTestUtils.SIMPLE_DOC)
.setFilter(Query.not(Query.or(filter1, filter2)))
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = indexScan(allOf(indexScan("Complex$text_index"),
indexScanType(LuceneScanTypes.BY_LUCENE),
scanParams(query(hasToString("NOT MULTI Verona AND NOT MULTI traffic")))));
assertThat(plan, matcher);
assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona) -(text:traffic)"));
RecordCursor<FDBQueriedRecord<Message>> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan);
RecordCursor<Tuple> map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey);
List<Long> primaryKeys = map.map(t -> t.getLong(0)).asList().get();
assertEquals(Set.of(0L, 1L, 3L), Set.copyOf(primaryKeys));
}
}

private org.apache.lucene.search.Query getLuceneQuery(RecordQueryPlan plan) {
LuceneIndexQueryPlan indexPlan = (LuceneIndexQueryPlan)plan;
LuceneScanQuery scan = (LuceneScanQuery)indexPlan.getScanParameters().bind(recordStore, recordStore.getRecordMetaData().getIndex(indexPlan.getIndexName()), EvaluationContext.EMPTY);
return scan.getQuery();
}
}