Skip to content

Commit

Permalink
Resolves FoundationDB#1691: Lucene negative queries
Browse files Browse the repository at this point in the history
  • Loading branch information
MMcM committed May 24, 2022
1 parent f1ef0b3 commit 70948fd
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 11 deletions.
22 changes: 21 additions & 1 deletion .idea/compiler.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Expand Up @@ -30,6 +30,7 @@
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;

import javax.annotation.Nonnull;
Expand All @@ -52,10 +53,37 @@ public LuceneBooleanQuery(@Nonnull List<LuceneQueryClause> children, @Nonnull Bo
this.occur = occur;
}

/**
 * Get the child clauses that this boolean query combines.
 * @return the list of children given at construction
 */
@Nonnull
protected List<LuceneQueryClause> getChildren() {
    return this.children;
}

/**
 * Get how the child clauses are combined.
 * @return the {@link BooleanClause.Occur} given at construction
 */
@Nonnull
protected BooleanClause.Occur getOccur() {
    return this.occur;
}

@Override
public Query bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull EvaluationContext context) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (LuceneQueryClause child : children) {
for (int i = 0; i < children.size(); i++) {
LuceneQueryClause child = children.get(i);
if (child instanceof LuceneNotQuery && occur == BooleanClause.Occur.MUST) {
if (i == 0) {
boolean others = false;
for (int j = 1; j < children.size(); j++) {
if (!(children.get(j) instanceof LuceneNotQuery)) {
others = true;
break;
}
}
if (!others) {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
}
}
builder.add(((LuceneNotQuery)child).getChild().bind(store, index, context), BooleanClause.Occur.MUST_NOT);
continue;
}
builder.add(child.bind(store, index, context), occur);
}
return builder.build();
Expand Down
@@ -0,0 +1,82 @@
/*
* LuceneNotQuery.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreBase;
import com.apple.foundationdb.record.query.plan.cascades.explain.Attribute;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;

import javax.annotation.Nonnull;

/**
 * Binder for a negation of another clause.
 *
 * <p>
 * When bound on its own, the negation is expressed as a boolean query that matches all documents
 * and excludes ({@code MUST_NOT}) those matching the child.
 * Note that special casing is required to coalesce with a {@link LuceneBooleanQuery} with {@code MUST}.
 * </p>
 */
@API(API.Status.UNSTABLE)
public class LuceneNotQuery extends LuceneQueryClause {
    @Nonnull
    private final LuceneQueryClause child;

    /**
     * Create a negation of the given clause.
     * @param child the clause to negate
     */
    public LuceneNotQuery(@Nonnull LuceneQueryClause child) {
        this.child = child;
    }

    /**
     * Get the clause being negated.
     * @return the (un-negated) child clause
     */
    @Nonnull
    protected LuceneQueryClause getChild() {
        return child;
    }

    /**
     * Bind the child and wrap the result with {@link #negate(Query)}.
     * @param store the record store to bind against
     * @param index the index to bind against
     * @param context the evaluation context supplying parameter values
     * @return a query matching every document that the bound child does not match
     */
    @Override
    public Query bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull EvaluationContext context) {
        return negate(child.bind(store, index, context));
    }

    /**
     * Delegate to the child; this clause adds no details or attributes of its own.
     * @param detailsBuilder builder receiving the child's detail strings
     * @param attributeMapBuilder builder receiving the child's attributes
     */
    @Override
    public void getPlannerGraphDetails(@Nonnull ImmutableList.Builder<String> detailsBuilder, @Nonnull ImmutableMap.Builder<String, Attribute> attributeMapBuilder) {
        child.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder);
    }

    /**
     * Compute a plan hash that differs from the child's by a fixed offset.
     * @param hashKind the kind of plan hash requested
     * @return the child's plan hash for {@code hashKind}, plus 3
     */
    @Override
    public int planHash(@Nonnull final PlanHashKind hashKind) {
        // Pass the requested kind through to the child; the no-argument overload would ignore it.
        return child.planHash(hashKind) + 3;
    }

    @Override
    public String toString() {
        return "NOT " + child;
    }

    /**
     * Build a stand-alone negation of a Lucene query: match all documents, except those matching {@code query}.
     * Used as a last resort when the negation cannot be merged into a parent AND.
     * @param query the query whose matches are to be excluded
     * @return a boolean query with a {@code MUST} match-all clause and a {@code MUST_NOT} clause for {@code query}
     */
    protected static Query negate(@Nonnull Query query) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
        builder.add(query, BooleanClause.Occur.MUST_NOT);
        return builder.build();
    }
}
Expand Up @@ -37,6 +37,7 @@
import com.apple.foundationdb.record.query.expressions.ComponentWithSingleChild;
import com.apple.foundationdb.record.query.expressions.FieldWithComparison;
import com.apple.foundationdb.record.query.expressions.NestedField;
import com.apple.foundationdb.record.query.expressions.NotComponent;
import com.apple.foundationdb.record.query.expressions.OneOfThemWithComponent;
import com.apple.foundationdb.record.query.expressions.OrComponent;
import com.apple.foundationdb.record.query.expressions.QueryComponent;
Expand All @@ -53,11 +54,13 @@

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.awt.Component;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
* A planner to implement lucene query planning so that we can isolate the lucene functionality to
Expand Down Expand Up @@ -201,6 +204,8 @@ private LuceneQueryClause getQueryForFilter(@Nonnull LucenePlanState state, @Non
return getQueryForLuceneComponent(state, (LuceneQueryComponent)filter, filterMask);
} else if (filter instanceof AndOrComponent) {
return getQueryForAndOr(state, (AndOrComponent) filter, parentFieldName, filterMask);
} else if (filter instanceof NotComponent) {
return getQueryForNot(state, (NotComponent) filter, parentFieldName, filterMask);
} else if (filter instanceof FieldWithComparison) {
return getQueryForFieldWithComparison(state, (FieldWithComparison) filter, parentFieldName, filterMask);
} else if (filter instanceof OneOfThemWithComponent) {
Expand Down Expand Up @@ -263,6 +268,32 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn
return new LuceneBooleanQuery(childClauses, occur);
}

/**
 * Plan a {@link NotComponent} by planning its child and negating the resulting clause.
 * @param state the current planning state
 * @param filter the NOT filter to plan
 * @param parentFieldName the enclosing field name, if any
 * @param filterMask the satisfied-mask for {@code filter}, if one is being tracked
 * @return the negated clause, or {@code null} if the child could not be turned into a clause
 */
@Nullable
private LuceneQueryClause getQueryForNot(@Nonnull LucenePlanState state, @Nonnull NotComponent filter,
                                         @Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterMask) {
    final QueryComponent negatedFilter = filter.getChild();
    // The mask of a NOT has exactly the negated filter as its first child.
    final FilterSatisfiedMask negatedMask = (filterMask != null) ? filterMask.getChildren().get(0) : null;
    final LuceneQueryClause positiveClause = getQueryForFilter(state, negatedFilter, parentFieldName, negatedMask);
    if (positiveClause != null) {
        // NOTE(review): this marks the whole NOT satisfied whenever the child yields any clause;
        // confirm the child clause always covers the child filter completely.
        if (filterMask != null) {
            filterMask.setSatisfied(true);
        }
        return negate(positiveClause);
    }
    return null;
}

/**
 * Negate a clause, pushing the negation down as far as possible.
 * <ul>
 * <li>A negation is unwrapped (double negation).</li>
 * <li>A boolean query has each child negated and {@code MUST} swapped to {@code SHOULD}
 * (any other occur becomes {@code MUST}).</li>
 * <li>Anything else is wrapped in a {@link LuceneNotQuery}.</li>
 * </ul>
 * @param childClause the clause to negate
 * @return a clause representing the negation of {@code childClause}
 */
@Nonnull
private static LuceneQueryClause negate(@Nonnull LuceneQueryClause childClause) {
    if (childClause instanceof LuceneNotQuery) {
        return ((LuceneNotQuery)childClause).getChild();
    }
    if (!(childClause instanceof LuceneBooleanQuery)) {
        return new LuceneNotQuery(childClause);
    }
    final LuceneBooleanQuery combined = (LuceneBooleanQuery)childClause;
    final List<LuceneQueryClause> negatedChildren = new ArrayList<>();
    for (LuceneQueryClause grandchild : combined.getChildren()) {
        negatedChildren.add(negate(grandchild));
    }
    final BooleanClause.Occur flippedOccur;
    if (combined.getOccur() == BooleanClause.Occur.MUST) {
        flippedOccur = BooleanClause.Occur.SHOULD;
    } else {
        flippedOccur = BooleanClause.Occur.MUST;
    }
    return new LuceneBooleanQuery(negatedChildren, flippedOccur);
}

@Nullable
private LuceneQueryClause getQueryForFieldWithComparison(@Nonnull LucenePlanState state, @Nonnull FieldWithComparison filter,
@Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterSatisfiedMask) {
Expand Down
Expand Up @@ -39,7 +39,6 @@
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -180,13 +179,6 @@ public static LuceneQueryFieldComparisonClause create(@Nonnull String field, @No
}
}

protected static Query negate(@Nonnull Query query) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(query, BooleanClause.Occur.MUST_NOT);
return builder.build();
}

static class NullQuery extends LuceneQueryFieldComparisonClause {
public NullQuery(@Nonnull String field, @Nonnull LuceneIndexExpressions.DocumentFieldType fieldType, @Nonnull Comparisons.Comparison comparison) {
super(field, fieldType, comparison);
Expand All @@ -199,7 +191,7 @@ public Query bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @N
return allValues;
} else {
// *:* -f[* TO *]
return negate(allValues);
return LuceneNotQuery.negate(allValues);
}
}
}
Expand Down
Expand Up @@ -20,6 +20,7 @@

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordMetaData;
import com.apple.foundationdb.record.RecordMetaDataBuilder;
Expand Down Expand Up @@ -55,6 +56,7 @@
import com.google.protobuf.Message;
import org.apache.commons.lang3.tuple.Pair;
import org.hamcrest.Matcher;
import org.hamcrest.Matchers;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
Expand Down Expand Up @@ -905,4 +907,80 @@ void covering() throws Exception {
}
}

/**
 * A positive term ANDed with a negated term should bind to one boolean query
 * with a required clause and a prohibited clause, without a match-all clause.
 */
@Test
void andNot() throws Exception {
    initializeFlat();
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        final QueryComponent hasVerona = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
        final QueryComponent hasTraffic = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true);
        final RecordQuery recordQuery = RecordQuery.newBuilder()
                .setRecordType(TextIndexTestUtils.SIMPLE_DOC)
                .setFilter(Query.and(hasVerona, Query.not(hasTraffic)))
                .build();
        final RecordQueryPlan plan = planner.plan(recordQuery);

        // Check the planned clause first, then the Lucene query it binds to.
        final Matcher<RecordQueryPlan> expectedPlan = indexScan(allOf(indexScan("Complex$text_index"),
                indexScanType(LuceneScanTypes.BY_LUCENE),
                scanParams(query(hasToString("MULTI Verona AND NOT MULTI traffic")))));
        assertThat(plan, expectedPlan);
        assertThat(getLuceneQuery(plan), Matchers.hasToString("+(text:verona) -(text:traffic)"));

        final RecordCursor<Tuple> keyCursor = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey);
        final List<Long> foundKeys = keyCursor.map(key -> key.getLong(0)).asList().get();
        assertEquals(Set.of(2L), Set.copyOf(foundKeys));
    }
}

/**
 * A lone negated term should bind to a match-all clause plus a prohibited clause.
 */
@Test
void justNot() throws Exception {
    initializeFlat();
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        final QueryComponent hasVerona = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
        final RecordQuery recordQuery = RecordQuery.newBuilder()
                .setRecordType(TextIndexTestUtils.SIMPLE_DOC)
                .setFilter(Query.not(hasVerona))
                .build();
        final RecordQueryPlan plan = planner.plan(recordQuery);

        // Check the planned clause first, then the Lucene query it binds to.
        final Matcher<RecordQueryPlan> expectedPlan = indexScan(allOf(indexScan("Complex$text_index"),
                indexScanType(LuceneScanTypes.BY_LUCENE),
                scanParams(query(hasToString("NOT MULTI Verona")))));
        assertThat(plan, expectedPlan);
        assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona)"));

        final RecordCursor<Tuple> keyCursor = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey);
        final List<Long> foundKeys = keyCursor.map(key -> key.getLong(0)).asList().get();
        assertEquals(Set.of(0L, 1L, 3L, 5L), Set.copyOf(foundKeys));
    }
}

/**
 * A negated OR should be planned as an AND of negations and bind to a match-all
 * clause plus one prohibited clause per term.
 */
@Test
void notOr() throws Exception {
    initializeFlat();
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        final QueryComponent hasVerona = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
        final QueryComponent hasTraffic = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true);
        final RecordQuery recordQuery = RecordQuery.newBuilder()
                .setRecordType(TextIndexTestUtils.SIMPLE_DOC)
                .setFilter(Query.not(Query.or(hasVerona, hasTraffic)))
                .build();
        final RecordQueryPlan plan = planner.plan(recordQuery);

        // Check the planned clause first, then the Lucene query it binds to.
        final Matcher<RecordQueryPlan> expectedPlan = indexScan(allOf(indexScan("Complex$text_index"),
                indexScanType(LuceneScanTypes.BY_LUCENE),
                scanParams(query(hasToString("NOT MULTI Verona AND NOT MULTI traffic")))));
        assertThat(plan, expectedPlan);
        assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona) -(text:traffic)"));

        final RecordCursor<Tuple> keyCursor = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey);
        final List<Long> foundKeys = keyCursor.map(key -> key.getLong(0)).asList().get();
        assertEquals(Set.of(0L, 1L, 3L), Set.copyOf(foundKeys));
    }
}

/**
 * Bind the scan parameters of a Lucene index plan against the open record store
 * (with an empty evaluation context) and return the resulting Lucene query.
 * @param plan a plan that is expected to be a {@link LuceneIndexQueryPlan}
 * @return the bound Lucene query
 */
private org.apache.lucene.search.Query getLuceneQuery(RecordQueryPlan plan) {
    final LuceneIndexQueryPlan lucenePlan = (LuceneIndexQueryPlan)plan;
    final LuceneScanQuery boundScan = (LuceneScanQuery)lucenePlan.getScanParameters().bind(
            recordStore,
            recordStore.getRecordMetaData().getIndex(lucenePlan.getIndexName()),
            EvaluationContext.EMPTY);
    return boundScan.getQuery();
}
}

0 comments on commit 70948fd

Please sign in to comment.