From 70948fd417cc159ec129c46ceec7e838af2c867d Mon Sep 17 00:00:00 2001 From: Mike McMahon Date: Tue, 24 May 2022 16:20:01 -0700 Subject: [PATCH] Resolves #1691: Lucene negative queries --- .idea/compiler.xml | 22 ++++- .../record/lucene/LuceneBooleanQuery.java | 30 ++++++- .../record/lucene/LuceneNotQuery.java | 82 +++++++++++++++++++ .../record/lucene/LucenePlanner.java | 31 +++++++ .../LuceneQueryFieldComparisonClause.java | 10 +-- .../record/lucene/FDBLuceneQueryTest.java | 78 ++++++++++++++++++ 6 files changed, 242 insertions(+), 11 deletions(-) create mode 100644 fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneNotQuery.java diff --git a/.idea/compiler.xml b/.idea/compiler.xml index 20b4a697ee..abc38bb064 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -51,13 +51,33 @@ - + + + + + + + + + + + + + + + + + + + + + diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java index 7ce242a964..d557fc172a 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java @@ -30,6 +30,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import javax.annotation.Nonnull; @@ -52,10 +53,37 @@ public LuceneBooleanQuery(@Nonnull List children, @Nonnull Bo this.occur = occur; } + @Nonnull + protected List getChildren() { + return children; + } + + @Nonnull + protected BooleanClause.Occur getOccur() { + return occur; + } + @Override public Query bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index index, @Nonnull EvaluationContext context) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); - for (LuceneQueryClause child : children) { + for (int i = 0; i < children.size(); i++) { + LuceneQueryClause child = children.get(i); + if (child instanceof LuceneNotQuery && occur == BooleanClause.Occur.MUST) { + if (i == 0) { + boolean others = false; + for (int j = 1; j < children.size(); j++) { + if (!(children.get(j) instanceof LuceneNotQuery)) { + others = true; + break; + } + } + if (!others) { + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + } + } + builder.add(((LuceneNotQuery)child).getChild().bind(store, index, context), BooleanClause.Occur.MUST_NOT); + continue; + } builder.add(child.bind(store, index, context), occur); } return builder.build(); diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneNotQuery.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneNotQuery.java new file mode 100644 index 0000000000..ac21477714 --- /dev/null +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneNotQuery.java @@ -0,0 +1,82 @@ +/* + * LuceneNotQuery.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.record.lucene; + +import com.apple.foundationdb.annotation.API; +import com.apple.foundationdb.record.EvaluationContext; +import com.apple.foundationdb.record.metadata.Index; +import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreBase; +import com.apple.foundationdb.record.query.plan.cascades.explain.Attribute; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; + +import javax.annotation.Nonnull; + +/** + * Binder for a negation of another clause. + * Note that special casing is required to coalesce with a {@link LuceneBooleanQuery} with {@code MUST}. + */ +@API(API.Status.UNSTABLE) +public class LuceneNotQuery extends LuceneQueryClause { + @Nonnull + private final LuceneQueryClause child; + + public LuceneNotQuery(@Nonnull LuceneQueryClause child) { + this.child = child; + } + + @Nonnull + protected LuceneQueryClause getChild() { + return child; + } + + @Override + public Query bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index index, @Nonnull EvaluationContext context) { + return negate(child.bind(store, index, context)); + } + + @Override + public void getPlannerGraphDetails(@Nonnull ImmutableList.Builder detailsBuilder, @Nonnull ImmutableMap.Builder attributeMapBuilder) { + child.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder); + } + + @Override + public int planHash(@Nonnull final PlanHashKind hashKind) { + return child.planHash() + 3; + } + + @Override + public String toString() { + return "NOT " + child; + } + + // Used as a last resort when can't be merged with parent AND. + protected static Query negate(@Nonnull Query query) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + builder.add(query, BooleanClause.Occur.MUST_NOT); + return builder.build(); + } +} diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java index a216826264..0aff7a6aec 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java @@ -37,6 +37,7 @@ import com.apple.foundationdb.record.query.expressions.ComponentWithSingleChild; import com.apple.foundationdb.record.query.expressions.FieldWithComparison; import com.apple.foundationdb.record.query.expressions.NestedField; +import com.apple.foundationdb.record.query.expressions.NotComponent; import com.apple.foundationdb.record.query.expressions.OneOfThemWithComponent; import com.apple.foundationdb.record.query.expressions.OrComponent; import com.apple.foundationdb.record.query.expressions.QueryComponent; @@ -53,11 +54,13 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.awt.Component; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * A planner to implement lucene query planning so that we can isolate the lucene functionality to @@ -201,6 +204,8 @@ private LuceneQueryClause getQueryForFilter(@Nonnull LucenePlanState state, @Non return getQueryForLuceneComponent(state, (LuceneQueryComponent)filter, filterMask); } else if (filter instanceof AndOrComponent) { return getQueryForAndOr(state, (AndOrComponent) filter, parentFieldName, filterMask); + } else if (filter instanceof NotComponent) { + return getQueryForNot(state, (NotComponent) filter, parentFieldName, filterMask); } else if (filter instanceof FieldWithComparison) { return getQueryForFieldWithComparison(state, (FieldWithComparison) filter, parentFieldName, filterMask); } else if (filter instanceof OneOfThemWithComponent) { @@ -263,6 +268,32 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn return new LuceneBooleanQuery(childClauses, occur); } + @Nullable + private LuceneQueryClause getQueryForNot(@Nonnull LucenePlanState state, @Nonnull NotComponent filter, + @Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterMask) { + LuceneQueryClause childClause = getQueryForFilter(state, filter.getChild(), parentFieldName, filterMask == null ? null : filterMask.getChildren().get(0)); + if (childClause == null) { + return null; + } + if (filterMask != null) { + filterMask.setSatisfied(true); + } + return negate(childClause); + } + + @Nonnull + private static LuceneQueryClause negate(@Nonnull LuceneQueryClause childClause) { + if (childClause instanceof LuceneBooleanQuery) { + LuceneBooleanQuery booleanQuery = (LuceneBooleanQuery)childClause; + return new LuceneBooleanQuery(booleanQuery.getChildren().stream().map(LucenePlanner::negate).collect(Collectors.toList()), + booleanQuery.getOccur() == BooleanClause.Occur.MUST ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST); + } + if (childClause instanceof LuceneNotQuery) { + return ((LuceneNotQuery)childClause).getChild(); + } + return new LuceneNotQuery(childClause); + } + @Nullable private LuceneQueryClause getQueryForFieldWithComparison(@Nonnull LucenePlanState state, @Nonnull FieldWithComparison filter, @Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterSatisfiedMask) { diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java index fd0f577c00..ddb51ae860 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java @@ -39,7 +39,6 @@ import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; @@ -180,13 +179,6 @@ public static LuceneQueryFieldComparisonClause create(@Nonnull String field, @No } } - protected static Query negate(@Nonnull Query query) { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); - builder.add(query, BooleanClause.Occur.MUST_NOT); - return builder.build(); - } - static class NullQuery extends LuceneQueryFieldComparisonClause { public NullQuery(@Nonnull String field, @Nonnull LuceneIndexExpressions.DocumentFieldType fieldType, @Nonnull Comparisons.Comparison comparison) { super(field, fieldType, comparison); @@ -199,7 +191,7 @@ public Query bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index index, @N return allValues; } else { // *:* -f[* TO *] - return negate(allValues); + return LuceneNotQuery.negate(allValues); } } } diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java index 74dd241b3a..6e2894b7e8 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java @@ -20,6 +20,7 @@ package com.apple.foundationdb.record.lucene; +import com.apple.foundationdb.record.EvaluationContext; import com.apple.foundationdb.record.RecordCursor; import com.apple.foundationdb.record.RecordMetaData; import com.apple.foundationdb.record.RecordMetaDataBuilder; @@ -55,6 +56,7 @@ import com.google.protobuf.Message; import org.apache.commons.lang3.tuple.Pair; import org.hamcrest.Matcher; +import org.hamcrest.Matchers; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -905,4 +907,80 @@ void covering() throws Exception { } } + @Test + void andNot() throws Exception { + initializeFlat(); + try (FDBRecordContext context = openContext()) { + openRecordStore(context); + final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true); + final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true); + RecordQuery query = RecordQuery.newBuilder() + .setRecordType(TextIndexTestUtils.SIMPLE_DOC) + .setFilter(Query.and(filter1, Query.not(filter2))) + .build(); + RecordQueryPlan plan = planner.plan(query); + Matcher matcher = indexScan(allOf(indexScan("Complex$text_index"), + indexScanType(LuceneScanTypes.BY_LUCENE), + scanParams(query(hasToString("MULTI Verona AND NOT MULTI traffic"))))); + assertThat(plan, matcher); + assertThat(getLuceneQuery(plan), Matchers.hasToString("+(text:verona) -(text:traffic)")); + RecordCursor> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan); + RecordCursor map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey); + List primaryKeys = map.map(t -> t.getLong(0)).asList().get(); + assertEquals(Set.of(2L), Set.copyOf(primaryKeys)); + } + } + + @Test + void justNot() throws Exception { + initializeFlat(); + try (FDBRecordContext context = openContext()) { + openRecordStore(context); + final QueryComponent filter = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true); + RecordQuery query = RecordQuery.newBuilder() + .setRecordType(TextIndexTestUtils.SIMPLE_DOC) + .setFilter(Query.not(filter)) + .build(); + RecordQueryPlan plan = planner.plan(query); + Matcher matcher = indexScan(allOf(indexScan("Complex$text_index"), + indexScanType(LuceneScanTypes.BY_LUCENE), + scanParams(query(hasToString("NOT MULTI Verona"))))); + assertThat(plan, matcher); + assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona)")); + RecordCursor> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan); + RecordCursor map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey); + List primaryKeys = map.map(t -> t.getLong(0)).asList().get(); + assertEquals(Set.of(0L, 1L, 3L, 5L), Set.copyOf(primaryKeys)); + } + } + + @Test + void notOr() throws Exception { + initializeFlat(); + try (FDBRecordContext context = openContext()) { + openRecordStore(context); + final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true); + final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true); + RecordQuery query = RecordQuery.newBuilder() + .setRecordType(TextIndexTestUtils.SIMPLE_DOC) + .setFilter(Query.not(Query.or(filter1, filter2))) + .build(); + RecordQueryPlan plan = planner.plan(query); + Matcher matcher = indexScan(allOf(indexScan("Complex$text_index"), + indexScanType(LuceneScanTypes.BY_LUCENE), + scanParams(query(hasToString("NOT MULTI Verona AND NOT MULTI traffic"))))); + assertThat(plan, matcher); + assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona) -(text:traffic)")); + RecordCursor> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan); + RecordCursor map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey); + List primaryKeys = map.map(t -> t.getLong(0)).asList().get(); + assertEquals(Set.of(0L, 1L, 3L), Set.copyOf(primaryKeys)); + } + } + + private org.apache.lucene.search.Query getLuceneQuery(RecordQueryPlan plan) { + LuceneIndexQueryPlan indexPlan = (LuceneIndexQueryPlan)plan; + LuceneScanQuery scan = (LuceneScanQuery)indexPlan.getScanParameters().bind(recordStore, recordStore.getRecordMetaData().getIndex(indexPlan.getIndexName()), EvaluationContext.EMPTY); + return scan.getQuery(); + } }