Skip to content

Commit

Permalink
Resolves FoundationDB#1676: Lucene merge based on probability
Browse files Browse the repository at this point in the history
  • Loading branch information
tian-yizuo committed May 16, 2022
1 parent 4be2721 commit a94fab8
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 15 deletions.
2 changes: 1 addition & 1 deletion docs/ReleaseNotes.md
Expand Up @@ -25,7 +25,7 @@ This release also updates downstream dependency versions. Most notably, the prot
* **Bug fix** Fix 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Bug fix** Fix 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Bug fix** Fix 5 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Performance** Improvement 1 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Performance** Lucene merge based on probability [(Issue #1676)](https://github.com/FoundationDB/fdb-record-layer/issues/1676)
* **Performance** Improvement 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Performance** Improvement 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Performance** Improvement 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
Expand Down
Expand Up @@ -59,6 +59,8 @@ public enum LuceneLogMessageKeys {
INPUT,
LENGTH,
LOCK_NAME,
MERGE_SOURCE,
MERGE_TRIGGER,
OFFSET,
ORIGINAL_DATA_SIZE,
POINTER,
Expand Down
Expand Up @@ -104,4 +104,9 @@ public final class LuceneRecordContextProperties {
* False to use a {@link com.apple.foundationdb.record.lucene.codec.LuceneOptimizedBlendedInfixSuggesterWithoutTermVectors} that does not store term vectors, and sort matches based on positions detection in memory.
*/
public static final RecordLayerPropertyKey<Boolean> LUCENE_AUTO_COMPLETE_WITH_TERM_VECTORS = RecordLayerPropertyKey.booleanPropertyKey("com.apple.foundationdb.record.lucene.autoComplete.withTermVectors", true);

/**
* Controls whether merges of Lucene index directories (including their separate auto-complete directories) are run probabilistically rather than every time they are requested, in order to reduce the number of merges performed per transaction.
*/
public static final RecordLayerPropertyKey<Boolean> LUCENE_MULTIPLE_MERGE_OPTIMIZATION_ENABLED = RecordLayerPropertyKey.booleanPropertyKey("com.apple.foundationdb.record.lucene.multipleMerge.optimizationEnabled", true);
}
Expand Up @@ -23,6 +23,7 @@
import com.apple.foundationdb.record.RecordCoreArgumentException;
import com.apple.foundationdb.record.lucene.LuceneLoggerInfoStream;
import com.apple.foundationdb.record.lucene.LuceneRecordContextProperties;
import com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager;
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState;
import com.apple.foundationdb.tuple.Tuple;
import org.apache.lucene.analysis.Analyzer;
Expand All @@ -35,6 +36,7 @@
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.Term;
Expand Down Expand Up @@ -77,6 +79,7 @@
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;

Expand Down Expand Up @@ -120,18 +123,21 @@ public class LuceneOptimizedBlendedInfixSuggesterWithoutTermVectors extends Anal
*/
private final int minPrefixCharsCopy;

private final int mergeDirectoryCount;

private Double exponent = 2.0;

@SuppressWarnings("squid:S107")
LuceneOptimizedBlendedInfixSuggesterWithoutTermVectors(@Nonnull IndexMaintainerState state, @Nonnull Directory dir, @Nonnull Analyzer indexAnalyzer,
@Nonnull Analyzer queryAnalyzer, int minPrefixChars, BlendedInfixSuggester.BlenderType blenderType, int numFactor,
@Nullable Double exponent, boolean highlight, @Nonnull IndexOptions indexOptions) throws IOException {
@Nullable Double exponent, boolean highlight, @Nonnull IndexOptions indexOptions, int mergeDirectoryCount) throws IOException {
super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, false, true, highlight);
this.state = state;
this.blenderType = blenderType;
this.indexOptions = indexOptions;
this.numFactor = numFactor;
this.minPrefixCharsCopy = minPrefixChars;
this.mergeDirectoryCount = mergeDirectoryCount;
if (exponent != null) {
this.exponent = exponent;
}
Expand Down Expand Up @@ -260,8 +266,32 @@ protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer, IndexWr

/**
 * Decides whether the pending merges of the auto-complete index are executed or discarded.
 *
 * <p>When {@link LuceneRecordContextProperties#LUCENE_MULTIPLE_MERGE_OPTIMIZATION_ENABLED} is set and the
 * trigger is {@link MergeTrigger#FULL_FLUSH}, the merges run with probability
 * {@code 1 / mergeDirectoryCount}; when the draw fails, every pending merge is marked aborted and reported
 * as finished to the merge source. For any other trigger, or when the property is disabled, the call is
 * always delegated to the parent scheduler.</p>
 *
 * @param mergeSource the source handing out the pending merges
 * @param trigger the event that requested the merge
 * @throws IOException if the parent scheduler fails while merging
 */
@Override
public synchronized void merge(final MergeSource mergeSource, final MergeTrigger trigger) throws IOException {
    final boolean probabilistic = state.context.getPropertyStorage().getPropertyValue(LuceneRecordContextProperties.LUCENE_MULTIPLE_MERGE_OPTIMIZATION_ENABLED)
            && trigger == MergeTrigger.FULL_FLUSH;
    if (!probabilistic) {
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace(FDBDirectoryManager.getMergeLogMessage(mergeSource, trigger, state, "Auto-complete index merge"));
        }
        super.merge(mergeSource, trigger);
        return;
    }

    // Random.nextInt(bound) rejects bound <= 0, and a draw compared against 1 can never succeed when
    // bound == 1 (the only possible value is 0). Clamp the bound and compare against 0 instead: a single
    // directory then always merges, while n directories still merge with probability 1/n.
    final int bound = Math.max(1, mergeDirectoryCount);
    if (new Random().nextInt(bound) == 0) {
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace(FDBDirectoryManager.getMergeLogMessage(mergeSource, trigger, state, "Auto-complete index merge based on probability"));
        }
        super.merge(mergeSource, trigger);
        return;
    }

    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(FDBDirectoryManager.getMergeLogMessage(mergeSource, trigger, state, "Auto-complete index merge aborted based on probability"));
    }
    // Drain the source: each pending merge is flagged as aborted and then reported as finished.
    synchronized (mergeSource) {
        MergePolicy.OneMerge nextMerge = mergeSource.getNextMerge();
        while (nextMerge != null) {
            nextMerge.setAborted();
            mergeSource.onMergeFinished(nextMerge);
            nextMerge = mergeSource.getNextMerge();
        }
    }
}
});
iwc.setCodec(new LuceneOptimizedCodec());
Expand Down
Expand Up @@ -25,11 +25,13 @@
import com.apple.foundationdb.record.lucene.LuceneIndexOptions;
import com.apple.foundationdb.record.lucene.LuceneLoggerInfoStream;
import com.apple.foundationdb.record.lucene.LuceneRecordContextProperties;
import com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager;
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
Expand All @@ -41,6 +43,7 @@
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Random;

/**
* Optimized suggester based on {@link BlendedInfixSuggester} to override the {@link IndexWriterConfig} for index writer.
Expand All @@ -53,12 +56,15 @@ public class LuceneOptimizedWrappedBlendedInfixSuggester extends BlendedInfixSug
@Nonnull
private final IndexMaintainerState state;

private final int mergeDirectoryCount;

/**
 * Creates the suggester by forwarding the directory, analyzers and blending parameters to the parent
 * constructor, and by remembering the index maintainer state and the merge directory count for later use.
 *
 * <p>NOTE(review): the tail of the parameter list appears twice in this view (once without and once with
 * {@code mergeDirectoryCount}) — presumably the before/after lines of the change; confirm against the
 * actual file.</p>
 *
 * @param state the index maintainer state this suggester belongs to
 * @param dir the directory passed to the parent suggester
 * @param indexAnalyzer the analyzer passed to the parent suggester for indexing
 * @param queryAnalyzer the analyzer passed to the parent suggester for querying
 * @param minPrefixChars passed through to the parent suggester
 * @param blenderType passed through to the parent suggester
 * @param numFactor passed through to the parent suggester
 * @param exponent passed through to the parent suggester; may be {@code null}
 * @param highlight passed through to the parent suggester
 * @param mergeDirectoryCount stored in the field of the same name
 * @throws IOException declared by this constructor; presumably raised by the parent constructor — TODO confirm
 */
@SuppressWarnings("squid:S107")
private LuceneOptimizedWrappedBlendedInfixSuggester(@Nonnull IndexMaintainerState state, @Nonnull Directory dir, @Nonnull Analyzer indexAnalyzer,
@Nonnull Analyzer queryAnalyzer, int minPrefixChars, BlenderType blenderType, int numFactor,
@Nullable Double exponent, boolean highlight) throws IOException {
@Nullable Double exponent, boolean highlight, int mergeDirectoryCount) throws IOException {
super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor, exponent, false, true, highlight);
this.state = state;
this.mergeDirectoryCount = mergeDirectoryCount;
}

@Override
Expand All @@ -73,8 +79,32 @@ protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer, IndexWr

/**
 * Decides whether the pending merges of the auto-complete index are executed or discarded.
 *
 * <p>When {@link LuceneRecordContextProperties#LUCENE_MULTIPLE_MERGE_OPTIMIZATION_ENABLED} is set and the
 * trigger is {@link MergeTrigger#FULL_FLUSH}, the merges run with probability
 * {@code 1 / mergeDirectoryCount}; when the draw fails, every pending merge is marked aborted and reported
 * as finished to the merge source. For any other trigger, or when the property is disabled, the call is
 * always delegated to the parent scheduler.</p>
 *
 * @param mergeSource the source handing out the pending merges
 * @param trigger the event that requested the merge
 * @throws IOException if the parent scheduler fails while merging
 */
@Override
public synchronized void merge(final MergeSource mergeSource, final MergeTrigger trigger) throws IOException {
    final boolean probabilistic = state.context.getPropertyStorage().getPropertyValue(LuceneRecordContextProperties.LUCENE_MULTIPLE_MERGE_OPTIMIZATION_ENABLED)
            && trigger == MergeTrigger.FULL_FLUSH;
    if (!probabilistic) {
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace(FDBDirectoryManager.getMergeLogMessage(mergeSource, trigger, state, "Auto-complete index merge"));
        }
        super.merge(mergeSource, trigger);
        return;
    }

    // Random.nextInt(bound) rejects bound <= 0, and a draw compared against 1 can never succeed when
    // bound == 1 (the only possible value is 0). Clamp the bound and compare against 0 instead: a single
    // directory then always merges, while n directories still merge with probability 1/n.
    final int bound = Math.max(1, mergeDirectoryCount);
    if (new Random().nextInt(bound) == 0) {
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace(FDBDirectoryManager.getMergeLogMessage(mergeSource, trigger, state, "Auto-complete index merge based on probability"));
        }
        super.merge(mergeSource, trigger);
        return;
    }

    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(FDBDirectoryManager.getMergeLogMessage(mergeSource, trigger, state, "Auto-complete index merge aborted based on probability"));
    }
    // Drain the source: each pending merge is flagged as aborted and then reported as finished.
    synchronized (mergeSource) {
        MergePolicy.OneMerge nextMerge = mergeSource.getNextMerge();
        while (nextMerge != null) {
            nextMerge.setAborted();
            mergeSource.onMergeFinished(nextMerge);
            nextMerge = mergeSource.getNextMerge();
        }
    }
}
});
iwc.setCodec(new LuceneOptimizedCodec());
Expand All @@ -85,7 +115,7 @@ public synchronized void merge(final MergeSource mergeSource, final MergeTrigger
@Nonnull
public static AnalyzingInfixSuggester getSuggester(@Nonnull IndexMaintainerState state, @Nonnull Directory dir,
@Nonnull Analyzer indexAnalyzer, @Nonnull Analyzer queryAnalyzer,
boolean highlight, @Nonnull IndexOptions indexOptions) {
boolean highlight, @Nonnull IndexOptions indexOptions, int mergeDirectoryCount) {
final String autoCompleteBlenderType = state.index.getOption(LuceneIndexOptions.AUTO_COMPLETE_BLENDER_TYPE);
final String autoCompleteBlenderNumFactor = state.index.getOption(LuceneIndexOptions.AUTO_COMPLETE_BLENDER_NUM_FACTOR);
final String autoCompleteMinPrefixSize = state.index.getOption(LuceneIndexOptions.AUTO_COMPLETE_MIN_PREFIX_SIZE);
Expand All @@ -99,9 +129,9 @@ public static AnalyzingInfixSuggester getSuggester(@Nonnull IndexMaintainerState

try {
return useTermVectors
? new LuceneOptimizedWrappedBlendedInfixSuggester(state, dir, indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor, exponent, highlight)
? new LuceneOptimizedWrappedBlendedInfixSuggester(state, dir, indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor, exponent, highlight, mergeDirectoryCount)
: new LuceneOptimizedBlendedInfixSuggesterWithoutTermVectors(state, dir, indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType,
numFactor, exponent, highlight, indexOptions);
numFactor, exponent, highlight, indexOptions, mergeDirectoryCount);
} catch (IllegalArgumentException iae) {
throw new RecordCoreArgumentException("Invalid parameter for auto complete suggester", iae)
.addLogInfo(LogMessageKeys.INDEX_NAME, state.index.getName());
Expand Down
Expand Up @@ -23,7 +23,12 @@
import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.async.AsyncUtil;
import com.apple.foundationdb.record.RecordCoreStorageException;
import com.apple.foundationdb.record.logging.KeyValueLogMessage;
import com.apple.foundationdb.record.logging.LogMessageKeys;
import com.apple.foundationdb.record.lucene.LuceneAnalyzerWrapper;
import com.apple.foundationdb.record.lucene.LuceneIndexOptions;
import com.apple.foundationdb.record.lucene.LuceneIndexTypes;
import com.apple.foundationdb.record.lucene.LuceneLogMessageKeys;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext;
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState;
import com.apple.foundationdb.subspace.Subspace;
Expand All @@ -33,6 +38,8 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;

import javax.annotation.Nonnull;
Expand All @@ -41,6 +48,7 @@
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

/**
* A transaction-scoped manager of {@link FDBDirectory} objects. For a single transaction, all {@link FDBDirectory}
Expand All @@ -58,10 +66,12 @@ public class FDBDirectoryManager implements AutoCloseable {
private final IndexMaintainerState state;
@Nonnull
private final Map<Tuple, FDBDirectoryWrapper> createdDirectories;
private final int mergeDirectoryCount;

/**
 * Creates a manager for the given index maintainer state, starting with no directories created yet.
 * The merge directory count is computed once here from {@code state}.
 *
 * @param state the index maintainer state this manager works on
 */
private FDBDirectoryManager(@Nonnull IndexMaintainerState state) {
    // Computed from the constructor argument only, so it does not depend on the other fields being set.
    this.mergeDirectoryCount = getMergeDirectoryCount(state);
    this.createdDirectories = new ConcurrentHashMap<>();
    this.state = state;
}

@Override
Expand Down Expand Up @@ -96,7 +106,7 @@ private FDBDirectoryWrapper getDirectoryWrapper(@Nullable Tuple groupingKey) {
final Tuple mapKey = groupingKey == null ? TupleHelpers.EMPTY : groupingKey;
return createdDirectories.computeIfAbsent(mapKey, key -> {
final Subspace directorySubspace = state.indexSubspace.subspace(key);
return new FDBDirectoryWrapper(state, new FDBDirectory(directorySubspace, state.context));
return new FDBDirectoryWrapper(state, new FDBDirectory(directorySubspace, state.context), mergeDirectoryCount);
});
}

Expand Down Expand Up @@ -149,4 +159,26 @@ public static FDBDirectoryManager getManager(@Nonnull IndexMaintainerState state
return newManager;
}
}

/**
 * Counts the directories that take part in merging for the record meta-data reachable from {@code state}.
 * Every index whose type equals {@link LuceneIndexTypes#LUCENE} counts once, and counts twice when its
 * {@link LuceneIndexOptions#AUTO_COMPLETE_ENABLED} option is set.
 *
 * @param state the index maintainer state whose store meta-data is inspected
 * @return the total number of directories counted over all Lucene indexes
 */
private int getMergeDirectoryCount(@Nonnull IndexMaintainerState state) {
    return state.store.getRecordMetaData().getAllIndexes().stream()
            .filter(index -> index.getType().equals(LuceneIndexTypes.LUCENE))
            // Auto-complete has its separate directory to merge
            .mapToInt(index -> index.getBooleanOption(LuceneIndexOptions.AUTO_COMPLETE_ENABLED, false) ? 2 : 1)
            .sum();
}

/**
 * Builds a key/value log message describing a merge request.
 * The message carries the merge source, the merge trigger, the index name and the index subspace.
 *
 * @param mergeSource the merge source to report
 * @param trigger the merge trigger to report
 * @param state the index maintainer state providing the index name and subspace
 * @param logMessage the static text of the log message
 * @return the formatted log message
 */
public static String getMergeLogMessage(@Nonnull MergeScheduler.MergeSource mergeSource, @Nonnull MergeTrigger trigger,
                                        @Nonnull IndexMaintainerState state, @Nonnull String logMessage) {
    final String indexName = state.index.getName();
    final Subspace indexSubspace = state.indexSubspace;
    return KeyValueLogMessage.of(logMessage,
            LuceneLogMessageKeys.MERGE_SOURCE, mergeSource,
            LuceneLogMessageKeys.MERGE_TRIGGER, trigger,
            LogMessageKeys.INDEX_NAME, indexName,
            LogMessageKeys.INDEX_SUBSPACE, indexSubspace);
}
}

0 comments on commit a94fab8

Please sign in to comment.