Use of org.apache.lucene.index.ShuffleForcedMergePolicy in project crate by crate.
The class InternalEngine, method getIndexWriterConfig:
    private IndexWriterConfig getIndexWriterConfig() {
        final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
        // we by default don't commit on close
        iwc.setCommitOnClose(false);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
        // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
        boolean verbose = false;
        try {
            verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
        } catch (Exception ignore) {
            // ignored
        }
        iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
        iwc.setMergeScheduler(mergeScheduler);
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        MergePolicy mergePolicy = config().getMergePolicy();
        // always configure the soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes
        iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
        if (softDeleteEnabled) {
            mergePolicy = new RecoverySourcePruneMergePolicy(
                SourceFieldMapper.RECOVERY_SOURCE_NAME,
                softDeletesPolicy::getRetentionQuery,
                new SoftDeletesRetentionMergePolicy(
                    Lucene.SOFT_DELETES_FIELD,
                    softDeletesPolicy::getRetentionQuery,
                    new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME)));
        }
        boolean shuffleForcedMerge = Booleans.parseBoolean(System.getProperty("es.shuffle_forced_merge", Boolean.TRUE.toString()));
        if (shuffleForcedMerge) {
            // We wrap the merge policy for all indices even though it is mostly useful for time-based indices,
            // but there should be no overhead for other types of indices, so it's simpler than adding a setting
            // to enable it.
            mergePolicy = new ShuffleForcedMergePolicy(mergePolicy);
        }
        iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
        iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
        iwc.setCodec(engineConfig.getCodec());
        // always use compound on flush - reduces # of file-handles on refresh
        iwc.setUseCompoundFile(true);
        return iwc;
    }
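Note that ShuffleForcedMergePolicy appears to be defined in the crate codebase itself (placed in the org.apache.lucene.index package so it can reach package-private Lucene APIs) rather than in stock Lucene; it is a decorator that, as its name suggests, shuffles the order in which segments are combined during a forced merge and delegates everything else to the wrapped policy. A minimal standalone sketch of the same wrapping pattern (the class name and the TieredMergePolicy delegate are illustrative assumptions, not taken from the CrateDB source):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.ShuffleForcedMergePolicy;
    import org.apache.lucene.index.TieredMergePolicy;

    class ShuffleWrapSketch {

        // Build an IndexWriterConfig whose forced merges shuffle segment order.
        static IndexWriterConfig shuffledConfig() {
            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
            // The decorator only changes segment ordering for forced merges;
            // normal background merging still follows the delegate policy.
            iwc.setMergePolicy(new ShuffleForcedMergePolicy(new TieredMergePolicy()));
            return iwc;
        }
    }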
Use of org.apache.lucene.index.ShuffleForcedMergePolicy in project crate by crate.
The class RecoverySourcePruneMergePolicyTests, method testPruneAll:
    public void testPruneAll() throws IOException {
        try (Directory dir = newDirectory()) {
            IndexWriterConfig iwc = newIndexWriterConfig();
            RecoverySourcePruneMergePolicy mp = new RecoverySourcePruneMergePolicy(
                "extra_source",
                MatchNoDocsQuery::new,
                newLogMergePolicy());
            iwc.setMergePolicy(new ShuffleForcedMergePolicy(mp));
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                for (int i = 0; i < 20; i++) {
                    if (i > 0 && randomBoolean()) {
                        writer.flush();
                    }
                    Document doc = new Document();
                    doc.add(new StoredField("source", "hello world"));
                    doc.add(new StoredField("extra_source", "hello world"));
                    doc.add(new NumericDocValuesField("extra_source", 1));
                    writer.addDocument(doc);
                }
                writer.forceMerge(1);
                writer.commit();
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    for (int i = 0; i < reader.maxDoc(); i++) {
                        Document document = reader.document(i);
                        assertEquals(1, document.getFields().size());
                        assertEquals("source", document.getFields().get(0).name());
                    }
                    assertEquals(1, reader.leaves().size());
                    LeafReader leafReader = reader.leaves().get(0).reader();
                    NumericDocValues extra_source = leafReader.getNumericDocValues("extra_source");
                    if (extra_source != null) {
                        assertEquals(DocIdSetIterator.NO_MORE_DOCS, extra_source.nextDoc());
                    }
                    if (leafReader instanceof CodecReader && reader instanceof StandardDirectoryReader) {
                        CodecReader codecReader = (CodecReader) leafReader;
                        StandardDirectoryReader sdr = (StandardDirectoryReader) reader;
                        SegmentInfos segmentInfos = sdr.getSegmentInfos();
                        MergePolicy.MergeSpecification forcedMerges = mp.findForcedDeletesMerges(segmentInfos, new MergePolicy.MergeContext() {

                            @Override
                            public int numDeletesToMerge(SegmentCommitInfo info) {
                                return info.info.maxDoc() - 1;
                            }

                            @Override
                            public int numDeletedDocs(SegmentCommitInfo info) {
                                return info.info.maxDoc() - 1;
                            }

                            @Override
                            public InfoStream getInfoStream() {
                                return new NullInfoStream();
                            }

                            @Override
                            public Set<SegmentCommitInfo> getMergingSegments() {
                                return Collections.emptySet();
                            }
                        });
                        // don't wrap if there is nothing to do
                        assertSame(codecReader, forcedMerges.merges.get(0).wrapForMerge(codecReader));
                    }
                }
            }
        }
    }
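The anonymous MergePolicy.MergeContext in testPruneAll can be factored into a small reusable stub for merge-policy tests. The sketch below is an assumption-laden refactoring rather than CrateDB code: the class name is hypothetical, the "all but one document is deleted" answers mirror the test above, and Lucene's built-in InfoStream.NO_OUTPUT stands in for the test framework's NullInfoStream.

    import java.util.Collections;
    import java.util.Set;

    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.util.InfoStream;

    // Hypothetical helper: reports every segment as having all but one document
    // deleted, which pushes a merge policy into proposing forced-delete merges.
    final class StubMergeContext implements MergePolicy.MergeContext {

        @Override
        public int numDeletesToMerge(SegmentCommitInfo info) {
            return info.info.maxDoc() - 1;
        }

        @Override
        public int numDeletedDocs(SegmentCommitInfo info) {
            return info.info.maxDoc() - 1;
        }

        @Override
        public InfoStream getInfoStream() {
            return InfoStream.NO_OUTPUT; // keep merge-policy logging quiet in tests
        }

        @Override
        public Set<SegmentCommitInfo> getMergingSegments() {
            return Collections.emptySet(); // pretend no merges are in flight
        }
    }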
Use of org.apache.lucene.index.ShuffleForcedMergePolicy in project crate by crate.
The class PrunePostingsMergePolicyTests, method testPrune:
    @Test
    public void testPrune() throws IOException {
        try (Directory dir = newDirectory()) {
            IndexWriterConfig iwc = newIndexWriterConfig();
            iwc.setSoftDeletesField("_soft_deletes");
            MergePolicy mp = new SoftDeletesRetentionMergePolicy(
                "_soft_deletes",
                MatchAllDocsQuery::new,
                new PrunePostingsMergePolicy(newLogMergePolicy(), "id"));
            iwc.setMergePolicy(new ShuffleForcedMergePolicy(mp));
            boolean sorted = randomBoolean();
            if (sorted) {
                iwc.setIndexSort(new Sort(new SortField("sort", SortField.Type.INT)));
            }
            int numUniqueDocs = randomIntBetween(1, 100);
            int numDocs = randomIntBetween(numUniqueDocs, numUniqueDocs * 5);
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                for (int i = 0; i < numDocs; i++) {
                    if (rarely()) {
                        writer.flush();
                    }
                    if (rarely()) {
                        writer.forceMerge(1, false);
                    }
                    int id = i % numUniqueDocs;
                    Document doc = new Document();
                    doc.add(new StringField("id", "" + id, Field.Store.NO));
                    doc.add(newTextField("text", "the quick brown fox", Field.Store.YES));
                    doc.add(new NumericDocValuesField("sort", i));
                    writer.softUpdateDocument(new Term("id", "" + id), doc, new NumericDocValuesField("_soft_deletes", 1));
                    if (i == 0) {
                        // make sure we have at least 2 segments to ensure we do an actual merge to kick out all postings for
                        // soft deletes
                        writer.flush();
                    }
                }
                writer.forceMerge(1);
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    LeafReader leafReader = reader.leaves().get(0).reader();
                    assertEquals(numDocs, leafReader.maxDoc());
                    Terms id = leafReader.terms("id");
                    TermsEnum iterator = id.iterator();
                    for (int i = 0; i < numUniqueDocs; i++) {
                        assertTrue(iterator.seekExact(new BytesRef("" + i)));
                        assertEquals(1, iterator.docFreq());
                    }
                    iterator = leafReader.terms("text").iterator();
                    assertTrue(iterator.seekExact(new BytesRef("quick")));
                    assertEquals(leafReader.maxDoc(), iterator.docFreq());
                    int numValues = 0;
                    NumericDocValues sort = leafReader.getNumericDocValues("sort");
                    while (sort.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        if (sorted) {
                            assertEquals(sort.docID(), sort.longValue());
                        } else {
                            assertTrue(sort.longValue() >= 0);
                            assertTrue(sort.longValue() < numDocs);
                        }
                        numValues++;
                    }
                    assertEquals(numValues, numDocs);
                }
                {
                    // prune away a single ID
                    Document doc = new Document();
                    doc.add(new StringField("id", "test", Field.Store.NO));
                    writer.softUpdateDocument(new Term("id", "test"), doc, new NumericDocValuesField("_soft_deletes", 1));
                    writer.flush();
                    writer.forceMerge(1);
                    // delete it with a hard delete: merges drop hard-deleted documents entirely,
                    // while the MatchAllDocsQuery retention query only protects soft deletes
                    writer.deleteDocuments(new Term("id", "test"));
                    writer.flush();
                    writer.forceMerge(1);
                    try (DirectoryReader reader = DirectoryReader.open(writer)) {
                        LeafReader leafReader = reader.leaves().get(0).reader();
                        assertEquals(numDocs, leafReader.maxDoc());
                        Terms id = leafReader.terms("id");
                        TermsEnum iterator = id.iterator();
                        assertEquals(numUniqueDocs, id.size());
                        for (int i = 0; i < numUniqueDocs; i++) {
                            assertTrue(iterator.seekExact(new BytesRef("" + i)));
                            assertEquals(1, iterator.docFreq());
                        }
                        assertFalse(iterator.seekExact(new BytesRef("test")));
                        iterator = leafReader.terms("text").iterator();
                        assertTrue(iterator.seekExact(new BytesRef("quick")));
                        assertEquals(leafReader.maxDoc(), iterator.docFreq());
                    }
                }
                {
                    // drop all ids
                    // first add a doc such that we can force merge
                    Document doc = new Document();
                    doc.add(new StringField("id", "" + 0, Field.Store.NO));
                    doc.add(newTextField("text", "the quick brown fox", Field.Store.YES));
                    doc.add(new NumericDocValuesField("sort", 0));
                    writer.softUpdateDocument(new Term("id", "" + 0), doc, new NumericDocValuesField("_soft_deletes", 1));
                    for (int i = 0; i < numUniqueDocs; i++) {
                        writer.updateNumericDocValue(new Term("id", "" + i), "_soft_deletes", 1);
                    }
                    writer.flush();
                    writer.forceMerge(1);
                    try (DirectoryReader reader = DirectoryReader.open(writer)) {
                        LeafReader leafReader = reader.leaves().get(0).reader();
                        assertEquals(numDocs + 1, leafReader.maxDoc());
                        assertEquals(0, leafReader.numDocs());
                        assertNull(leafReader.terms("id"));
                        TermsEnum iterator = leafReader.terms("text").iterator();
                        assertTrue(iterator.seekExact(new BytesRef("quick")));
                        assertEquals(leafReader.maxDoc(), iterator.docFreq());
                    }
                }
            }
        }
    }
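For reference, the soft-delete mechanics that testPrune exercises can be shown in a few lines of plain Lucene, independent of the merge policies above. This is a self-contained sketch under stated assumptions (arbitrary field and id names, an in-memory ByteBuffersDirectory), not CrateDB code: softUpdateDocument marks the superseded copy deleted via a doc-values write instead of a hard delete, so it keeps occupying a doc id until a merge policy drops or retains it.

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.ByteBuffersDirectory;

    public class SoftDeleteSketch {

        public static void main(String[] args) throws Exception {
            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer())
                .setSoftDeletesField("_soft_deletes");
            try (ByteBuffersDirectory dir = new ByteBuffersDirectory();
                 IndexWriter writer = new IndexWriter(dir, iwc)) {
                Document doc = new Document();
                doc.add(new StringField("id", "1", Field.Store.NO));
                writer.addDocument(doc);
                // Replace the document: the old copy is soft-deleted, not removed.
                writer.softUpdateDocument(new Term("id", "1"), doc,
                    new NumericDocValuesField("_soft_deletes", 1));
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    System.out.println(reader.maxDoc());  // 2: the soft-deleted copy still occupies a doc id
                    System.out.println(reader.numDocs()); // 1: only the live copy is visible to searches
                }
            }
        }
    }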