Example 1 with ShuffleForcedMergePolicy

Use of org.apache.lucene.index.ShuffleForcedMergePolicy in the crate/crate project.

From the class InternalEngine, the method getIndexWriterConfig:

private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    // we by default don't commit on close
    iwc.setCommitOnClose(false);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
    // ignored
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    MergePolicy mergePolicy = config().getMergePolicy();
    // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
    iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        mergePolicy = new RecoverySourcePruneMergePolicy(
            SourceFieldMapper.RECOVERY_SOURCE_NAME,
            softDeletesPolicy::getRetentionQuery,
            new SoftDeletesRetentionMergePolicy(
                Lucene.SOFT_DELETES_FIELD,
                softDeletesPolicy::getRetentionQuery,
                new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME)));
    }
    boolean shuffleForcedMerge = Booleans.parseBoolean(System.getProperty("es.shuffle_forced_merge", Boolean.TRUE.toString()));
    if (shuffleForcedMerge) {
        // We wrap the merge policy for all indices even though it is mostly useful for time-based indices,
        // but there should be no overhead for other types of indices, so it's simpler than adding a setting
        // to enable it.
        mergePolicy = new ShuffleForcedMergePolicy(mergePolicy);
    }
    iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    // always use compound on flush - reduces # of file-handles on refresh
    iwc.setUseCompoundFile(true);
    return iwc;
}
Also used: SoftDeletesRetentionMergePolicy (org.apache.lucene.index.SoftDeletesRetentionMergePolicy), MergePolicy (org.apache.lucene.index.MergePolicy), ElasticsearchMergePolicy (org.elasticsearch.index.shard.ElasticsearchMergePolicy), ShuffleForcedMergePolicy (org.apache.lucene.index.ShuffleForcedMergePolicy), AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException), LockObtainFailedException (org.apache.lucene.store.LockObtainFailedException), TranslogCorruptedException (org.elasticsearch.index.translog.TranslogCorruptedException), IOException (java.io.IOException), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig), LiveIndexWriterConfig (org.apache.lucene.index.LiveIndexWriterConfig), LoggerInfoStream (org.elasticsearch.common.lucene.LoggerInfoStream)
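
The wiring above distills to a small amount of plain Lucene setup. The following is a minimal sketch of the same wrapping pattern, not the engine's code: it assumes Lucene 8.x, uses TieredMergePolicy as an illustrative base policy, and the class name, method name, and "__soft_deletes" field are invented for the example. Note that ShuffleForcedMergePolicy lives in the org.apache.lucene.index package but ships with crate/Elasticsearch rather than with Lucene itself.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.ShuffleForcedMergePolicy;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.MatchAllDocsQuery;

public class ShuffledConfigSketch {

    static IndexWriterConfig newShuffledConfig() {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        // the soft-deletes field must be configured before soft-deleted docs can be retained
        iwc.setSoftDeletesField("__soft_deletes");
        // the innermost policy picks the segments; each wrapper layers one concern on top
        MergePolicy policy = new TieredMergePolicy();
        // keep soft-deleted documents that still match the retention query
        policy = new SoftDeletesRetentionMergePolicy("__soft_deletes", MatchAllDocsQuery::new, policy);
        // interleave old and new segments when a forced merge rewrites the index
        policy = new ShuffleForcedMergePolicy(policy);
        iwc.setMergePolicy(policy);
        return iwc;
    }
}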

Example 2 with ShuffleForcedMergePolicy

Use of org.apache.lucene.index.ShuffleForcedMergePolicy in the crate/crate project.

From the class RecoverySourcePruneMergePolicyTests, the method testPruneAll:

public void testPruneAll() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        RecoverySourcePruneMergePolicy mp = new RecoverySourcePruneMergePolicy("extra_source", MatchNoDocsQuery::new, newLogMergePolicy());
        iwc.setMergePolicy(new ShuffleForcedMergePolicy(mp));
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            for (int i = 0; i < 20; i++) {
                if (i > 0 && randomBoolean()) {
                    writer.flush();
                }
                Document doc = new Document();
                doc.add(new StoredField("source", "hello world"));
                doc.add(new StoredField("extra_source", "hello world"));
                doc.add(new NumericDocValuesField("extra_source", 1));
                writer.addDocument(doc);
            }
            writer.forceMerge(1);
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                for (int i = 0; i < reader.maxDoc(); i++) {
                    Document document = reader.document(i);
                    assertEquals(1, document.getFields().size());
                    assertEquals("source", document.getFields().get(0).name());
                }
                assertEquals(1, reader.leaves().size());
                LeafReader leafReader = reader.leaves().get(0).reader();
                NumericDocValues extra_source = leafReader.getNumericDocValues("extra_source");
                if (extra_source != null) {
                    assertEquals(DocIdSetIterator.NO_MORE_DOCS, extra_source.nextDoc());
                }
                if (leafReader instanceof CodecReader && reader instanceof StandardDirectoryReader) {
                    CodecReader codecReader = (CodecReader) leafReader;
                    StandardDirectoryReader sdr = (StandardDirectoryReader) reader;
                    SegmentInfos segmentInfos = sdr.getSegmentInfos();
                    MergePolicy.MergeSpecification forcedMerges = mp.findForcedDeletesMerges(segmentInfos, new MergePolicy.MergeContext() {

                        @Override
                        public int numDeletesToMerge(SegmentCommitInfo info) {
                            return info.info.maxDoc() - 1;
                        }

                        @Override
                        public int numDeletedDocs(SegmentCommitInfo info) {
                            return info.info.maxDoc() - 1;
                        }

                        @Override
                        public InfoStream getInfoStream() {
                            return new NullInfoStream();
                        }

                        @Override
                        public Set<SegmentCommitInfo> getMergingSegments() {
                            return Collections.emptySet();
                        }
                    });
                    // don't wrap if there is nothing to do
                    assertSame(codecReader, forcedMerges.merges.get(0).wrapForMerge(codecReader));
                }
            }
        }
    }
}
Also used: NumericDocValues (org.apache.lucene.index.NumericDocValues), CodecReader (org.apache.lucene.index.CodecReader), SegmentInfos (org.apache.lucene.index.SegmentInfos), Set (java.util.Set), LeafReader (org.apache.lucene.index.LeafReader), SegmentCommitInfo (org.apache.lucene.index.SegmentCommitInfo), StandardDirectoryReader (org.apache.lucene.index.StandardDirectoryReader), DirectoryReader (org.apache.lucene.index.DirectoryReader), MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery), Document (org.apache.lucene.document.Document), NullInfoStream (org.apache.lucene.util.NullInfoStream), InfoStream (org.apache.lucene.util.InfoStream), StoredField (org.apache.lucene.document.StoredField), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), IndexWriter (org.apache.lucene.index.IndexWriter), MergePolicy (org.apache.lucene.index.MergePolicy), ShuffleForcedMergePolicy (org.apache.lucene.index.ShuffleForcedMergePolicy), Directory (org.apache.lucene.store.Directory), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)
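
ShuffleForcedMergePolicy and the other policies in these examples all decorate a wrapped MergePolicy. One common way to write such a wrapper is to extend Lucene's FilterMergePolicy, delegate through its protected in field, and override only the behavior of interest. The hypothetical LoggingForcedMergePolicy below is a minimal sketch of that pattern under Lucene 8.x signatures; it merely reports planned forced merges and belongs to neither Lucene nor crate.

import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.FilterMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;

// hypothetical: logs how many merges a forced merge planned, then delegates everything else
public final class LoggingForcedMergePolicy extends FilterMergePolicy {

    public LoggingForcedMergePolicy(MergePolicy in) {
        super(in);
    }

    @Override
    public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount,
            Map<SegmentCommitInfo, Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
        MergeSpecification spec = in.findForcedMerges(infos, maxSegmentCount, segmentsToMerge, mergeContext);
        if (spec != null && mergeContext.getInfoStream().isEnabled("LFMP")) {
            mergeContext.getInfoStream().message("LFMP", spec.merges.size() + " forced merge(s) planned");
        }
        return spec;
    }
}

It composes the same way as the examples above, e.g. iwc.setMergePolicy(new LoggingForcedMergePolicy(new ShuffleForcedMergePolicy(mp))).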

Example 3 with ShuffleForcedMergePolicy

Use of org.apache.lucene.index.ShuffleForcedMergePolicy in the crate/crate project.

From the class PrunePostingsMergePolicyTests, the method testPrune:

@Test
public void testPrune() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        iwc.setSoftDeletesField("_soft_deletes");
        MergePolicy mp = new SoftDeletesRetentionMergePolicy(
            "_soft_deletes",
            MatchAllDocsQuery::new,
            new PrunePostingsMergePolicy(newLogMergePolicy(), "id"));
        iwc.setMergePolicy(new ShuffleForcedMergePolicy(mp));
        boolean sorted = randomBoolean();
        if (sorted) {
            iwc.setIndexSort(new Sort(new SortField("sort", SortField.Type.INT)));
        }
        int numUniqueDocs = randomIntBetween(1, 100);
        int numDocs = randomIntBetween(numUniqueDocs, numUniqueDocs * 5);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            for (int i = 0; i < numDocs; i++) {
                if (rarely()) {
                    writer.flush();
                }
                if (rarely()) {
                    writer.forceMerge(1, false);
                }
                int id = i % numUniqueDocs;
                Document doc = new Document();
                doc.add(new StringField("id", "" + id, Field.Store.NO));
                doc.add(newTextField("text", "the quick brown fox", Field.Store.YES));
                doc.add(new NumericDocValuesField("sort", i));
                writer.softUpdateDocument(new Term("id", "" + id), doc, new NumericDocValuesField("_soft_deletes", 1));
                if (i == 0) {
                    // make sure we have at least 2 segments to ensure we do an actual merge to kick out all postings for
                    // soft deletes
                    writer.flush();
                }
            }
            writer.forceMerge(1);
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                LeafReader leafReader = reader.leaves().get(0).reader();
                assertEquals(numDocs, leafReader.maxDoc());
                Terms id = leafReader.terms("id");
                TermsEnum iterator = id.iterator();
                for (int i = 0; i < numUniqueDocs; i++) {
                    assertTrue(iterator.seekExact(new BytesRef("" + i)));
                    assertEquals(1, iterator.docFreq());
                }
                iterator = leafReader.terms("text").iterator();
                assertTrue(iterator.seekExact(new BytesRef("quick")));
                assertEquals(leafReader.maxDoc(), iterator.docFreq());
                int numValues = 0;
                NumericDocValues sort = leafReader.getNumericDocValues("sort");
                while (sort.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    if (sorted) {
                        assertEquals(sort.docID(), sort.longValue());
                    } else {
                        assertTrue(sort.longValue() >= 0);
                        assertTrue(sort.longValue() < numDocs);
                    }
                    numValues++;
                }
                assertEquals(numValues, numDocs);
            }
            {
                // prune away a single ID
                Document doc = new Document();
                doc.add(new StringField("id", "test", Field.Store.NO));
                writer.addDocument(doc);
                writer.deleteDocuments(new Term("id", "test"));
                writer.flush();
                writer.forceMerge(1);
                // delete it via the soft-deletes field as well (a no-op here: the hard delete above already removed the doc)
                writer.updateNumericDocValue(new Term("id", "test"), "_soft_deletes", 1);
                writer.flush();
                writer.forceMerge(1);
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    LeafReader leafReader = reader.leaves().get(0).reader();
                    assertEquals(numDocs, leafReader.maxDoc());
                    Terms id = leafReader.terms("id");
                    TermsEnum iterator = id.iterator();
                    assertEquals(numUniqueDocs, id.size());
                    for (int i = 0; i < numUniqueDocs; i++) {
                        assertTrue(iterator.seekExact(new BytesRef("" + i)));
                        assertEquals(1, iterator.docFreq());
                    }
                    assertFalse(iterator.seekExact(new BytesRef("test")));
                    iterator = leafReader.terms("text").iterator();
                    assertTrue(iterator.seekExact(new BytesRef("quick")));
                    assertEquals(leafReader.maxDoc(), iterator.docFreq());
                }
            }
            {
                // drop all ids
                // first add a doc such that we can force merge
                Document doc = new Document();
                doc.add(new StringField("id", "" + 0, Field.Store.NO));
                doc.add(newTextField("text", "the quick brown fox", Field.Store.YES));
                doc.add(new NumericDocValuesField("sort", 0));
                writer.softUpdateDocument(new Term("id", "" + 0), doc, new NumericDocValuesField("_soft_deletes", 1));
                for (int i = 0; i < numUniqueDocs; i++) {
                    writer.updateNumericDocValue(new Term("id", "" + i), "_soft_deletes", 1);
                }
                writer.flush();
                writer.forceMerge(1);
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    LeafReader leafReader = reader.leaves().get(0).reader();
                    assertEquals(numDocs + 1, leafReader.maxDoc());
                    assertEquals(0, leafReader.numDocs());
                    assertNull(leafReader.terms("id"));
                    TermsEnum iterator = leafReader.terms("text").iterator();
                    assertTrue(iterator.seekExact(new BytesRef("quick")));
                    assertEquals(leafReader.maxDoc(), iterator.docFreq());
                }
            }
        }
    }
}
Also used: NumericDocValues (org.apache.lucene.index.NumericDocValues), LeafReader (org.apache.lucene.index.LeafReader), DirectoryReader (org.apache.lucene.index.DirectoryReader), Terms (org.apache.lucene.index.Terms), SortField (org.apache.lucene.search.SortField), Term (org.apache.lucene.index.Term), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), Document (org.apache.lucene.document.Document), TermsEnum (org.apache.lucene.index.TermsEnum), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), IndexWriter (org.apache.lucene.index.IndexWriter), SoftDeletesRetentionMergePolicy (org.apache.lucene.index.SoftDeletesRetentionMergePolicy), MergePolicy (org.apache.lucene.index.MergePolicy), ShuffleForcedMergePolicy (org.apache.lucene.index.ShuffleForcedMergePolicy), StringField (org.apache.lucene.document.StringField), Sort (org.apache.lucene.search.Sort), BytesRef (org.apache.lucene.util.BytesRef), Directory (org.apache.lucene.store.Directory), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig), Test (org.junit.Test)
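
The maxDoc versus numDocs arithmetic in this test falls out of how soft deletes interact with SoftDeletesRetentionMergePolicy: soft-deleted documents that match the retention query survive merges, so maxDoc keeps counting them while numDocs does not. Below is a minimal, self-contained sketch of that effect, assuming Lucene 8.x; it mirrors the test setup (including the MatchAllDocsQuery retention trick and an illustrative "_soft_deletes" field) but is not taken from the crate sources.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SoftDeleteRetentionSketch {

    public static void main(String[] args) throws Exception {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setSoftDeletesField("_soft_deletes");
        // retain every soft-deleted document, as the tests above do
        iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy(
            "_soft_deletes", MatchAllDocsQuery::new, new TieredMergePolicy()));
        try (Directory dir = new ByteBuffersDirectory();
             IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.NO));
            writer.addDocument(doc);
            // soft update: marks the old version deleted via the DV field instead of removing it
            writer.softUpdateDocument(new Term("id", "1"), doc,
                new NumericDocValuesField("_soft_deletes", 1));
            writer.forceMerge(1);
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                // the retained soft-deleted copy keeps maxDoc at 2 while numDocs is 1
                System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs());
            }
        }
    }
}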

Aggregations

IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)3 MergePolicy (org.apache.lucene.index.MergePolicy)3 ShuffleForcedMergePolicy (org.apache.lucene.index.ShuffleForcedMergePolicy)3 Document (org.apache.lucene.document.Document)2 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)2 DirectoryReader (org.apache.lucene.index.DirectoryReader)2 IndexWriter (org.apache.lucene.index.IndexWriter)2 LeafReader (org.apache.lucene.index.LeafReader)2 NumericDocValues (org.apache.lucene.index.NumericDocValues)2 SoftDeletesRetentionMergePolicy (org.apache.lucene.index.SoftDeletesRetentionMergePolicy)2 Directory (org.apache.lucene.store.Directory)2 IOException (java.io.IOException)1 Set (java.util.Set)1 StoredField (org.apache.lucene.document.StoredField)1 StringField (org.apache.lucene.document.StringField)1 CodecReader (org.apache.lucene.index.CodecReader)1 LiveIndexWriterConfig (org.apache.lucene.index.LiveIndexWriterConfig)1 SegmentCommitInfo (org.apache.lucene.index.SegmentCommitInfo)1 SegmentInfos (org.apache.lucene.index.SegmentInfos)1 StandardDirectoryReader (org.apache.lucene.index.StandardDirectoryReader)1