Search in sources :

Example 16 with SerialMergeScheduler

Use of org.apache.lucene.index.SerialMergeScheduler in the project lucene-solr by apache.

In the class TestEarlyTerminatingSortingCollector, the method createRandomIndex:

/**
 * Populates {@code dir}/{@code iw}/{@code reader} with a randomized sorted index:
 * random documents drawn from a small random term dictionary, with commits and
 * deletions sprinkled throughout. Merging is serial so failures reproduce from the seed.
 *
 * @param singleSortedSegment when true, force-merge everything down to one sorted segment
 * @throws IOException if index writing fails
 */
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
    dir = newDirectory();
    numDocs = atLeast(150);
    final int termCount = TestUtil.nextInt(random(), 1, numDocs / 5);
    final Set<String> uniqueTerms = new HashSet<>();
    // keep drawing random strings until we have enough distinct terms
    while (uniqueTerms.size() < termCount) {
        uniqueTerms.add(TestUtil.randomSimpleString(random()));
    }
    terms = new ArrayList<>(uniqueTerms);
    final long analyzerSeed = random().nextLong();
    final IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(new Random(analyzerSeed)));
    if (config.getMergePolicy() instanceof MockRandomMergePolicy) {
        // MockRandomMP randomly wraps the leaf readers which makes merging angry
        config.setMergePolicy(newTieredMergePolicy());
    }
    // serial merging keeps test runs reproducible
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setIndexSort(sort);
    iw = new RandomIndexWriter(new Random(analyzerSeed), dir, config);
    // random force-merges may still happen with MockRandomMP; disable the explicit ones
    iw.setDoRandomForceMerge(false);
    for (int docIndex = 0; docIndex < numDocs; ++docIndex) {
        final Document document = randomDocument();
        iw.addDocument(document);
        // always commit at the midpoint; otherwise commit with ~1/8 probability
        // (never on the last doc, so the final segment stays un-committed here)
        final boolean atMidpoint = docIndex == numDocs / 2;
        if (atMidpoint || (docIndex != numDocs - 1 && random().nextInt(8) == 0)) {
            iw.commit();
        }
        // occasionally delete every document carrying a random term
        if (random().nextInt(15) == 0) {
            final String victim = RandomPicks.randomFrom(random(), terms);
            iw.deleteDocuments(new Term("s", victim));
        }
    }
    if (singleSortedSegment) {
        // because of deletions there might still be a lone flushed segment in the
        // index, but we want a merged (sorted) segment, so add docs around a
        // commit and then force-merge down to a single segment
        // refresh
        iw.getReader().close();
        iw.addDocument(new Document());
        iw.commit();
        iw.addDocument(new Document());
        iw.forceMerge(1);
    } else if (random().nextBoolean()) {
        iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
    }
    reader = iw.getReader();
}
Also used : MockRandomMergePolicy(org.apache.lucene.index.MockRandomMergePolicy) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Random(java.util.Random) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) HashSet(java.util.HashSet) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 17 with SerialMergeScheduler

Use of org.apache.lucene.index.SerialMergeScheduler in the project lucene-solr by apache.

In the class BaseRangeFieldQueryTestCase, the method verify:

/**
 * Indexes one document per row of {@code ranges} (row i = the range values for doc id i,
 * or a missing doc when {@code ranges[i][0].isMissing}), randomly deleting some docs
 * along the way, then runs random INTERSECTS/CONTAINS/WITHIN/CROSSES queries and
 * checks every hit against the brute-force oracle {@code expectedResult}.
 *
 * @param ranges per-document arrays of ranges; all rows share the same dimension count
 * @throws Exception on any index or search failure
 */
private void verify(Range[][] ranges) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Else we can get O(N^2) merging
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < ranges.length / 100) {
        iwc.setMaxBufferedDocs(ranges.length / 100);
    }
    // large inputs go to disk to avoid exhausting heap with a RAM directory
    Directory dir;
    if (ranges.length > 50000) {
        dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
    } else {
        dir = newDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    IndexWriter w = new IndexWriter(dir, iwc);
    // index phase: each doc gets a string id, a numeric docvalues id (used later to
    // map docID -> ranges row), and its range fields unless the row is "missing"
    for (int id = 0; id < ranges.length; ++id) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + id, Field.Store.NO));
        doc.add(new NumericDocValuesField("id", id));
        if (ranges[id][0].isMissing == false) {
            for (int n = 0; n < ranges[id].length; ++n) {
                doc.add(newRangeField(ranges[id][n]));
            }
        }
        w.addDocument(doc);
        // ~1% of the time, delete a random previously-added doc and remember it
        if (id > 0 && random().nextInt(100) == 1) {
            int idToDelete = random().nextInt(id);
            w.deleteDocuments(new Term("id", "" + idToDelete));
            deleted.add(idToDelete);
            if (VERBOSE) {
                System.out.println("  delete id=" + idToDelete);
            }
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    IndexSearcher s = newSearcher(r);
    int dimensions = ranges[0][0].numDimensions();
    int iters = atLeast(25);
    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
    int maxDoc = s.getIndexReader().maxDoc();
    // query phase: pick a random range and one of the four relation query types,
    // collect all hits into a bitset, then compare doc-by-doc with the oracle
    for (int iter = 0; iter < iters; ++iter) {
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " s=" + s);
        }
        // occasionally test open ended bounding ranges
        Range queryRange = nextRange(dimensions);
        int rv = random().nextInt(4);
        Query query;
        Range.QueryType queryType;
        if (rv == 0) {
            queryType = Range.QueryType.INTERSECTS;
            query = newIntersectsQuery(queryRange);
        } else if (rv == 1) {
            queryType = Range.QueryType.CONTAINS;
            query = newContainsQuery(queryRange);
        } else if (rv == 2) {
            queryType = Range.QueryType.WITHIN;
            query = newWithinQuery(queryRange);
        } else {
            queryType = Range.QueryType.CROSSES;
            query = newCrossesQuery(queryRange);
        }
        if (VERBOSE) {
            System.out.println("  query=" + query);
        }
        // record every matching global docID; scores are irrelevant here
        final FixedBitSet hits = new FixedBitSet(maxDoc);
        s.search(query, new SimpleCollector() {

            // offset of the current leaf so per-leaf doc ids map to global ids
            private int docBase;

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
        // walk docvalues in docID order to recover each doc's ranges-row index;
        // NOTE(review): this assumes every doc has an "id" docvalue (true — one is
        // added per doc above) so nextDoc() advances exactly once per docID
        NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
        for (int docID = 0; docID < maxDoc; ++docID) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            boolean expected;
            if (liveDocs != null && liveDocs.get(docID) == false) {
                // document is deleted
                expected = false;
            } else if (ranges[id][0].isMissing) {
                // doc was indexed without range fields, so it can never match
                expected = false;
            } else {
                expected = expectedResult(queryRange, ranges[id], queryType);
            }
            // on mismatch, build a detailed failure message before failing
            if (hits.get(docID) != expected) {
                StringBuilder b = new StringBuilder();
                b.append("FAIL (iter " + iter + "): ");
                if (expected == true) {
                    b.append("id=" + id + (ranges[id].length > 1 ? " (MultiValue) " : " ") + "should match but did not\n");
                } else {
                    b.append("id=" + id + " should not match but did\n");
                }
                b.append(" queryRange=" + queryRange + "\n");
                b.append(" box" + ((ranges[id].length > 1) ? "es=" : "=") + ranges[id][0]);
                for (int n = 1; n < ranges[id].length; ++n) {
                    b.append(", ");
                    b.append(ranges[id][n]);
                }
                b.append("\n queryType=" + queryType + "\n");
                b.append(" deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
                fail("wrong hit (first of possibly more):\n\n" + b);
            }
        }
    }
    IOUtils.close(r, dir);
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Bits(org.apache.lucene.util.Bits) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)17 SerialMergeScheduler (org.apache.lucene.index.SerialMergeScheduler)17 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)15 Directory (org.apache.lucene.store.Directory)15 Document (org.apache.lucene.document.Document)14 IndexReader (org.apache.lucene.index.IndexReader)12 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)9 IndexSearcher (org.apache.lucene.search.IndexSearcher)9 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)8 Term (org.apache.lucene.index.Term)8 HashSet (java.util.HashSet)7 IOException (java.io.IOException)6 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)6 NumericDocValues (org.apache.lucene.index.NumericDocValues)6 FixedBitSet (org.apache.lucene.util.FixedBitSet)6 StoredField (org.apache.lucene.document.StoredField)5 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)4 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)4 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)4 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)4