Example 1 with FailOnNonBulkMergesInfoStream

Use of org.apache.lucene.util.FailOnNonBulkMergesInfoStream in the apache/lucene-solr project.

From the class TestConsistentFieldNumbers, method testFieldNumberGaps:
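A minimal sketch of the pattern this example exercises, assuming Lucene's test framework (LuceneTestCase helpers such as random(), newDirectory() and newIndexWriterConfig(), and the same imports the test itself uses): FailOnNonBulkMergesInfoStream is installed as the writer's InfoStream so that a merge reported as non-bulk fails the test.

// Sketch only; it mirrors the final force-merge step of the test below.
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
    .setMergePolicy(new LogByteSizeMergePolicy())
    .setInfoStream(new FailOnNonBulkMergesInfoStream());
IndexWriter writer = new IndexWriter(dir, iwc);
// ... index a few documents here ...
writer.forceMerge(1);   // a non-bulk merge reported during this call trips the InfoStream check
writer.close();
dir.close();

The full test follows. It builds three segments whose field-number tables differ (the second segment has a gap where f2 is missing), then verifies that after a forced merge the single remaining segment still maps f1, f2 and f3 to field numbers 0, 1 and 2.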

public void testFieldNumberGaps() throws IOException {
    int numIters = atLeast(13);
    for (int i = 0; i < numIters; i++) {
        Directory dir = newDirectory();
        {
            // first segment: fields f1 and f2 are assigned field numbers 0 and 1
            IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
            Document d = new Document();
            d.add(new TextField("f1", "d1 first field", Field.Store.YES));
            d.add(new TextField("f2", "d1 second field", Field.Store.YES));
            writer.addDocument(d);
            writer.close();
            SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
            assertEquals(1, sis.size());
            FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
            assertEquals("f1", fis1.fieldInfo(0).name);
            assertEquals("f2", fis1.fieldInfo(1).name);
        }
        {
            // second segment: f1 keeps number 0, f2 is absent (leaving a gap at 1), f3 gets number 2
            IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
            Document d = new Document();
            d.add(new TextField("f1", "d2 first field", Field.Store.YES));
            d.add(new StoredField("f3", new byte[] { 1, 2, 3 }));
            writer.addDocument(d);
            writer.close();
            SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
            assertEquals(2, sis.size());
            FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
            FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
            assertEquals("f1", fis1.fieldInfo(0).name);
            assertEquals("f2", fis1.fieldInfo(1).name);
            assertEquals("f1", fis2.fieldInfo(0).name);
            assertNull(fis2.fieldInfo(1));
            assertEquals("f3", fis2.fieldInfo(2).name);
        }
        {
            // third segment: all three fields are present and keep their global numbers 0, 1 and 2
            IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
            Document d = new Document();
            d.add(new TextField("f1", "d3 first field", Field.Store.YES));
            d.add(new TextField("f2", "d3 second field", Field.Store.YES));
            d.add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 }));
            writer.addDocument(d);
            writer.close();
            SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
            assertEquals(3, sis.size());
            FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
            FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
            FieldInfos fis3 = IndexWriter.readFieldInfos(sis.info(2));
            assertEquals("f1", fis1.fieldInfo(0).name);
            assertEquals("f2", fis1.fieldInfo(1).name);
            assertEquals("f1", fis2.fieldInfo(0).name);
            assertNull(fis2.fieldInfo(1));
            assertEquals("f3", fis2.fieldInfo(2).name);
            assertEquals("f1", fis3.fieldInfo(0).name);
            assertEquals("f2", fis3.fieldInfo(1).name);
            assertEquals("f3", fis3.fieldInfo(2).name);
        }
        {
            IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
            writer.deleteDocuments(new Term("f1", "d1"));
            // nuke the first segment entirely so that the segment with gaps is
            // loaded first!
            writer.forceMergeDeletes();
            writer.close();
        }
        // merge everything down to one segment; FailOnNonBulkMergesInfoStream fails the test
        // if any merge is reported as non-bulk
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogByteSizeMergePolicy()).setInfoStream(new FailOnNonBulkMergesInfoStream()));
        writer.forceMerge(1);
        writer.close();
        SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
        assertEquals(1, sis.size());
        FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
        assertEquals("f1", fis1.fieldInfo(0).name);
        assertEquals("f2", fis1.fieldInfo(1).name);
        assertEquals("f3", fis1.fieldInfo(2).name);
        dir.close();
    }
}
Also used : StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TextField(org.apache.lucene.document.TextField) FailOnNonBulkMergesInfoStream(org.apache.lucene.util.FailOnNonBulkMergesInfoStream) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 2 with FailOnNonBulkMergesInfoStream

Use of org.apache.lucene.util.FailOnNonBulkMergesInfoStream in the apache/lucene-solr project.

From the class ThreadedIndexingAndSearchingTestCase, method runTest:
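The test below drives concurrent indexing and searching threads against a single IndexWriter. The part that involves FailOnNonBulkMergesInfoStream is the writer configuration, sketched here on its own (taken from the test itself, assuming Lucene's test framework): the InfoStream is installed so a non-bulk merge fails the run, and the randomized MockRandomMergePolicy is told not to schedule non-bulk merges so the check cannot trip on deliberately injected randomness.

// Sketch only; the full method below adds nightly merge-policy tuning, a merged-segment
// warmer, and all of the verification logic on top of this configuration.
MockAnalyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
    // the randomized merge policy can otherwise choose non-bulk merges deliberately
    ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
}
IndexWriter writer = new IndexWriter(dir, conf);   // dir is the test case's Directory field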

public void runTest(String testName) throws Exception {
    failed.set(false);
    addCount.set(0);
    delCount.set(0);
    packCount.set(0);
    final long t0 = System.currentTimeMillis();
    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random);
    final Path tempDir = createTempDir(testName);
    // some subclasses rely on this being MDW (MockDirectoryWrapper)
    dir = getDirectory(newMockFSDirectory(tempDir));
    if (dir instanceof BaseDirectoryWrapper) {
        // don't double-checkIndex, we do it ourselves.
        ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false);
    }
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
        ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
    }
    if (LuceneTestCase.TEST_NIGHTLY) {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.getMergePolicy();
        if (mp instanceof TieredMergePolicy) {
            ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
        } else if (mp instanceof LogByteSizeMergePolicy) {
            ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
        } else if (mp instanceof LogMergePolicy) {
            ((LogMergePolicy) mp).setMaxMergeDocs(100000);
        }
        // when running nightly, merging can still have crazy parameters, 
        // and might use many per-field codecs. turn on CFS for IW flushes
        // and ensure CFS ratio is reasonable to keep it contained.
        conf.setUseCompoundFile(true);
        mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
    }
    // warm each newly merged segment by visiting some stored documents and running a query
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {

        @Override
        public void warm(LeafReader reader) throws IOException {
            if (VERBOSE) {
                System.out.println("TEST: now warm merged reader=" + reader);
            }
            warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
            final int maxDoc = reader.maxDoc();
            final Bits liveDocs = reader.getLiveDocs();
            int sum = 0;
            final int inc = Math.max(1, maxDoc / 50);
            for (int docID = 0; docID < maxDoc; docID += inc) {
                if (liveDocs == null || liveDocs.get(docID)) {
                    final Document doc = reader.document(docID);
                    sum += doc.getFields().size();
                }
            }
            IndexSearcher searcher = newSearcher(reader, false);
            sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
            if (VERBOSE) {
                System.out.println("TEST: warm visited " + sum + " fields");
            }
        }
    });
    if (VERBOSE) {
        conf.setInfoStream(new PrintStreamInfoStream(System.out) {

            @Override
            public void message(String component, String message) {
                if ("TP".equals(component)) {
                    // ignore test points!
                    return;
                }
                super.message(component, message);
            }
        });
    }
    writer = new IndexWriter(dir, conf);
    TestUtil.reduceOpenFiles(writer);
    final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
    doAfterWriter(es);
    final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);
    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
    final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
    final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
    final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
    final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
    if (VERBOSE) {
        System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    // Let index build up a bit
    Thread.sleep(100);
    doSearching(es, stopTime);
    if (VERBOSE) {
        System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    for (Thread thread : indexThreads) {
        thread.join();
    }
    if (VERBOSE) {
        System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
    }
    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
        System.out.println("TEST: finalSearcher=" + s);
    }
    assertFalse(failed.get());
    boolean doFail = false;
    // Verify: make sure delIDs are in fact deleted:
    for (String id : delIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
            doFail = true;
        }
    }
    // Verify: make sure delPackIDs are in fact deleted:
    for (String id : delPackIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
            doFail = true;
        }
    }
    // Verify: make sure each group of sub-docs are still in docID order:
    for (SubDocs subDocs : allSubDocs) {
        TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
        if (!subDocs.deleted) {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.totalHits != subDocs.subIDs.size()) {
                System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
                doFail = true;
            } else {
                int lastDocID = -1;
                int startDocID = -1;
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    final int docID = scoreDoc.doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    } else {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    final Document doc = s.doc(docID);
                    assertEquals(subDocs.packID, doc.get("packID"));
                }
                lastDocID = startDocID - 1;
                for (String subID : subDocs.subIDs) {
                    hits = s.search(new TermQuery(new Term("docid", subID)), 1);
                    assertEquals(1, hits.totalHits);
                    final int docID = hits.scoreDocs[0].doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        } else {
            // the pack was deleted -- make sure its sub-docs are gone; we cannot verify by
            // packID because we can re-use packID for update:
            for (String subID : subDocs.subIDs) {
                assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
            }
        }
    }
    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();
    for (int id = 0; id < endID; id++) {
        String stringID = "" + id;
        if (!delIDs.contains(stringID)) {
            final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.totalHits != 1) {
                System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs);
                doFail = true;
            }
        }
    }
    assertFalse(doFail);
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
    releaseSearcher(s);
    writer.commit();
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
    doClose();
    try {
        writer.commit();
    } finally {
        writer.close();
    }
    // Cannot shut the executor down until after the writer is closed: the merged-segment
    // warmer may still be running searches, and that IndexSearcher (IS) may be using this es!
    if (es != null) {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }
    TestUtil.checkIndex(dir);
    dir.close();
    if (VERBOSE) {
        System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BaseDirectoryWrapper(org.apache.lucene.store.BaseDirectoryWrapper) LineFileDocs(org.apache.lucene.util.LineFileDocs) Path(java.nio.file.Path) TermQuery(org.apache.lucene.search.TermQuery) NamedThreadFactory(org.apache.lucene.util.NamedThreadFactory) FailOnNonBulkMergesInfoStream(org.apache.lucene.util.FailOnNonBulkMergesInfoStream) IOException(java.io.IOException) ExecutorService(java.util.concurrent.ExecutorService) Bits(org.apache.lucene.util.Bits) PrintStreamInfoStream(org.apache.lucene.util.PrintStreamInfoStream)

Aggregations

MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)2 Document (org.apache.lucene.document.Document)2 FailOnNonBulkMergesInfoStream (org.apache.lucene.util.FailOnNonBulkMergesInfoStream)2 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 ExecutorService (java.util.concurrent.ExecutorService)1 StoredField (org.apache.lucene.document.StoredField)1 TextField (org.apache.lucene.document.TextField)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 ScoreDoc (org.apache.lucene.search.ScoreDoc)1 TermQuery (org.apache.lucene.search.TermQuery)1 TopDocs (org.apache.lucene.search.TopDocs)1 BaseDirectoryWrapper (org.apache.lucene.store.BaseDirectoryWrapper)1 Directory (org.apache.lucene.store.Directory)1 Bits (org.apache.lucene.util.Bits)1 LineFileDocs (org.apache.lucene.util.LineFileDocs)1 NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory)1 PrintStreamInfoStream (org.apache.lucene.util.PrintStreamInfoStream)1