Search in sources:

Example 1 with PrintStreamInfoStream

Use of org.apache.lucene.util.PrintStreamInfoStream in project lucene-solr by apache.

The class IndexUpgrader, method parseArgs.

@SuppressForbidden(reason = "System.out required: command line tool")
static IndexUpgrader parseArgs(String[] args) throws IOException {
    String path = null;
    boolean deletePriorCommits = false;
    InfoStream out = null;
    String dirImpl = null;
    int i = 0;
    while (i < args.length) {
        String arg = args[i];
        if ("-delete-prior-commits".equals(arg)) {
            deletePriorCommits = true;
        } else if ("-verbose".equals(arg)) {
            out = new PrintStreamInfoStream(System.out);
        } else if ("-dir-impl".equals(arg)) {
            if (i == args.length - 1) {
                System.out.println("ERROR: missing value for -dir-impl option");
                System.exit(1);
            }
            i++;
            dirImpl = args[i];
        } else if (path == null) {
            path = arg;
        } else {
            printUsage();
        }
        i++;
    }
    if (path == null) {
        printUsage();
    }
    Path p = Paths.get(path);
    Directory dir = null;
    if (dirImpl == null) {
        dir = FSDirectory.open(p);
    } else {
        dir = CommandLineUtil.newFSDirectory(dirImpl, p);
    }
    return new IndexUpgrader(dir, out, deletePriorCommits);
}
Also used: InfoStream(org.apache.lucene.util.InfoStream) PrintStreamInfoStream(org.apache.lucene.util.PrintStreamInfoStream) Path(java.nio.file.Path) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) SuppressForbidden(org.apache.lucene.util.SuppressForbidden)
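
For context, here is a minimal standalone sketch of what the -verbose path above wires together; the index path is hypothetical, but the IndexUpgrader constructor and upgrade() call mirror the object that parseArgs returns:

import java.nio.file.Paths;

import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.PrintStreamInfoStream;

public class VerboseUpgrade {
    public static void main(String[] args) throws Exception {
        // Equivalent of invoking the tool as: IndexUpgrader -verbose /path/to/index
        Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // hypothetical path
        InfoStream verbose = new PrintStreamInfoStream(System.out);
        // false: keep prior commits instead of deleting them
        new IndexUpgrader(dir, verbose, false).upgrade();
    }
}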

Example 2 with PrintStreamInfoStream

Use of org.apache.lucene.util.PrintStreamInfoStream in project lucene-solr by apache.

The class ThreadedIndexingAndSearchingTestCase, method runTest.

public void runTest(String testName) throws Exception {
    failed.set(false);
    addCount.set(0);
    delCount.set(0);
    packCount.set(0);
    final long t0 = System.currentTimeMillis();
    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random);
    final Path tempDir = createTempDir(testName);
    // some subclasses rely on this being MDW
    dir = getDirectory(newMockFSDirectory(tempDir));
    if (dir instanceof BaseDirectoryWrapper) {
        // don't double-checkIndex, we do it ourselves.
        ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false);
    }
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
        ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
    }
    if (LuceneTestCase.TEST_NIGHTLY) {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.getMergePolicy();
        if (mp instanceof TieredMergePolicy) {
            ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
        } else if (mp instanceof LogByteSizeMergePolicy) {
            ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
        } else if (mp instanceof LogMergePolicy) {
            ((LogMergePolicy) mp).setMaxMergeDocs(100000);
        }
        // when running nightly, merging can still have crazy parameters, 
        // and might use many per-field codecs. turn on CFS for IW flushes
        // and ensure CFS ratio is reasonable to keep it contained.
        conf.setUseCompoundFile(true);
        mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
    }
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {

        @Override
        public void warm(LeafReader reader) throws IOException {
            if (VERBOSE) {
                System.out.println("TEST: now warm merged reader=" + reader);
            }
            warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
            final int maxDoc = reader.maxDoc();
            final Bits liveDocs = reader.getLiveDocs();
            int sum = 0;
            final int inc = Math.max(1, maxDoc / 50);
            for (int docID = 0; docID < maxDoc; docID += inc) {
                if (liveDocs == null || liveDocs.get(docID)) {
                    final Document doc = reader.document(docID);
                    sum += doc.getFields().size();
                }
            }
            IndexSearcher searcher = newSearcher(reader, false);
            sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
            if (VERBOSE) {
                System.out.println("TEST: warm visited " + sum + " fields");
            }
        }
    });
    if (VERBOSE) {
        conf.setInfoStream(new PrintStreamInfoStream(System.out) {

            @Override
            public void message(String component, String message) {
                if ("TP".equals(component)) {
                    // ignore test points!
                    return;
                }
                super.message(component, message);
            }
        });
    }
    writer = new IndexWriter(dir, conf);
    TestUtil.reduceOpenFiles(writer);
    final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
    doAfterWriter(es);
    final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);
    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
    final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
    final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
    final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
    final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
    if (VERBOSE) {
        System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    // Let index build up a bit
    Thread.sleep(100);
    doSearching(es, stopTime);
    if (VERBOSE) {
        System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    for (Thread thread : indexThreads) {
        thread.join();
    }
    if (VERBOSE) {
        System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
    }
    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
        System.out.println("TEST: finalSearcher=" + s);
    }
    assertFalse(failed.get());
    boolean doFail = false;
    // Verify: make sure delIDs are in fact deleted:
    for (String id : delIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
            doFail = true;
        }
    }
    // Verify: make sure delPackIDs are in fact deleted:
    for (String id : delPackIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
            doFail = true;
        }
    }
    // Verify: make sure each group of sub-docs are still in docID order:
    for (SubDocs subDocs : allSubDocs) {
        TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
        if (!subDocs.deleted) {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.totalHits != subDocs.subIDs.size()) {
                System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
                doFail = true;
            } else {
                int lastDocID = -1;
                int startDocID = -1;
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    final int docID = scoreDoc.doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    } else {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    final Document doc = s.doc(docID);
                    assertEquals(subDocs.packID, doc.get("packID"));
                }
                lastDocID = startDocID - 1;
                for (String subID : subDocs.subIDs) {
                    hits = s.search(new TermQuery(new Term("docid", subID)), 1);
                    assertEquals(1, hits.totalHits);
                    final int docID = hits.scoreDocs[0].doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        } else {
            // because we can re-use packID for update:
            for (String subID : subDocs.subIDs) {
                assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
            }
        }
    }
    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();
    for (int id = 0; id < endID; id++) {
        String stringID = "" + id;
        if (!delIDs.contains(stringID)) {
            final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.totalHits != 1) {
                System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs);
                doFail = true;
            }
        }
    }
    assertFalse(doFail);
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
    releaseSearcher(s);
    writer.commit();
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
    doClose();
    try {
        writer.commit();
    } finally {
        writer.close();
    }
    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    if (es != null) {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }
    TestUtil.checkIndex(dir);
    dir.close();
    if (VERBOSE) {
        System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BaseDirectoryWrapper(org.apache.lucene.store.BaseDirectoryWrapper) LineFileDocs(org.apache.lucene.util.LineFileDocs) Path(java.nio.file.Path) TermQuery(org.apache.lucene.search.TermQuery) NamedThreadFactory(org.apache.lucene.util.NamedThreadFactory) FailOnNonBulkMergesInfoStream(org.apache.lucene.util.FailOnNonBulkMergesInfoStream) IOException(java.io.IOException) ExecutorService(java.util.concurrent.ExecutorService) Bits(org.apache.lucene.util.Bits) PrintStreamInfoStream(org.apache.lucene.util.PrintStreamInfoStream)
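
The anonymous PrintStreamInfoStream subclass above drops the "TP" (test point) component inside message(). An alternative sketch filters via isEnabled() instead, which also skips the cost of building messages that would be discarded; the class name and constructor here are hypothetical:

import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.util.PrintStreamInfoStream;

// Hypothetical variant: suppress whole components by answering isEnabled(),
// so callers never format messages for them in the first place.
public class ComponentFilterInfoStream extends PrintStreamInfoStream {
    private final Set<String> ignored;

    public ComponentFilterInfoStream(PrintStream stream, String... ignoredComponents) {
        super(stream);
        this.ignored = new HashSet<>(Arrays.asList(ignoredComponents));
    }

    @Override
    public boolean isEnabled(String component) {
        return !ignored.contains(component);
    }
}

Usage would be conf.setInfoStream(new ComponentFilterInfoStream(System.out, "TP")); components whose isEnabled() returns false are typically never passed to message() at all.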

Example 3 with PrintStreamInfoStream

Use of org.apache.lucene.util.PrintStreamInfoStream in project lucene-solr by apache.

The class TestDocInverterPerFieldErrorInfo, method testNoExtraNoise.

@Test
public void testNoExtraNoise() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer;
    IndexWriterConfig c = new IndexWriterConfig(new ThrowingAnalyzer());
    final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
    PrintStream infoPrintStream = new PrintStream(infoBytes, true, IOUtils.UTF_8);
    PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream);
    c.setInfoStream(printStreamInfoStream);
    writer = new IndexWriter(dir, c);
    Document doc = new Document();
    doc.add(newField("boringFieldName", "aaa ", storedTextType));
    try {
        writer.addDocument(doc);
    } catch (BadNews badNews) {
        fail("Unwanted exception");
    }
    infoPrintStream.flush();
    String infoStream = new String(infoBytes.toByteArray(), IOUtils.UTF_8);
    assertFalse(infoStream.contains("boringFieldName"));
    writer.close();
    dir.close();
}
Also used: PrintStream(java.io.PrintStream) PrintStreamInfoStream(org.apache.lucene.util.PrintStreamInfoStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)

Example 4 with PrintStreamInfoStream

Use of org.apache.lucene.util.PrintStreamInfoStream in project lucene-solr by apache.

The class TestDocInverterPerFieldErrorInfo, method testInfoStreamGetsFieldName.

@Test
public void testInfoStreamGetsFieldName() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer;
    IndexWriterConfig c = new IndexWriterConfig(new ThrowingAnalyzer());
    final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
    PrintStream infoPrintStream = new PrintStream(infoBytes, true, IOUtils.UTF_8);
    PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream);
    c.setInfoStream(printStreamInfoStream);
    writer = new IndexWriter(dir, c);
    Document doc = new Document();
    doc.add(newField("distinctiveFieldName", "aaa ", storedTextType));
    expectThrows(BadNews.class, () -> {
        writer.addDocument(doc);
    });
    infoPrintStream.flush();
    String infoStream = new String(infoBytes.toByteArray(), IOUtils.UTF_8);
    assertTrue(infoStream.contains("distinctiveFieldName"));
    writer.close();
    dir.close();
}
Also used: PrintStream(java.io.PrintStream) PrintStreamInfoStream(org.apache.lucene.util.PrintStreamInfoStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)
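
Examples 3 and 4 share the same capture-and-assert plumbing. A hedged sketch of a reusable helper for that pattern (the class name and factory method are hypothetical; it keeps its own reference to the PrintStream rather than relying on PrintStreamInfoStream internals):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;

import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PrintStreamInfoStream;

// Hypothetical helper: a PrintStreamInfoStream whose output can be read back as a String.
public class CapturingInfoStream extends PrintStreamInfoStream {
    private final ByteArrayOutputStream bytes;
    private final PrintStream print;

    private CapturingInfoStream(ByteArrayOutputStream bytes, PrintStream print) {
        super(print);
        this.bytes = bytes;
        this.print = print;
    }

    public static CapturingInfoStream create() throws UnsupportedEncodingException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        return new CapturingInfoStream(bytes, new PrintStream(bytes, true, IOUtils.UTF_8));
    }

    public String captured() throws UnsupportedEncodingException {
        print.flush();
        return bytes.toString(IOUtils.UTF_8);
    }
}

With it, the body of testInfoStreamGetsFieldName reduces to creating the stream, setting it on the config, indexing, and asserting info.captured().contains("distinctiveFieldName").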

Example 5 with PrintStreamInfoStream

Use of org.apache.lucene.util.PrintStreamInfoStream in project lucene-solr by apache.

The class TestMergeSchedulerExternal, method testSubclassConcurrentMergeScheduler.

public void testSubclassConcurrentMergeScheduler() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.failOn(new FailOnlyOnMerge());
    Document doc = new Document();
    Field idField = newStringField("id", "", Field.Store.YES);
    doc.add(idField);
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())).setMergeScheduler(new MyMergeScheduler()).setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergePolicy(newLogMergePolicy());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    infoStream = new PrintStreamInfoStream(new PrintStream(baos, true, IOUtils.UTF_8));
    iwc.setInfoStream(infoStream);
    IndexWriter writer = new IndexWriter(dir, iwc);
    LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
    logMP.setMergeFactor(10);
    for (int i = 0; i < 20; i++) {
        writer.addDocument(doc);
    }
    try {
        ((MyMergeScheduler) writer.getConfig().getMergeScheduler()).sync();
    } catch (IllegalStateException ise) {
        // OK
    }
    writer.rollback();
    try {
        assertTrue(mergeThreadCreated);
        assertTrue(mergeCalled);
        assertTrue(excCalled);
    } catch (AssertionError ae) {
        System.out.println("TEST FAILED; IW infoStream output:");
        System.out.println(baos.toString(IOUtils.UTF_8));
        throw ae;
    }
    dir.close();
}
Also used: MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) PrintStream(java.io.PrintStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) PrintStreamInfoStream(org.apache.lucene.util.PrintStreamInfoStream) LogMergePolicy(org.apache.lucene.index.LogMergePolicy) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)
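
The buffer-and-dump-on-failure idiom above generalizes to any test: keep IndexWriter logging silent unless something fails, then replay it. A minimal sketch under that assumption (the helper class and method names are hypothetical; the config must receive the info stream before the IndexWriter is constructed, so the body typically creates the writer itself):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;

import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PrintStreamInfoStream;

public final class QuietUnlessFailing {
    // Hypothetical helper: capture IW logging while `body` runs; print it only on failure.
    static void runWithCapturedInfoStream(IndexWriterConfig iwc, Runnable body)
            throws UnsupportedEncodingException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        iwc.setInfoStream(new PrintStreamInfoStream(new PrintStream(baos, true, IOUtils.UTF_8)));
        try {
            body.run();
        } catch (Throwable t) {
            // Replay the captured log only when the test body threw.
            System.out.println("TEST FAILED; IW infoStream output:");
            System.out.println(baos.toString(IOUtils.UTF_8));
            throw t;
        }
    }
}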

Aggregations

PrintStreamInfoStream (org.apache.lucene.util.PrintStreamInfoStream): 6 uses
Document (org.apache.lucene.document.Document): 4 uses
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 3 uses
PrintStream (java.io.PrintStream): 3 uses
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 3 uses
Directory (org.apache.lucene.store.Directory): 3 uses
IOException (java.io.IOException): 2 uses
Path (java.nio.file.Path): 2 uses
MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper): 2 uses
LineFileDocs (org.apache.lucene.util.LineFileDocs): 2 uses
Test (org.junit.Test): 2 uses
HashSet (java.util.HashSet): 1 use
ExecutorService (java.util.concurrent.ExecutorService): 1 use
Field (org.apache.lucene.document.Field): 1 use
IndexWriter (org.apache.lucene.index.IndexWriter): 1 use
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 1 use
LogMergePolicy (org.apache.lucene.index.LogMergePolicy): 1 use
IndexSearcher (org.apache.lucene.search.IndexSearcher): 1 use
ScoreDoc (org.apache.lucene.search.ScoreDoc): 1 use
TermQuery (org.apache.lucene.search.TermQuery): 1 use