
Example 11 with LineFileDocs

Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class TestNRTReplication, method testFullClusterCrash.

@Nightly
public void testFullClusterCrash() throws Exception {
    Path path1 = createTempDir("1");
    NodeProcess primary = startNode(-1, 0, path1, -1, true);
    Path path2 = createTempDir("2");
    NodeProcess replica1 = startNode(primary.tcpPort, 1, path2, -1, true);
    Path path3 = createTempDir("3");
    NodeProcess replica2 = startNode(primary.tcpPort, 2, path3, -1, true);
    sendReplicasToPrimary(primary, replica1, replica2);
    // Index 50 docs into primary:
    LineFileDocs docs = new LineFileDocs(random());
    long primaryVersion1 = 0;
    for (int iter = 0; iter < 5; iter++) {
        try (Connection c = new Connection(primary.tcpPort)) {
            c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
            for (int i = 0; i < 10; i++) {
                Document doc = docs.nextDoc();
                primary.addOrUpdateDocument(c, doc, false);
            }
        }
        // Refresh primary, which also pushes to replicas:
        primaryVersion1 = primary.flush(0);
        assertTrue(primaryVersion1 > 0);
    }
    // Wait for replicas to sync up:
    waitForVersionAndHits(replica1, primaryVersion1, 50);
    waitForVersionAndHits(replica2, primaryVersion1, 50);
    primary.commit();
    replica1.commit();
    replica2.commit();
    // Index 10 more docs, but don't sync to replicas:
    try (Connection c = new Connection(primary.tcpPort)) {
        c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
        for (int i = 0; i < 10; i++) {
            Document doc = docs.nextDoc();
            primary.addOrUpdateDocument(c, doc, false);
        }
    }
    // Full cluster crash
    primary.crash();
    replica1.crash();
    replica2.crash();
    // Full cluster restart
    primary = startNode(-1, 0, path1, -1, true);
    replica1 = startNode(primary.tcpPort, 1, path2, -1, true);
    replica2 = startNode(primary.tcpPort, 2, path3, -1, true);
    // Only 50 hits: the last 10 docs were indexed but never committed before the crash.
    // The expected version is -1 because it's unpredictable how IndexWriter changes the segments version on init:
    assertVersionAndHits(primary, -1, 50);
    assertVersionAndHits(replica1, primary.initInfosVersion, 50);
    assertVersionAndHits(replica2, primary.initInfosVersion, 50);
    primary.close();
    replica1.close();
    replica2.close();
}
Also used: Path (java.nio.file.Path), Document (org.apache.lucene.document.Document), LineFileDocs (org.apache.lucene.util.LineFileDocs)
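
Every example on this page follows the same core pattern: LineFileDocs wraps a line-oriented text file (by default a Wikipedia extract bundled with the lucene-test-framework) and returns each line as a ready-made Document with fields such as "title", "body", and "docid". A minimal standalone sketch of that loop (the class name is hypothetical; the LineFileDocs calls are the same ones the tests above use):

import java.util.Random;

import org.apache.lucene.document.Document;
import org.apache.lucene.util.LineFileDocs;

public class LineFileDocsSketch {
    public static void main(String[] args) throws Exception {
        // LineFileDocs implements Closeable, so try-with-resources works:
        try (LineFileDocs docs = new LineFileDocs(new Random(42))) {
            for (int i = 0; i < 5; i++) {
                // Parses the next line of the source file into a Document:
                Document doc = docs.nextDoc();
                System.out.println(doc.get("title"));
            }
        }
    }
}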

Example 12 with LineFileDocs

Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class TestNRTReplication, method testIndexingWhileReplicaIsDown.

// Start up, index 10 docs, replicate, commit, crash, index more docs, replicate, then restart the replica
@Nightly
public void testIndexingWhileReplicaIsDown() throws Exception {
    Path primaryPath = createTempDir("primary");
    NodeProcess primary = startNode(-1, 0, primaryPath, -1, false);
    Path replicaPath = createTempDir("replica");
    NodeProcess replica = startNode(primary.tcpPort, 1, replicaPath, -1, true);
    sendReplicasToPrimary(primary, replica);
    // Index 10 docs into primary:
    LineFileDocs docs = new LineFileDocs(random());
    try (Connection c = new Connection(primary.tcpPort)) {
        c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
        for (int i = 0; i < 10; i++) {
            Document doc = docs.nextDoc();
            primary.addOrUpdateDocument(c, doc, false);
        }
    }
    // Refresh primary, which also pushes to replica:
    long primaryVersion1 = primary.flush(0);
    assertTrue(primaryVersion1 > 0);
    // Wait for replica to sync up:
    waitForVersionAndHits(replica, primaryVersion1, 10);
    // Commit and crash replica:
    replica.commit();
    replica.crash();
    sendReplicasToPrimary(primary);
    // Index 10 more docs, while replica is down
    try (Connection c = new Connection(primary.tcpPort)) {
        c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
        for (int i = 0; i < 10; i++) {
            Document doc = docs.nextDoc();
            primary.addOrUpdateDocument(c, doc, false);
        }
    }
    // And flush:
    long primaryVersion2 = primary.flush(0);
    assertTrue(primaryVersion2 > primaryVersion1);
    // Now restart replica:
    replica = startNode(primary.tcpPort, 1, replicaPath, -1, false);
    sendReplicasToPrimary(primary, replica);
    // On startup the replica still searches its last commit:
    assertVersionAndHits(replica, primaryVersion1, 10);
    // Now ask replica to sync:
    replica.newNRTPoint(primaryVersion2, 0, primary.tcpPort);
    waitForVersionAndHits(replica, primaryVersion2, 20);
    replica.close();
    primary.close();
}
Also used: Path (java.nio.file.Path), Document (org.apache.lucene.document.Document), LineFileDocs (org.apache.lucene.util.LineFileDocs)
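
NodeProcess, SimplePrimaryNode, and the CMD_INDEXING wire command are private to this test harness, so the snippet above is not directly reusable. The near-real-time behavior it exercises is plain Lucene, though: a refresh makes uncommitted documents visible to a newly opened reader, and only a commit makes them durable across a crash. A minimal single-node sketch of that semantics (not the replication harness itself):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class NrtSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        Document doc = new Document();
        doc.add(new TextField("body", "hello nrt", Field.Store.NO));
        writer.addDocument(doc);
        // NRT open: the reader sees the document even though nothing was committed:
        DirectoryReader reader = DirectoryReader.open(writer);
        System.out.println("numDocs=" + reader.numDocs());
        reader.close();
        writer.close();
        dir.close();
    }
}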

Example 13 with LineFileDocs

Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class TestFreeTextSuggester, method testWiki.

@Ignore
public void testWiki() throws Exception {
    final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt");
    // Skip header:
    lfd.nextDoc();
    Analyzer analyzer = new MockAnalyzer(random());
    FreeTextSuggester sug = new FreeTextSuggester(analyzer);
    sug.build(new InputIterator() {

        private int count;

        @Override
        public long weight() {
            return 1;
        }

        @Override
        public BytesRef next() {
            Document doc;
            try {
                doc = lfd.nextDoc();
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            if (doc == null) {
                return null;
            }
            if (count++ == 10000) {
                return null;
            }
            return new BytesRef(doc.get("body"));
        }

        @Override
        public BytesRef payload() {
            return null;
        }

        @Override
        public boolean hasPayloads() {
            return false;
        }

        @Override
        public Set<BytesRef> contexts() {
            return null;
        }

        @Override
        public boolean hasContexts() {
            return false;
        }
    });
    if (VERBOSE) {
        System.out.println(sug.ramBytesUsed() + " bytes");
        List<LookupResult> results = sug.lookup("general r", 10);
        System.out.println("results:");
        for (LookupResult result : results) {
            System.out.println("  " + result);
        }
    }
    analyzer.close();
}
Also used: CharArraySet (org.apache.lucene.analysis.CharArraySet), HashSet (java.util.HashSet), Set (java.util.Set), IOException (java.io.IOException), Analyzer (org.apache.lucene.analysis.Analyzer), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), Document (org.apache.lucene.document.Document), InputIterator (org.apache.lucene.search.suggest.InputIterator), LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult), BytesRef (org.apache.lucene.util.BytesRef), LineFileDocs (org.apache.lucene.util.LineFileDocs), Ignore (org.junit.Ignore)
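
The anonymous class above is the entire contract FreeTextSuggester.build expects from an InputIterator: next() streams BytesRefs until it returns null, weight() supplies a rank, and the payload/context methods can simply opt out. A standalone sketch over a small in-memory list (hypothetical class name and data; the interface is the same one testWiki implements):

import java.util.Arrays;
import java.util.Iterator;
import java.util.Set;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.analyzing.FreeTextSuggester;
import org.apache.lucene.util.BytesRef;

public class ListInputIterator implements InputIterator {

    private final Iterator<String> it =
            Arrays.asList("general relativity", "general register", "general rule").iterator();

    // Stream one BytesRef per input; null signals the end:
    @Override public BytesRef next() { return it.hasNext() ? new BytesRef(it.next()) : null; }
    // Uniform weight, exactly like the testWiki iterator:
    @Override public long weight() { return 1; }
    @Override public BytesRef payload() { return null; }
    @Override public boolean hasPayloads() { return false; }
    @Override public Set<BytesRef> contexts() { return null; }
    @Override public boolean hasContexts() { return false; }

    public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer();
        FreeTextSuggester sug = new FreeTextSuggester(analyzer);
        sug.build(new ListInputIterator());
        // Ranked completions of the prefix, as in testWiki's lookup:
        System.out.println(sug.lookup("general r", 10));
        analyzer.close();
    }
}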

Example 14 with LineFileDocs

Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class BaseTestCheckIndex, method testChecksumsOnly.

public void testChecksumsOnly(Directory dir) throws IOException {
    LineFileDocs lf = new LineFileDocs(random());
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    for (int i = 0; i < 100; i++) {
        iw.addDocument(lf.nextDoc());
    }
    iw.addDocument(new Document());
    iw.commit();
    iw.close();
    lf.close();
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
    if (VERBOSE)
        checker.setInfoStream(System.out);
    CheckIndex.Status indexStatus = checker.checkIndex();
    assertTrue(indexStatus.clean);
    checker.close();
    analyzer.close();
}
Also used: PrintStream (java.io.PrintStream), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), ByteArrayOutputStream (java.io.ByteArrayOutputStream), Document (org.apache.lucene.document.Document), LineFileDocs (org.apache.lucene.util.LineFileDocs)
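
Despite the method name, the body above runs a full checkIndex() pass over postings, doc values, and the rest. CheckIndex also offers a much cheaper mode that only validates each file's footer checksum; a minimal sketch, assuming the setChecksumsOnly(boolean) setter that newer Lucene versions expose on CheckIndex (verify it exists in your version before relying on it):

import java.nio.file.Paths;

import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ChecksumCheckSketch {
    public static void main(String[] args) throws Exception {
        // args[0] is a path to an existing index directory:
        try (Directory dir = FSDirectory.open(Paths.get(args[0]));
             CheckIndex checker = new CheckIndex(dir)) {
            // Assumed setter (see lead-in): skip decoding, only verify file checksums.
            checker.setChecksumsOnly(true);
            CheckIndex.Status status = checker.checkIndex();
            System.out.println("clean=" + status.clean);
        }
    }
}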

Example 15 with LineFileDocs

Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class ThreadedIndexingAndSearchingTestCase, method runTest.

public void runTest(String testName) throws Exception {
    failed.set(false);
    addCount.set(0);
    delCount.set(0);
    packCount.set(0);
    final long t0 = System.currentTimeMillis();
    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random);
    final Path tempDir = createTempDir(testName);
    // some subclasses rely on this being MDW
    dir = getDirectory(newMockFSDirectory(tempDir));
    if (dir instanceof BaseDirectoryWrapper) {
        // don't double-checkIndex, we do it ourselves.
        ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false);
    }
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
        ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
    }
    if (LuceneTestCase.TEST_NIGHTLY) {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.getMergePolicy();
        if (mp instanceof TieredMergePolicy) {
            ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
        } else if (mp instanceof LogByteSizeMergePolicy) {
            ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
        } else if (mp instanceof LogMergePolicy) {
            ((LogMergePolicy) mp).setMaxMergeDocs(100000);
        }
        // when running nightly, merging can still have crazy parameters, 
        // and might use many per-field codecs. turn on CFS for IW flushes
        // and ensure CFS ratio is reasonable to keep it contained.
        conf.setUseCompoundFile(true);
        mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
    }
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {

        @Override
        public void warm(LeafReader reader) throws IOException {
            if (VERBOSE) {
                System.out.println("TEST: now warm merged reader=" + reader);
            }
            warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
            final int maxDoc = reader.maxDoc();
            final Bits liveDocs = reader.getLiveDocs();
            int sum = 0;
            final int inc = Math.max(1, maxDoc / 50);
            for (int docID = 0; docID < maxDoc; docID += inc) {
                if (liveDocs == null || liveDocs.get(docID)) {
                    final Document doc = reader.document(docID);
                    sum += doc.getFields().size();
                }
            }
            IndexSearcher searcher = newSearcher(reader, false);
            sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
            if (VERBOSE) {
                System.out.println("TEST: warm visited " + sum + " fields");
            }
        }
    });
    if (VERBOSE) {
        conf.setInfoStream(new PrintStreamInfoStream(System.out) {

            @Override
            public void message(String component, String message) {
                if ("TP".equals(component)) {
                    // ignore test points!
                    return;
                }
                super.message(component, message);
            }
        });
    }
    writer = new IndexWriter(dir, conf);
    TestUtil.reduceOpenFiles(writer);
    final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
    doAfterWriter(es);
    final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);
    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
    final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
    final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
    final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
    final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
    if (VERBOSE) {
        System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    // Let index build up a bit
    Thread.sleep(100);
    doSearching(es, stopTime);
    if (VERBOSE) {
        System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    for (Thread thread : indexThreads) {
        thread.join();
    }
    if (VERBOSE) {
        System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
    }
    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
        System.out.println("TEST: finalSearcher=" + s);
    }
    assertFalse(failed.get());
    boolean doFail = false;
    // Verify: make sure delIDs are in fact deleted:
    for (String id : delIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
            doFail = true;
        }
    }
    // Verify: make sure delPackIDs are in fact deleted:
    for (String id : delPackIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
            doFail = true;
        }
    }
    // Verify: make sure each group of sub-docs are still in docID order:
    for (SubDocs subDocs : allSubDocs) {
        TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
        if (!subDocs.deleted) {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.totalHits != subDocs.subIDs.size()) {
                System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
                doFail = true;
            } else {
                int lastDocID = -1;
                int startDocID = -1;
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    final int docID = scoreDoc.doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    } else {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    final Document doc = s.doc(docID);
                    assertEquals(subDocs.packID, doc.get("packID"));
                }
                lastDocID = startDocID - 1;
                for (String subID : subDocs.subIDs) {
                    hits = s.search(new TermQuery(new Term("docid", subID)), 1);
                    assertEquals(1, hits.totalHits);
                    final int docID = hits.scoreDocs[0].doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        } else {
            // because we can re-use packID for update:
            for (String subID : subDocs.subIDs) {
                assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
            }
        }
    }
    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();
    for (int id = 0; id < endID; id++) {
        String stringID = "" + id;
        if (!delIDs.contains(stringID)) {
            final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.totalHits != 1) {
                System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs);
                doFail = true;
            }
        }
    }
    assertFalse(doFail);
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
    releaseSearcher(s);
    writer.commit();
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
    doClose();
    try {
        writer.commit();
    } finally {
        writer.close();
    }
    // Cannot shut down the ExecutorService until after the writer is closed, because the
    // writer's merged-segment warmer runs searches, and those searches may be using this es!
    if (es != null) {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }
    TestUtil.checkIndex(dir);
    dir.close();
    if (VERBOSE) {
        System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), Document (org.apache.lucene.document.Document), ScoreDoc (org.apache.lucene.search.ScoreDoc), TopDocs (org.apache.lucene.search.TopDocs), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), BaseDirectoryWrapper (org.apache.lucene.store.BaseDirectoryWrapper), LineFileDocs (org.apache.lucene.util.LineFileDocs), Path (java.nio.file.Path), TermQuery (org.apache.lucene.search.TermQuery), NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory), FailOnNonBulkMergesInfoStream (org.apache.lucene.util.FailOnNonBulkMergesInfoStream), IOException (java.io.IOException), ExecutorService (java.util.concurrent.ExecutorService), Bits (org.apache.lucene.util.Bits), PrintStreamInfoStream (org.apache.lucene.util.PrintStreamInfoStream)
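
The most reusable piece of this harness is the merged-segment warmer: IndexWriterConfig.setMergedSegmentWarmer runs arbitrary code against each newly merged segment before NRT readers can see it, which is how runTest pre-touches stored fields and fires a warm-up TermQuery. A minimal sketch of wiring one up (class name hypothetical; the warmer API is the one used above):

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class WarmerSketch {
    public static void main(String[] args) throws Exception {
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                // Called once per newly merged segment, before it becomes searchable;
                // touch whatever you want hot here (stored fields, a canned query, ...).
                System.out.println("warming merged segment, maxDoc=" + reader.maxDoc());
            }
        });
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, conf);
        writer.close();
        dir.close();
    }
}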

Aggregations

LineFileDocs (org.apache.lucene.util.LineFileDocs): 45 usages
Document (org.apache.lucene.document.Document): 27 usages
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 24 usages
Directory (org.apache.lucene.store.Directory): 17 usages
Path (java.nio.file.Path): 16 usages
IOException (java.io.IOException): 9 usages
BytesRef (org.apache.lucene.util.BytesRef): 8 usages
Random (java.util.Random): 7 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5 usages
ArrayList (java.util.ArrayList): 5 usages
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 5 usages
IntPoint (org.apache.lucene.document.IntPoint): 4 usages
RAMDirectory (org.apache.lucene.store.RAMDirectory): 4 usages
HashMap (java.util.HashMap): 3 usages
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 3 usages
Analyzer (org.apache.lucene.analysis.Analyzer): 3 usages
DoublePoint (org.apache.lucene.document.DoublePoint): 3 usages
Field (org.apache.lucene.document.Field): 3 usages
FloatPoint (org.apache.lucene.document.FloatPoint): 3 usages
LongPoint (org.apache.lucene.document.LongPoint): 3 usages