Search in sources :

Example 41 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class TestIndexWriterOutOfFileDescriptors, method test().

/**
 * Randomized fault-injection test simulating running out of file descriptors:
 * every file open may throw an IOException (rate in [0, 1%)). Repeatedly opens
 * a writer, adds a document or runs addIndexes, then closes; after each
 * iteration it verifies the index still exists and that numDocs never
 * decreases, i.e. IndexWriter never deletes or loses the index on failure.
 */
public void test() throws Exception {
    MockDirectoryWrapper dir = newMockFSDirectory(createTempDir("TestIndexWriterOutOfFileDescriptors"));
    // Random per-open failure rate in [0, 0.01):
    double rate = random().nextDouble() * 0.01;
    //System.out.println("rate=" + rate);
    dir.setRandomIOExceptionRateOnOpen(rate);
    int iters = atLeast(20);
    LineFileDocs docs = new LineFileDocs(random());
    // r: snapshot reader over a non-empty index, later used as an addIndexes
    // source; r2: reader tracking the live index's doc count across iterations.
    DirectoryReader r = null;
    DirectoryReader r2 = null;
    // any == true once at least one iteration has fully succeeded:
    boolean any = false;
    MockDirectoryWrapper dirCopy = null;
    int lastNumDocs = 0;
    for (int iter = 0; iter < iters; iter++) {
        IndexWriter w = null;
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        }
        try {
            MockAnalyzer analyzer = new MockAnalyzer(random());
            analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
            IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
            if (VERBOSE) {
                // Do this ourselves instead of relying on LTC so
                // we see incrementing messageID:
                iwc.setInfoStream(new PrintStreamInfoStream(System.out));
            }
            MergeScheduler ms = iwc.getMergeScheduler();
            if (ms instanceof ConcurrentMergeScheduler) {
                // Injected exceptions are expected to hit merge threads too;
                // don't let the scheduler rethrow them as test failures:
                ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
            }
            w = new IndexWriter(dir, iwc);
            // Once a snapshot exists (r/dirCopy), occasionally exercise
            // addIndexes instead of addDocument:
            if (r != null && random().nextInt(5) == 3) {
                if (random().nextBoolean()) {
                    if (VERBOSE) {
                        System.out.println("TEST: addIndexes LR[]");
                    }
                    TestUtil.addIndexesSlowly(w, r);
                } else {
                    if (VERBOSE) {
                        System.out.println("TEST: addIndexes Directory[]");
                    }
                    w.addIndexes(new Directory[] { dirCopy });
                }
            } else {
                if (VERBOSE) {
                    System.out.println("TEST: addDocument");
                }
                w.addDocument(docs.nextDoc());
            }
            // Disable fault injection while waiting for merges and verifying:
            dir.setRandomIOExceptionRateOnOpen(0.0);
            if (ms instanceof ConcurrentMergeScheduler) {
                ((ConcurrentMergeScheduler) ms).sync();
            }
            // If exc hit CMS then writer will be tragically closed:
            if (w.getTragicException() == null) {
                w.close();
            }
            w = null;
            // NOTE: This is O(N^2)!  Only enable for temporary debugging:
            //dir.setRandomIOExceptionRateOnOpen(0.0);
            //_TestUtil.checkIndex(dir);
            //dir.setRandomIOExceptionRateOnOpen(rate);
            // Verify numDocs only increases, to catch IndexWriter
            // accidentally deleting the index:
            dir.setRandomIOExceptionRateOnOpen(0.0);
            assertTrue(DirectoryReader.indexExists(dir));
            if (r2 == null) {
                r2 = DirectoryReader.open(dir);
            } else {
                DirectoryReader r3 = DirectoryReader.openIfChanged(r2);
                if (r3 != null) {
                    r2.close();
                    r2 = r3;
                }
            }
            // NOTE(review): this assert runs inside the try, so its
            // AssertionError is caught by the catch block below and the loop
            // continues — confirm that swallowing it here is intended.
            assertTrue("before=" + lastNumDocs + " after=" + r2.numDocs(), r2.numDocs() >= lastNumDocs);
            lastNumDocs = r2.numDocs();
            //System.out.println("numDocs=" + lastNumDocs);
            // Re-arm fault injection for the next iteration:
            dir.setRandomIOExceptionRateOnOpen(rate);
            any = true;
            if (VERBOSE) {
                System.out.println("TEST: iter=" + iter + ": success");
            }
        } catch (AssertionError | IOException ioe) {
            // Expected path when an injected open failure surfaces (the mock
            // dir may throw either type); roll back and keep iterating.
            if (VERBOSE) {
                System.out.println("TEST: iter=" + iter + ": exception");
                ioe.printStackTrace();
            }
            if (w != null) {
                // NOTE: leave random IO exceptions enabled here,
                // to verify that rollback does not try to write
                // anything:
                w.rollback();
            }
        }
        if (any && r == null && random().nextBoolean()) {
            // Make a copy of a non-empty index so we can use
            // it to addIndexes later:
            dir.setRandomIOExceptionRateOnOpen(0.0);
            r = DirectoryReader.open(dir);
            dirCopy = newMockFSDirectory(createTempDir("TestIndexWriterOutOfFileDescriptors.copy"));
            Set<String> files = new HashSet<>();
            for (String file : dir.listAll()) {
                // Copy only segments_N files and codec-pattern files,
                // skipping any stray leftovers:
                if (file.startsWith(IndexFileNames.SEGMENTS) || IndexFileNames.CODEC_FILE_PATTERN.matcher(file).matches()) {
                    dirCopy.copyFrom(dir, file, file, IOContext.DEFAULT);
                    files.add(file);
                }
            }
            dirCopy.sync(files);
            // Have IW kiss the dir so we remove any leftover
            // files ... we can easily have leftover files at
            // the time we take a copy because we are holding
            // open a reader:
            new IndexWriter(dirCopy, newIndexWriterConfig(new MockAnalyzer(random()))).close();
            dirCopy.setRandomIOExceptionRate(rate);
            dir.setRandomIOExceptionRateOnOpen(rate);
        }
    }
    if (r2 != null) {
        r2.close();
    }
    if (r != null) {
        r.close();
        dirCopy.close();
    }
    dir.close();
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) IOException(java.io.IOException) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PrintStreamInfoStream(org.apache.lucene.util.PrintStreamInfoStream) LineFileDocs(org.apache.lucene.util.LineFileDocs) HashSet(java.util.HashSet)

Example 42 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class TestSameScoresWithThreads, method test().

/**
 * Verifies that scoring is deterministic under concurrency: records baseline
 * TopDocs for a random sample of ~10 terms from the "body" field, then spawns
 * several threads that re-run the same term queries against the shared
 * searcher and assert that totalHits, doc ids, and scores are identical to the
 * single-threaded baseline.
 */
public void test() throws Exception {
    final Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
    LineFileDocs docs = new LineFileDocs(random());
    // Keep indexing until at least this many "body" characters were added:
    int charsToIndex = atLeast(100000);
    int charsIndexed = 0;
    //System.out.println("bytesToIndex=" + charsToIndex);
    while (charsIndexed < charsToIndex) {
        Document doc = docs.nextDoc();
        charsIndexed += doc.get("body").length();
        w.addDocument(doc);
    //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
    }
    IndexReader r = w.getReader();
    //System.out.println("numDocs=" + r.numDocs());
    w.close();
    final IndexSearcher s = newSearcher(r);
    Terms terms = MultiFields.getFields(r).terms("body");
    // First pass: count terms so we can sample them uniformly below:
    int termCount = 0;
    TermsEnum termsEnum = terms.iterator();
    while (termsEnum.next() != null) {
        termCount++;
    }
    assertTrue(termCount > 0);
    // Target ~10 terms to search:
    double chance = 10.0 / termCount;
    termsEnum = terms.iterator();
    // Baseline query results, computed single-threaded:
    final Map<BytesRef, TopDocs> answers = new HashMap<>();
    while (termsEnum.next() != null) {
        if (random().nextDouble() <= chance) {
            // deepCopyOf: the BytesRef from termsEnum.term() is reused by the
            // enum on the next next() call, so snapshot it:
            BytesRef term = BytesRef.deepCopyOf(termsEnum.term());
            answers.put(term, s.search(new TermQuery(new Term("body", term)), 100));
        }
    }
    if (!answers.isEmpty()) {
        // Latch releases all threads at once to maximize contention:
        final CountDownLatch startingGun = new CountDownLatch(1);
        int numThreads = TestUtil.nextInt(random(), 2, 5);
        Thread[] threads = new Thread[numThreads];
        for (int threadID = 0; threadID < numThreads; threadID++) {
            Thread thread = new Thread() {

                @Override
                public void run() {
                    try {
                        startingGun.await();
                        for (int i = 0; i < 20; i++) {
                            // Shuffle query order per pass so threads interleave
                            // differently:
                            List<Map.Entry<BytesRef, TopDocs>> shuffled = new ArrayList<>(answers.entrySet());
                            Collections.shuffle(shuffled, random());
                            for (Map.Entry<BytesRef, TopDocs> ent : shuffled) {
                                TopDocs actual = s.search(new TermQuery(new Term("body", ent.getKey())), 100);
                                TopDocs expected = ent.getValue();
                                assertEquals(expected.totalHits, actual.totalHits);
                                assertEquals("query=" + ent.getKey().utf8ToString(), expected.scoreDocs.length, actual.scoreDocs.length);
                                for (int hit = 0; hit < expected.scoreDocs.length; hit++) {
                                    assertEquals(expected.scoreDocs[hit].doc, actual.scoreDocs[hit].doc);
                                    // Floats really should be identical:
                                    assertTrue(expected.scoreDocs[hit].score == actual.scoreDocs[hit].score);
                                }
                            }
                        }
                    } catch (Exception e) {
                        // NOTE(review): rethrown as unchecked; presumably the
                        // test framework's uncaught-exception handler fails the
                        // test — confirm.
                        throw new RuntimeException(e);
                    }
                }
            };
            threads[threadID] = thread;
            thread.start();
        }
        startingGun.countDown();
        for (Thread thread : threads) {
            thread.join();
        }
    }
    r.close();
    dir.close();
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) TermsEnum(org.apache.lucene.index.TermsEnum) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) LineFileDocs(org.apache.lucene.util.LineFileDocs) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) CountDownLatch(java.util.concurrent.CountDownLatch) IndexReader(org.apache.lucene.index.IndexReader) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 43 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class TestSearcherManager, method testConcurrentIndexCloseSearchAndRefresh().

/**
 * Stress test racing four threads against a shared IndexWriter and
 * SearcherManager (both held in AtomicReferences): an indexer that
 * occasionally closes or rolls back the writer and installs a fresh one, a
 * searcher, a refresher, and a closer that closes the manager and recreates it
 * from the current writer. AlreadyClosedException is an expected outcome of
 * these races and is tolerated everywhere. The indexer's time budget drives
 * the stop flag for all threads.
 */
public void testConcurrentIndexCloseSearchAndRefresh() throws Exception {
    final Directory dir = newFSDirectory(createTempDir());
    AtomicReference<IndexWriter> writerRef = new AtomicReference<>();
    final MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(IndexWriter.MAX_TERM_LENGTH);
    writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));
    AtomicReference<SearcherManager> mgrRef = new AtomicReference<>();
    mgrRef.set(new SearcherManager(writerRef.get(), null));
    final AtomicBoolean stop = new AtomicBoolean();
    // Indexer: adds docs; rarely (1/1000) closes or rolls back the writer and
    // replaces it, forcing the other threads to cope with a dead writer.
    Thread indexThread = new Thread() {

        @Override
        public void run() {
            try {
                LineFileDocs docs = new LineFileDocs(random());
                long runTimeSec = TEST_NIGHTLY ? atLeast(10) : atLeast(2);
                long endTime = System.nanoTime() + runTimeSec * 1000000000;
                while (System.nanoTime() < endTime) {
                    IndexWriter w = writerRef.get();
                    w.addDocument(docs.nextDoc());
                    if (random().nextInt(1000) == 17) {
                        if (random().nextBoolean()) {
                            w.close();
                        } else {
                            w.rollback();
                        }
                        writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));
                    }
                }
                docs.close();
                // Signals all other threads to finish:
                stop.set(true);
                if (VERBOSE) {
                    System.out.println("TEST: index count=" + writerRef.get().maxDoc());
                }
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        }
    };
    // Searcher: acquires/releases searchers in a loop; ACE just means the
    // manager was concurrently closed, so retry with the (new) manager.
    Thread searchThread = new Thread() {

        @Override
        public void run() {
            try {
                long totCount = 0;
                while (stop.get() == false) {
                    SearcherManager mgr = mgrRef.get();
                    if (mgr != null) {
                        IndexSearcher searcher;
                        try {
                            searcher = mgr.acquire();
                        } catch (AlreadyClosedException ace) {
                            // ok
                            continue;
                        }
                        totCount += searcher.getIndexReader().maxDoc();
                        mgr.release(searcher);
                    }
                }
                if (VERBOSE) {
                    System.out.println("TEST: search totCount=" + totCount);
                }
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        }
    };
    // Refresher: continuously refreshes the manager; ACE is again expected
    // when the closer beats us to it.
    Thread refreshThread = new Thread() {

        @Override
        public void run() {
            try {
                int refreshCount = 0;
                int aceCount = 0;
                while (stop.get() == false) {
                    SearcherManager mgr = mgrRef.get();
                    if (mgr != null) {
                        refreshCount++;
                        try {
                            mgr.maybeRefreshBlocking();
                        } catch (AlreadyClosedException ace) {
                            // ok
                            aceCount++;
                            continue;
                        }
                    }
                }
                if (VERBOSE) {
                    System.out.println("TEST: refresh count=" + refreshCount + " aceCount=" + aceCount);
                }
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        }
    };
    // Closer: closes the manager, then retries creating a replacement until it
    // catches a moment when the current writer is alive (ACE = writer was
    // closed underneath us; grab the ref again and retry).
    Thread closeThread = new Thread() {

        @Override
        public void run() {
            try {
                int closeCount = 0;
                int aceCount = 0;
                while (stop.get() == false) {
                    SearcherManager mgr = mgrRef.get();
                    assert mgr != null;
                    mgr.close();
                    closeCount++;
                    while (stop.get() == false) {
                        try {
                            mgrRef.set(new SearcherManager(writerRef.get(), null));
                            break;
                        } catch (AlreadyClosedException ace) {
                            // ok
                            aceCount++;
                        }
                    }
                }
                if (VERBOSE) {
                    System.out.println("TEST: close count=" + closeCount + " aceCount=" + aceCount);
                }
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        }
    };
    indexThread.start();
    searchThread.start();
    refreshThread.start();
    closeThread.start();
    indexThread.join();
    searchThread.join();
    refreshThread.join();
    closeThread.join();
    mgrRef.get().close();
    writerRef.get().close();
    dir.close();
}
Also used : AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) LineFileDocs(org.apache.lucene.util.LineFileDocs)

Example 44 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class CommonTermsQueryTest, method createRandomIndex().

/**
 * Populates a writer with random stuff. This must be fully reproducible with
 * the seed!
 *
 * @param numdocs number of documents to add
 * @param writer  destination writer; this method does not close or commit it
 * @param seed    seed making the generated documents deterministic
 * @throws IOException if reading the line docs or indexing fails
 */
public static void createRandomIndex(int numdocs, RandomIndexWriter writer, long seed) throws IOException {
    Random random = new Random(seed);
    // Primary source for our data is from LineFileDocs; it's realistic.
    // try-with-resources: previously the docs source leaked if nextDoc() or
    // addDocument() threw; LineFileDocs is Closeable, so close it always.
    try (LineFileDocs lineFileDocs = new LineFileDocs(random)) {
        for (int i = 0; i < numdocs; i++) {
            writer.addDocument(lineFileDocs.nextDoc());
        }
    }
}
Also used : Random(java.util.Random) LineFileDocs(org.apache.lucene.util.LineFileDocs)

Example 45 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

The class AnalyzingSuggesterTest, method testRandomRealisticKeys().

/**
 * Builds suggester inputs from realistic LineFileDocs titles with random
 * weights (titles may repeat; {@code mapping} records the max weight seen per
 * title), builds an AnalyzingSuggester — optionally with the weight encoded as
 * a payload — and verifies every lookup result carries the recorded max weight
 * and, when payloads are enabled, a payload equal to its value.
 */
public void testRandomRealisticKeys() throws IOException {
    LineFileDocs lineFile = new LineFileDocs(random());
    // title -> highest weight added for that title:
    Map<String, Long> mapping = new HashMap<>();
    List<Input> keys = new ArrayList<>();
    // this might bring up duplicates
    int howMany = atLeast(100);
    for (int i = 0; i < howMany; i++) {
        Document nextDoc = lineFile.nextDoc();
        String title = nextDoc.getField("title").stringValue();
        int randomWeight = random().nextInt(100);
        keys.add(new Input(title, randomWeight));
        // Keep only the max weight per title; the suggester is expected to
        // surface that one for duplicate keys:
        if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
            mapping.put(title, Long.valueOf(randomWeight));
        }
    }
    Analyzer indexAnalyzer = new MockAnalyzer(random());
    Analyzer queryAnalyzer = new MockAnalyzer(random());
    Directory tempDir = getDirectory();
    AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(tempDir, "suggest", indexAnalyzer, queryAnalyzer, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, random().nextBoolean());
    // Randomly exercise the payload-carrying build path; the payload is the
    // weight rendered as a string so it can be checked below:
    boolean doPayloads = random().nextBoolean();
    if (doPayloads) {
        List<Input> keysAndPayloads = new ArrayList<>();
        for (Input termFreq : keys) {
            keysAndPayloads.add(new Input(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
        }
        analyzingSuggester.build(new InputArrayIterator(keysAndPayloads));
    } else {
        analyzingSuggester.build(new InputArrayIterator(keys));
    }
    for (Input termFreq : keys) {
        List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
        for (LookupResult lookupResult : lookup) {
            // Every returned suggestion must carry the max weight recorded for
            // its key:
            assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
            if (doPayloads) {
                assertEquals(lookupResult.payload.utf8ToString(), Long.toString(lookupResult.value));
            } else {
                assertNull(lookupResult.payload);
            }
        }
    }
    IOUtils.close(lineFile, indexAnalyzer, queryAnalyzer, tempDir);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) BytesRef(org.apache.lucene.util.BytesRef) LineFileDocs(org.apache.lucene.util.LineFileDocs) Directory(org.apache.lucene.store.Directory)

Aggregations

LineFileDocs (org.apache.lucene.util.LineFileDocs)45 Document (org.apache.lucene.document.Document)27 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)24 Directory (org.apache.lucene.store.Directory)17 Path (java.nio.file.Path)16 IOException (java.io.IOException)9 BytesRef (org.apache.lucene.util.BytesRef)8 Random (java.util.Random)7 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 ArrayList (java.util.ArrayList)5 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)5 IntPoint (org.apache.lucene.document.IntPoint)4 RAMDirectory (org.apache.lucene.store.RAMDirectory)4 HashMap (java.util.HashMap)3 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)3 Analyzer (org.apache.lucene.analysis.Analyzer)3 DoublePoint (org.apache.lucene.document.DoublePoint)3 Field (org.apache.lucene.document.Field)3 FloatPoint (org.apache.lucene.document.FloatPoint)3 LongPoint (org.apache.lucene.document.LongPoint)3