Example 71 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From class TestNRTCachingDirectory, method testNRTAndCommit.

public void testNRTAndCommit() throws Exception {
    Directory dir = newDirectory();
    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
    final LineFileDocs docs = new LineFileDocs(random());
    final int numDocs = TestUtil.nextInt(random(), 100, 400);
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs);
    }
    final List<BytesRef> ids = new ArrayList<>();
    DirectoryReader r = null;
    for (int docCount = 0; docCount < numDocs; docCount++) {
        final Document doc = docs.nextDoc();
        ids.add(new BytesRef(doc.get("docid")));
        w.addDocument(doc);
        if (random().nextInt(20) == 17) {
            if (r == null) {
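                // w.w is the RandomIndexWriter's underlying IndexWriter; opening a
                // reader from it yields a near-real-time (NRT) reader that also
                // sees documents not yet committed.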
                r = DirectoryReader.open(w.w);
            } else {
                final DirectoryReader r2 = DirectoryReader.openIfChanged(r);
                if (r2 != null) {
                    r.close();
                    r = r2;
                }
            }
            assertEquals(1 + docCount, r.numDocs());
            final IndexSearcher s = newSearcher(r);
            // Just make sure search can run; we can't assert
            // totHits since it could be 0
            TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
            // System.out.println("tot hits " + hits.totalHits);
        }
    }
    if (r != null) {
        r.close();
    }
    // Close should force cache to clear since all files are sync'd
    w.close();
    final String[] cachedFiles = cachedDir.listCachedFiles();
    for (String file : cachedFiles) {
        System.out.println("FAIL: cached file " + file + " remains after sync");
    }
    assertEquals(0, cachedFiles.length);
    r = DirectoryReader.open(dir);
    for (BytesRef id : ids) {
        assertEquals(1, r.docFreq(new Term("docid", id)));
    }
    r.close();
    cachedDir.close();
    docs.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) LineFileDocs(org.apache.lucene.util.LineFileDocs)
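
For context, here is a minimal standalone sketch of the same NRT-reopen pattern the test exercises. The class name, index path, analyzer choice, and field values are illustrative assumptions, not part of the Lucene sources:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;

public class NRTReopenSketch {
    public static void main(String[] args) throws Exception {
        // Small new segments stay in RAM until they exceed these MB thresholds.
        NRTCachingDirectory dir =
            new NRTCachingDirectory(FSDirectory.open(Paths.get("idx")), 5.0, 60.0);
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

        Document doc = new Document();
        doc.add(new StringField("docid", "1", Field.Store.YES));
        writer.addDocument(doc);

        // NRT reader: sees the uncommitted document added above.
        DirectoryReader reader = DirectoryReader.open(writer);

        doc = new Document();
        doc.add(new StringField("docid", "2", Field.Store.YES));
        writer.addDocument(doc);

        // Cheap refresh: openIfChanged returns null if nothing changed since
        // `reader` was opened, so the old reader is closed only when replaced.
        DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }

        reader.close();
        writer.close(); // commits and syncs, flushing the NRT cache to the delegate
        dir.close();
    }
}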

Example 72 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From class TaxonomyMergeUtils, method merge.

/**
   * Merges the given taxonomy and index directories and commits the changes to
   * the given writers.
   */
public static void merge(Directory srcIndexDir, Directory srcTaxoDir, OrdinalMap map, IndexWriter destIndexWriter, DirectoryTaxonomyWriter destTaxoWriter, FacetsConfig srcConfig) throws IOException {
    // merge the taxonomies
    destTaxoWriter.addTaxonomy(srcTaxoDir, map);
    int[] ordinalMap = map.getMap();
    DirectoryReader reader = DirectoryReader.open(srcIndexDir);
    try {
        List<LeafReaderContext> leaves = reader.leaves();
        int numReaders = leaves.size();
        CodecReader[] wrappedLeaves = new CodecReader[numReaders];
        for (int i = 0; i < numReaders; i++) {
            wrappedLeaves[i] = SlowCodecReaderWrapper.wrap(new OrdinalMappingLeafReader(leaves.get(i).reader(), ordinalMap, srcConfig));
        }
        destIndexWriter.addIndexes(wrappedLeaves);
        // commit changes to taxonomy and index respectively.
        destTaxoWriter.commit();
        destIndexWriter.commit();
    } finally {
        reader.close();
    }
}
Also used : CodecReader(org.apache.lucene.index.CodecReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)
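
A minimal sketch of how this helper might be invoked. The wrapper class, method name, and writer/config setup are our assumptions for illustration:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.TaxonomyMergeUtils;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

public class MergeSketch {
    public static void mergeInto(Directory srcIndexDir, Directory srcTaxoDir,
                                 Directory destIndexDir, Directory destTaxoDir) throws Exception {
        try (IndexWriter destIndexWriter =
                 new IndexWriter(destIndexDir, new IndexWriterConfig(new StandardAnalyzer()));
             DirectoryTaxonomyWriter destTaxoWriter = new DirectoryTaxonomyWriter(destTaxoDir)) {
            // MemoryOrdinalMap holds the source-to-destination ordinal mapping in RAM;
            // DiskOrdinalMap is the spill-to-disk alternative for very large taxonomies.
            TaxonomyMergeUtils.merge(srcIndexDir, srcTaxoDir, new MemoryOrdinalMap(),
                                     destIndexWriter, destTaxoWriter, new FacetsConfig());
        }
    }
}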

Example 73 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From class DirectoryTaxonomyReader, method doOpenIfChanged.

/**
   * Implements the opening of a new {@link DirectoryTaxonomyReader} instance if
   * the taxonomy has changed.
   * 
   * <p>
   * <b>NOTE:</b> the returned {@link DirectoryTaxonomyReader} shares the
   * ordinal and category caches with this reader. This is not expected to cause
   * any issues, unless the two instances continue to live. The reader
   * guarantees that the two instances cannot affect each other in terms of
   * correctness of the caches, however if the size of the cache is changed
   * through {@link #setCacheSize(int)}, it will affect both reader instances.
   */
@Override
protected DirectoryTaxonomyReader doOpenIfChanged() throws IOException {
    ensureOpen();
    // This works for both NRT and non-NRT readers (i.e. an NRT reader remains NRT).
    final DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader);
    if (r2 == null) {
        // no changes, nothing to do
        return null;
    }
    // check if the taxonomy was recreated
    boolean success = false;
    try {
        boolean recreated = false;
        if (taxoWriter == null) {
            // not NRT, check epoch from commit data
            String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
            String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
            if (t1 == null) {
                if (t2 != null) {
                    recreated = true;
                }
            } else if (!t1.equals(t2)) {
                // t1 != null and t2 must not be null b/c DirTaxoWriter always puts the commit data.
                // it's ok to use String.equals because we require the two epoch values to be the same.
                recreated = true;
            }
        } else {
            // NRT, compare current taxoWriter.epoch() vs the one that was given at construction
            if (taxoEpoch != taxoWriter.getTaxonomyEpoch()) {
                recreated = true;
            }
        }
        final DirectoryTaxonomyReader newtr;
        if (recreated) {
        // If recreated, do not reuse anything from this instance; the information
        // will be lazily computed by the new instance when needed.
            newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
        } else {
            newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
        }
        success = true;
        return newtr;
    } finally {
        if (!success) {
            IOUtils.closeWhileHandlingException(r2);
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader)
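
Callers normally reach doOpenIfChanged() through the static TaxonomyReader.openIfChanged(...). A minimal sketch of that refresh pattern follows; the wrapper class and method name are ours:

import java.io.IOException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;

public class TaxoRefreshSketch {
    // Returns the freshest reader, closing the old one only when it was replaced.
    static DirectoryTaxonomyReader refresh(DirectoryTaxonomyReader current) throws IOException {
        DirectoryTaxonomyReader newReader = TaxonomyReader.openIfChanged(current);
        if (newReader == null) {
            return current; // taxonomy unchanged
        }
        // Safe to close: the shared caches keep both instances correct (see NOTE above).
        current.close();
        return newReader;
    }
}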

Example 74 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From class DirectoryTaxonomyWriter, method addTaxonomy.

/**
   * Takes the categories from the given taxonomy directory, and adds the
   * missing ones to this taxonomy. Additionally, it fills the given
   * {@link OrdinalMap} with a mapping from the original ordinal to the new
   * ordinal.
   */
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
    ensureOpen();
    DirectoryReader r = DirectoryReader.open(taxoDir);
    try {
        final int size = r.numDocs();
        final OrdinalMap ordinalMap = map;
        ordinalMap.setSize(size);
        int base = 0;
        PostingsEnum docs = null;
        for (final LeafReaderContext ctx : r.leaves()) {
            final LeafReader ar = ctx.reader();
            final Terms terms = ar.terms(Consts.FULL);
            // TODO: share per-segment TermsEnum here!
            TermsEnum te = terms.iterator();
            while (te.next() != null) {
                FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
                final int ordinal = addCategory(cp);
                docs = te.postings(docs, PostingsEnum.NONE);
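                // Every category occupies exactly one document in the taxonomy index,
                // so the first (and only) posting is that category's docID.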
                ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
            }
            // no deletions, so we're ok
            base += ar.maxDoc();
        }
        ordinalMap.addDone();
    } finally {
        r.close();
    }
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PostingsEnum(org.apache.lucene.index.PostingsEnum) TermsEnum(org.apache.lucene.index.TermsEnum)
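
A minimal sketch of calling addTaxonomy directly with an in-memory map; the wrapper class and method name are ours:

import java.io.IOException;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.store.Directory;

public class AddTaxonomySketch {
    static int[] addAndRemap(DirectoryTaxonomyWriter destTaxoWriter, Directory srcTaxoDir)
            throws IOException {
        MemoryOrdinalMap map = new MemoryOrdinalMap();
        destTaxoWriter.addTaxonomy(srcTaxoDir, map);
        // map.getMap()[srcOrdinal] is the corresponding ordinal in the destination
        // taxonomy; OrdinalMappingLeafReader (Example 72) uses this to rewrite facets.
        return map.getMap();
    }
}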

Example 75 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From class TestDocValuesStatsCollector, method testDocsWithSortedSetValues.

public void testDocsWithSortedSetValues() throws IOException {
    try (Directory dir = newDirectory();
        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
        String field = "sorted";
        int numDocs = TestUtil.nextInt(random(), 1, 100);
        BytesRef[][] docValues = new BytesRef[numDocs][];
        for (int i = 0; i < numDocs; i++) {
            Document doc = new Document();
            if (random().nextBoolean()) {
                // not all documents have a value
                int numValues = TestUtil.nextInt(random(), 1, 5);
                docValues[i] = new BytesRef[numValues];
                for (int j = 0; j < numValues; j++) {
                    BytesRef val = TestUtil.randomBinaryTerm(random());
                    doc.add(new SortedSetDocValuesField(field, val));
                    docValues[i][j] = val;
                }
                doc.add(new StringField("id", "doc" + i, Store.NO));
            }
            indexWriter.addDocument(doc);
        }
        // 20% of cases delete some docs
        if (random().nextDouble() < 0.2) {
            for (int i = 0; i < numDocs; i++) {
                if (random().nextBoolean()) {
                    indexWriter.deleteDocuments(new Term("id", "doc" + i));
                    docValues[i] = null;
                }
            }
        }
        try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            SortedSetDocValuesStats stats = new SortedSetDocValuesStats(field);
            TotalHitCountCollector totalHitCount = new TotalHitCountCollector();
            searcher.search(new MatchAllDocsQuery(), MultiCollector.wrap(totalHitCount, new DocValuesStatsCollector(stats)));
            int expCount = (int) nonNull(docValues).count();
            assertEquals(expCount, stats.count());
            int numDocsWithoutField = (int) isNull(docValues).count();
            assertEquals(computeExpMissing(numDocsWithoutField, numDocs, reader), stats.missing());
            if (stats.count() > 0) {
                assertEquals(nonNull(docValues).flatMap(Arrays::stream).min(BytesRef::compareTo).get(), stats.min());
                assertEquals(nonNull(docValues).flatMap(Arrays::stream).max(BytesRef::compareTo).get(), stats.max());
            }
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SortedSetDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedSetDocValuesStats) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)
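
A minimal sketch of the same stats-collection pattern outside the test framework. The class and method names are ours, and it assumes the field's values are UTF-8 strings (the test above uses random binary terms):

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.DocValuesStats.SortedSetDocValuesStats;
import org.apache.lucene.search.DocValuesStatsCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;

public class StatsSketch {
    static void printSortedSetStats(Directory dir, String field) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            SortedSetDocValuesStats stats = new SortedSetDocValuesStats(field);
            // The collector visits every matching doc and folds its values into `stats`.
            searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
            System.out.println(field + ": count=" + stats.count() + ", missing=" + stats.missing());
            if (stats.count() > 0) {
                System.out.println("min=" + stats.min().utf8ToString()
                    + ", max=" + stats.max().utf8ToString());
            }
        }
    }
}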

Aggregations

DirectoryReader (org.apache.lucene.index.DirectoryReader): 362
Document (org.apache.lucene.document.Document): 228
Directory (org.apache.lucene.store.Directory): 206
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 149
IndexWriter (org.apache.lucene.index.IndexWriter): 139
Term (org.apache.lucene.index.Term): 134
IndexSearcher (org.apache.lucene.search.IndexSearcher): 101
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 98
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 96
Test (org.junit.Test): 64
StringField (org.apache.lucene.document.StringField): 61
Analyzer (org.apache.lucene.analysis.Analyzer): 54
BytesRef (org.apache.lucene.util.BytesRef): 51
LeafReader (org.apache.lucene.index.LeafReader): 49
ArrayList (java.util.ArrayList): 46
Field (org.apache.lucene.document.Field): 45
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 44
TermQuery (org.apache.lucene.search.TermQuery): 42
IOException (java.io.IOException): 37
TextField (org.apache.lucene.document.TextField): 36