Search in sources :

Example 46 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class TestCompressingTermVectorsFormat method testChunkCleanup.

/**
 * Flushes five single-document segments, checking after every flush that each
 * leaf's term vectors reader reports exactly one chunk and one dirty chunk,
 * then force-merges down to a single segment and verifies that at most one
 * dirty chunk remains.
 */
public void testChunkCleanup() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    // No merging while the tiny segments are being written.
    iwConf.setMergePolicy(NoMergePolicy.INSTANCE);
    // Codec settings (e.g. maxDocsPerChunk) are pinned so that this test really
    // does produce dirty chunks.
    iwConf.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
    final IndexWriter iw = new IndexWriter(dir, iwConf);
    DirectoryReader ir = DirectoryReader.open(iw);

    int flushed = 0;
    while (flushed < 5) {
        final FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.setStoreTermVectors(true);
        final Document doc = new Document();
        doc.add(new Field("text", "not very long at all", ft));
        iw.addDocument(doc);

        // Reopening the reader forces a flush of the document just added.
        final DirectoryReader refreshed = DirectoryReader.openIfChanged(ir);
        assertNotNull(refreshed);
        ir.close();
        ir = refreshed;

        // Every leaf must currently hold a single chunk, and that chunk must be dirty.
        for (LeafReaderContext leaf : ir.leaves()) {
            final CodecReader codecReader = (CodecReader) leaf.reader();
            final CompressingTermVectorsReader vectors =
                    (CompressingTermVectorsReader) codecReader.getTermVectorsReader();
            assertEquals(1, vectors.getNumChunks());
            assertEquals(1, vectors.getNumDirtyChunks());
        }
        flushed++;
    }

    // Switch merging back on and collapse everything into one segment.
    iw.getConfig().setMergePolicy(newLogMergePolicy());
    iw.forceMerge(1);

    final DirectoryReader merged = DirectoryReader.openIfChanged(ir);
    assertNotNull(merged);
    ir.close();

    final CodecReader mergedLeaf = (CodecReader) getOnlyLeafReader(merged);
    final CompressingTermVectorsReader mergedVectors =
            (CompressingTermVectorsReader) mergedLeaf.getTermVectorsReader();
    // Zero is possible with some luck, but one dirty chunk is the typical outcome.
    assertTrue(mergedVectors.getNumDirtyChunks() <= 1);

    merged.close();
    iw.close();
    dir.close();
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) CodecReader(org.apache.lucene.index.CodecReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Example 47 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class BitDocSet method getTopFilter.

/**
 * Returns a filter backed by this set's bit set. For a top-level context the
 * whole bit set is exposed directly; for any other context the returned
 * {@code DocIdSet} presents the window {@code [docBase, docBase + maxDoc)} of
 * the bit set, rebased so that doc ids start at zero.
 */
@Override
public Filter getTopFilter() {
    return new Filter() {

        final FixedBitSet bs = bits;

        @Override
        public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) {
            final LeafReader reader = context.reader();
            // All Solr DocSets used as filters contain only live docs, so accept-docs
            // that are just the reader's live docs need not be applied again.
            final Bits acceptDocs2;
            if (acceptDocs == null || reader.getLiveDocs() == acceptDocs) {
                acceptDocs2 = null;
            } else {
                acceptDocs2 = acceptDocs;
            }

            if (context.isTopLevel) {
                return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bs), acceptDocs);
            }

            final int base = context.docBase;
            // Exclusive upper bound of this segment in the bit set's id space.
            final int max = base + reader.maxDoc();

            final DocIdSet segmentView = new DocIdSet() {

                @Override
                public DocIdSetIterator iterator() {
                    return new DocIdSetIterator() {

                        // Position in the bit set's id space of the last match.
                        int pos = base - 1;

                        // Same position rebased to the segment; -1 before iteration starts.
                        int adjustedDoc = -1;

                        @Override
                        public int docID() {
                            return adjustedDoc;
                        }

                        @Override
                        public int nextDoc() {
                            final int candidate = pos + 1;
                            if (candidate < max) {
                                pos = bs.nextSetBit(candidate);
                                adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
                            } else {
                                adjustedDoc = NO_MORE_DOCS;
                            }
                            return adjustedDoc;
                        }

                        @Override
                        public int advance(int target) {
                            if (target == NO_MORE_DOCS) {
                                adjustedDoc = NO_MORE_DOCS;
                                return adjustedDoc;
                            }
                            final int adjusted = target + base;
                            if (adjusted < max) {
                                pos = bs.nextSetBit(adjusted);
                                adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
                            } else {
                                adjustedDoc = NO_MORE_DOCS;
                            }
                            return adjustedDoc;
                        }

                        @Override
                        public long cost() {
                            final int maxDoc = max - base;
                            if (size == -1) {
                                // Cardinality has not been computed; fall back to the segment size.
                                return maxDoc;
                            }
                            // Cardinality is already known: pro-rate it for this segment.
                            return (long) (size * ((FixedBitSet.bits2words(maxDoc) << 6) / (float) bs.length()));
                        }
                    };
                }

                @Override
                public long ramBytesUsed() {
                    return bs.ramBytesUsed();
                }

                @Override
                public Bits bits() {
                    // Random-access view of the same window of the bit set.
                    return new Bits() {

                        @Override
                        public boolean get(int index) {
                            return bs.get(index + base);
                        }

                        @Override
                        public int length() {
                            return max - base;
                        }
                    };
                }
            };
            return BitsFilteredDocIdSet.wrap(segmentView, acceptDocs2);
        }

        @Override
        public String toString(String field) {
            return "BitSetDocTopFilter";
        }

        @Override
        public boolean equals(Object other) {
            if (!sameClassAs(other)) {
                return false;
            }
            return Objects.equals(bs, getClass().cast(other).bs);
        }

        @Override
        public int hashCode() {
            return classHash() * 31 + bs.hashCode();
        }
    };
}
Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) LeafReader(org.apache.lucene.index.LeafReader) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) DocIdSet(org.apache.lucene.search.DocIdSet) BitDocIdSet(org.apache.lucene.util.BitDocIdSet) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 48 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class DocSetBase method getTopFilter.

/**
 * Returns a filter backed by the bit set obtained from {@code getBits()}. For a
 * top-level context the whole bit set is exposed directly; for any other
 * context the returned {@code DocIdSet} iterates the window
 * {@code [docBase, docBase + maxDoc)} of the bit set, rebased so that doc ids
 * start at zero. No random-access {@code Bits} view is offered.
 */
@Override
public Filter getTopFilter() {
    return new Filter() {

        final FixedBitSet bs = getBits();

        @Override
        public DocIdSet getDocIdSet(final LeafReaderContext context, Bits acceptDocs) {
            LeafReader reader = context.reader();
            // all Solr DocSets that are used as filters only include live docs
            final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
            if (context.isTopLevel) {
                // NOTE(review): this branch passes acceptDocs rather than acceptDocs2 - confirm intent.
                return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bs), acceptDocs);
            }
            final int base = context.docBase;
            final int maxDoc = reader.maxDoc();
            // one past the max doc in this segment.
            final int max = base + maxDoc;
            // The bit set may be shorter than the segment's upper bound, and
            // nextSetBit must never be probed at or beyond the bit set's length,
            // so every probe is clamped to the smaller of the two bounds.
            final int limit = Math.min(max, bs.length());
            return BitsFilteredDocIdSet.wrap(new DocIdSet() {

                @Override
                public DocIdSetIterator iterator() {
                    return new DocIdSetIterator() {

                        // Position in the bit set's id space of the last match.
                        int pos = base - 1;

                        // Same position rebased to the segment; -1 before iteration starts.
                        int adjustedDoc = -1;

                        @Override
                        public int docID() {
                            return adjustedDoc;
                        }

                        @Override
                        public int nextDoc() {
                            int next = pos + 1;
                            if (next >= limit) {
                                // Nothing left to probe inside both the segment and the bit set.
                                return adjustedDoc = NO_MORE_DOCS;
                            }
                            pos = bs.nextSetBit(next);
                            return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
                        }

                        @Override
                        public int advance(int target) {
                            if (target == NO_MORE_DOCS) {
                                return adjustedDoc = NO_MORE_DOCS;
                            }
                            // Compared as target >= limit - base so that target + base
                            // is only computed once it is known to be below limit.
                            if (target >= limit - base) {
                                return adjustedDoc = NO_MORE_DOCS;
                            }
                            pos = bs.nextSetBit(target + base);
                            return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
                        }

                        @Override
                        public long cost() {
                            return bs.length();
                        }
                    };
                }

                @Override
                public long ramBytesUsed() {
                    return bs.ramBytesUsed();
                }

                @Override
                public Bits bits() {
                    // sparse filters should not use random access
                    return null;
                }
            }, acceptDocs2);
        }

        @Override
        public String toString(String field) {
            return "DocSetTopFilter";
        }

        @Override
        public boolean equals(Object other) {
            return sameClassAs(other) && Objects.equals(bs, getClass().cast(other).bs);
        }

        @Override
        public int hashCode() {
            return classHash() ^ bs.hashCode();
        }
    };
}
Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) LeafReader(org.apache.lucene.index.LeafReader) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) DocIdSet(org.apache.lucene.search.DocIdSet) BitDocIdSet(org.apache.lucene.util.BitDocIdSet) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 49 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class DocSetUtil method createSmallSet.

/**
 * Collects the live documents from the given postings into a
 * {@code SortedIntDocSet}, translating each per-leaf doc id into the top-level
 * id space by adding the leaf's {@code docBase}.
 *
 * @param leaves      leaf contexts, indexed in parallel with {@code postList}
 * @param postList    one postings enum per leaf; {@code null} entries are skipped
 * @param maxPossible length of the array that receives the collected ids
 * @param firstReader index of the first entry of {@code postList} to consume
 * @return the set of collected ids
 * @throws IOException if advancing a postings enum fails
 */
private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {
    final int[] collected = new int[maxPossible];
    int count = 0;
    for (int idx = firstReader; idx < postList.length; idx++) {
        final PostingsEnum postings = postList[idx];
        if (postings != null) {
            final LeafReaderContext ctx = leaves.get(idx);
            final Bits liveDocs = ctx.reader().getLiveDocs();
            final int base = ctx.docBase;
            int subId;
            while ((subId = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                // A null liveDocs is treated as "nothing is filtered out".
                if (liveDocs == null || liveDocs.get(subId)) {
                    collected[count++] = base + subId;
                }
            }
        }
    }
    return new SortedIntDocSet(collected, count);
}
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) PostingsEnum(org.apache.lucene.index.PostingsEnum)

Example 50 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class DocSetUtil method createBigSet.

/**
 * Collects the live documents from the given postings into a bit set sized for
 * {@code maxDoc}, translating each per-leaf doc id into the top-level id space
 * by adding the leaf's {@code docBase}. If fewer documents were collected than
 * {@code smallSetSize(maxDoc)}, the result is converted with {@code toSmallSet}.
 *
 * @param leaves      leaf contexts, indexed in parallel with {@code postList}
 * @param postList    one postings enum per leaf; {@code null} entries are skipped
 * @param maxDoc      number of bits in the backing bit set
 * @param firstReader index of the first entry of {@code postList} to consume
 * @return the collected documents as a {@code BitDocSet}, or its small-set conversion
 * @throws IOException if advancing a postings enum fails
 */
private static DocSet createBigSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxDoc, int firstReader) throws IOException {
    final long[] words = new long[FixedBitSet.bits2words(maxDoc)];
    int cardinality = 0;
    for (int idx = firstReader; idx < postList.length; idx++) {
        final PostingsEnum postings = postList[idx];
        if (postings != null) {
            final LeafReaderContext ctx = leaves.get(idx);
            final Bits liveDocs = ctx.reader().getLiveDocs();
            final int base = ctx.docBase;
            int subId;
            while ((subId = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                // A null liveDocs is treated as "nothing is filtered out".
                if (liveDocs == null || liveDocs.get(subId)) {
                    final int globalId = base + subId;
                    // Word index is globalId / 64; the long shift uses only the low six bits.
                    words[globalId >> 6] |= 1L << globalId;
                    cardinality++;
                }
            }
        }
    }
    final BitDocSet docSet = new BitDocSet(new FixedBitSet(words, maxDoc), cardinality);
    final int threshold = smallSetSize(maxDoc);
    if (cardinality >= threshold) {
        return docSet;
    }
    // make this optional?
    return toSmallSet(docSet);
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) PostingsEnum(org.apache.lucene.index.PostingsEnum)

Aggregations

LeafReaderContext (org.apache.lucene.index.LeafReaderContext)326 LeafReader (org.apache.lucene.index.LeafReader)70 Document (org.apache.lucene.document.Document)68 BytesRef (org.apache.lucene.util.BytesRef)66 IOException (java.io.IOException)65 Directory (org.apache.lucene.store.Directory)57 Term (org.apache.lucene.index.Term)50 IndexSearcher (org.apache.lucene.search.IndexSearcher)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 DirectoryReader (org.apache.lucene.index.DirectoryReader)44 Bits (org.apache.lucene.util.Bits)44 IndexReader (org.apache.lucene.index.IndexReader)43 NumericDocValues (org.apache.lucene.index.NumericDocValues)40 ArrayList (java.util.ArrayList)39 Terms (org.apache.lucene.index.Terms)35 Weight (org.apache.lucene.search.Weight)35 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)34 Scorer (org.apache.lucene.search.Scorer)34 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)33 TermsEnum (org.apache.lucene.index.TermsEnum)31