Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache: class TestCompressingTermVectorsFormat, method testChunkCleanup.
/**
* writes some tiny segments with incomplete compressed blocks,
* and ensures merge recompresses them.
*/
public void testChunkCleanup() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMergePolicy(NoMergePolicy.INSTANCE);
  // we have to enforce certain things like maxDocsPerChunk to cause dirty chunks to be created
  // by this test.
  iwConf.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
  IndexWriter iw = new IndexWriter(dir, iwConf);
  DirectoryReader ir = DirectoryReader.open(iw);

  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    doc.add(new Field("text", "not very long at all", ft));
    iw.addDocument(doc);

    // force flush
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
    assertNotNull(ir2);
    ir.close();
    ir = ir2;

    // examine dirty counts:
    for (LeafReaderContext leaf : ir2.leaves()) {
      CodecReader sr = (CodecReader) leaf.reader();
      CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
      assertEquals(1, reader.getNumChunks());
      assertEquals(1, reader.getNumDirtyChunks());
    }
  }

  iw.getConfig().setMergePolicy(newLogMergePolicy());
  iw.forceMerge(1);
  DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
  assertNotNull(ir2);
  ir.close();
  ir = ir2;

  CodecReader sr = (CodecReader) getOnlyLeafReader(ir);
  CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
  // we could get lucky, and have zero, but typically one.
  assertTrue(reader.getNumDirtyChunks() <= 1);

  ir.close();
  iw.close();
  dir.close();
}
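For reference, the assertions above use the standard per-leaf access pattern: DirectoryReader.leaves() yields one LeafReaderContext per segment, and each context exposes the segment reader plus its doc-id offset. A minimal sketch of that pattern, assuming an already-populated Directory; the helper name and printed fields are illustrative only:

static void printLeafLayout(Directory dir) throws IOException {
  try (DirectoryReader reader = DirectoryReader.open(dir)) {
    for (LeafReaderContext ctx : reader.leaves()) {
      LeafReader leaf = ctx.reader();
      // docBase is the index-wide id of the first document in this segment
      System.out.println("ord=" + ctx.ord + " docBase=" + ctx.docBase + " maxDoc=" + leaf.maxDoc());
    }
  }
}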
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache: class BitDocSet, method getTopFilter.
@Override
public Filter getTopFilter() {
  return new Filter() {

    final FixedBitSet bs = bits;

    @Override
    public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) {
      LeafReader reader = context.reader();
      // all Solr DocSets that are used as filters only include live docs
      final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);

      if (context.isTopLevel) {
        return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bs), acceptDocs);
      }

      final int base = context.docBase;
      // one past the max doc in this segment.
      final int max = base + reader.maxDoc();

      return BitsFilteredDocIdSet.wrap(new DocIdSet() {

        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            int pos = base - 1;
            int adjustedDoc = -1;

            @Override
            public int docID() {
              return adjustedDoc;
            }

            @Override
            public int nextDoc() {
              int next = pos + 1;
              if (next >= max) {
                return adjustedDoc = NO_MORE_DOCS;
              } else {
                pos = bs.nextSetBit(next);
                return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
              }
            }

            @Override
            public int advance(int target) {
              if (target == NO_MORE_DOCS) return adjustedDoc = NO_MORE_DOCS;
              int adjusted = target + base;
              if (adjusted >= max) {
                return adjustedDoc = NO_MORE_DOCS;
              } else {
                pos = bs.nextSetBit(adjusted);
                return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
              }
            }

            @Override
            public long cost() {
              // we don't want to actually compute cardinality, but
              // if it's already been computed, we use it (pro-rated for the segment)
              int maxDoc = max - base;
              if (size != -1) {
                return (long) (size * ((FixedBitSet.bits2words(maxDoc) << 6) / (float) bs.length()));
              } else {
                return maxDoc;
              }
            }
          };
        }

        @Override
        public long ramBytesUsed() {
          return bs.ramBytesUsed();
        }

        @Override
        public Bits bits() {
          // random-access view over this segment: offsets segment-local ids by docBase
          // into the top-level bitset
          return new Bits() {
            @Override
            public boolean get(int index) {
              return bs.get(index + base);
            }

            @Override
            public int length() {
              return max - base;
            }
          };
        }
      }, acceptDocs2);
    }

    @Override
    public String toString(String field) {
      return "BitSetDocTopFilter";
    }

    @Override
    public boolean equals(Object other) {
      return sameClassAs(other) && Objects.equals(bs, getClass().cast(other).bs);
    }

    @Override
    public int hashCode() {
      return classHash() * 31 + bs.hashCode();
    }
  };
}
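The filter above hands out segment-local doc ids; a caller is expected to add context.docBase back to recover index-wide ids. A minimal consumption sketch under that assumption (the collect helper is hypothetical, not part of BitDocSet):

static void collect(Filter filter, IndexReader topReader) throws IOException {
  for (LeafReaderContext ctx : topReader.leaves()) {
    DocIdSet set = filter.getDocIdSet(ctx, ctx.reader().getLiveDocs());
    if (set == null) continue;
    DocIdSetIterator it = set.iterator();
    if (it == null) continue; // no matches in this segment
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      int globalDoc = ctx.docBase + doc; // segment-local id back to index-wide id
      // ... use globalDoc ...
    }
  }
}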
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache: class DocSetBase, method getTopFilter.
@Override
public Filter getTopFilter() {
  return new Filter() {

    final FixedBitSet bs = getBits();

    @Override
    public DocIdSet getDocIdSet(final LeafReaderContext context, Bits acceptDocs) {
      LeafReader reader = context.reader();
      // all Solr DocSets that are used as filters only include live docs
      final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);

      if (context.isTopLevel) {
        return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bs), acceptDocs);
      }

      final int base = context.docBase;
      final int maxDoc = reader.maxDoc();
      // one past the max doc in this segment.
      final int max = base + maxDoc;

      return BitsFilteredDocIdSet.wrap(new DocIdSet() {

        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            int pos = base - 1;
            int adjustedDoc = -1;

            @Override
            public int docID() {
              return adjustedDoc;
            }

            @Override
            public int nextDoc() {
              // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
              pos = bs.nextSetBit(pos + 1);
              return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
            }

            @Override
            public int advance(int target) {
              if (target == NO_MORE_DOCS) return adjustedDoc = NO_MORE_DOCS;
              pos = bs.nextSetBit(target + base);
              return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              return bs.length();
            }
          };
        }

        @Override
        public long ramBytesUsed() {
          return bs.ramBytesUsed();
        }

        @Override
        public Bits bits() {
          // sparse filters should not use random access
          return null;
        }
      }, acceptDocs2);
    }

    @Override
    public String toString(String field) {
      return "DocSetTopFilter";
    }

    @Override
    public boolean equals(Object other) {
      return sameClassAs(other) && Objects.equals(bs, getClass().cast(other).bs);
    }

    @Override
    public int hashCode() {
      return classHash() ^ bs.hashCode();
    }
  };
}
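This variant differs from the BitDocSet filter mainly in bits() and cost(): it returns null from bits(), so callers must fall back to the iterator instead of random access. A small sketch of how a caller would honour that contract (fragment only; filter, ctx and localDocId are assumed to exist, and the calls may throw IOException):

DocIdSet set = filter.getDocIdSet(ctx, null);
Bits randomAccess = (set == null) ? null : set.bits();
if (randomAccess != null) {
  // BitDocSet's filter: O(1) membership test on a segment-local id
  boolean matches = randomAccess.get(localDocId);
} else {
  // DocSetBase's filter: bits() is null, so advance set.iterator() to localDocId instead
}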
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache: class DocSetUtil, method createSmallSet.
private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {
  int[] docs = new int[maxPossible];
  int sz = 0;

  for (int i = firstReader; i < postList.length; i++) {
    PostingsEnum postings = postList[i];
    if (postings == null) continue;
    LeafReaderContext ctx = leaves.get(i);
    Bits liveDocs = ctx.reader().getLiveDocs();
    int base = ctx.docBase;
    for (; ; ) {
      int subId = postings.nextDoc();
      if (subId == DocIdSetIterator.NO_MORE_DOCS) break;
      // skip deleted documents
      if (liveDocs != null && !liveDocs.get(subId)) continue;
      // postings enumerate segment-local ids; docBase turns them into index-wide ids
      int globalId = subId + base;
      docs[sz++] = globalId;
    }
  }

  return new SortedIntDocSet(docs, sz);
}
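The postList parameter holds one PostingsEnum per leaf (null where the segment has no postings for the term). A hypothetical sketch of how such an array could be assembled for a single term; the helper name is made up, but the Terms/TermsEnum calls are standard Lucene API:

static PostingsEnum[] postingsPerLeaf(List<LeafReaderContext> leaves, String field, BytesRef term) throws IOException {
  PostingsEnum[] postList = new PostingsEnum[leaves.size()];
  for (int i = 0; i < leaves.size(); i++) {
    Terms terms = leaves.get(i).reader().terms(field);
    if (terms == null) continue; // field not indexed in this segment
    TermsEnum te = terms.iterator();
    if (te.seekExact(term)) {
      postList[i] = te.postings(null, PostingsEnum.NONE); // doc ids only
    }
  }
  return postList;
}

Requesting PostingsEnum.NONE asks for doc ids without positions or offsets, which is all these DocSet builders consume.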
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache: class DocSetUtil, method createBigSet.
private static DocSet createBigSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxDoc, int firstReader) throws IOException {
  long[] bits = new long[FixedBitSet.bits2words(maxDoc)];
  int sz = 0;

  for (int i = firstReader; i < postList.length; i++) {
    PostingsEnum postings = postList[i];
    if (postings == null) continue;
    LeafReaderContext ctx = leaves.get(i);
    Bits liveDocs = ctx.reader().getLiveDocs();
    int base = ctx.docBase;
    for (; ; ) {
      int subId = postings.nextDoc();
      if (subId == DocIdSetIterator.NO_MORE_DOCS) break;
      // skip deleted documents
      if (liveDocs != null && !liveDocs.get(subId)) continue;
      int globalId = subId + base;
      // set bit globalId; Java masks a long shift count to its low 6 bits,
      // so (1L << globalId) == (1L << (globalId & 63))
      bits[globalId >> 6] |= (1L << globalId);
      sz++;
    }
  }

  BitDocSet docSet = new BitDocSet(new FixedBitSet(bits, maxDoc), sz);

  int smallSetSize = smallSetSize(maxDoc);
  if (sz < smallSetSize) {
    // make this optional?
    DocSet smallSet = toSmallSet(docSet);
    // assert equals(docSet, smallSet);
    return smallSet;
  }
  return docSet;
}
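The line bits[globalId >> 6] |= (1L << globalId) relies on Java masking a long shift count to its low six bits, so no explicit "& 63" is needed. A tiny standalone check of that equivalence:

int globalId = 130;                    // example value; 130 & 63 == 2
long a = 1L << globalId;               // Java shifts by globalId & 63
long b = 1L << (globalId & 63);
System.out.println(a == b && a == 4L); // prints true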