Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class IndexedDISI, method writeBitSet.
static void writeBitSet(DocIdSetIterator it, IndexOutput out) throws IOException {
  int i = 0;
  final FixedBitSet buffer = new FixedBitSet(1 << 16);
  int prevBlock = -1;
  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    final int block = doc >>> 16;
    if (prevBlock != -1 && block != prevBlock) {
      flush(prevBlock, buffer, i, out);
      buffer.clear(0, buffer.length());
      prevBlock = block;
      i = 0;
    }
    buffer.set(doc & 0xFFFF);
    i++;
    prevBlock = block;
  }
  if (i > 0) {
    flush(prevBlock, buffer, i, out);
    buffer.clear(0, buffer.length());
  }
  // NO_MORE_DOCS is stored explicitly
  buffer.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
  flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, buffer, 1, out);
}
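The method partitions the incoming doc IDs into 2^16-document blocks: the upper 16 bits of each doc ID select the block, the lower 16 bits are recorded in a single reusable FixedBitSet, and the buffer is flushed and cleared whenever the block changes. Below is a minimal standalone sketch of that blocked-bitset idiom; flushBlock is a hypothetical stand-in for the private IndexedDISI.flush, and the input is assumed to be sorted, as a DocIdSetIterator would deliver it.

import org.apache.lucene.util.FixedBitSet;

public class BlockedBitSetSketch {

  static void encode(int[] sortedDocs) {
    final FixedBitSet buffer = new FixedBitSet(1 << 16);
    int prevBlock = -1;
    int cardinality = 0;
    for (int doc : sortedDocs) {
      final int block = doc >>> 16;          // upper 16 bits pick the block
      if (prevBlock != -1 && block != prevBlock) {
        flushBlock(prevBlock, buffer, cardinality);
        buffer.clear(0, buffer.length());    // reuse the same bitset for the next block
        cardinality = 0;
      }
      buffer.set(doc & 0xFFFF);              // lower 16 bits index within the block
      cardinality++;
      prevBlock = block;
    }
    if (cardinality > 0) {
      flushBlock(prevBlock, buffer, cardinality);
    }
  }

  // Hypothetical sink; the real code serializes the block to an IndexOutput.
  static void flushBlock(int block, FixedBitSet bits, int cardinality) {
    System.out.println("block=" + block + " cardinality=" + cardinality);
  }
}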
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class BKDWriter, method build.
/* Recursively reorders the provided reader and writes the bkd-tree on the fly; this method is used
* when we are writing a new segment directly from IndexWriter's indexing buffer (MutablePointsReader). */
private void build(int nodeID, int leafNodeOffset, MutablePointValues reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException {
  if (nodeID >= leafNodeOffset) {
    // leaf node
    final int count = to - from;
    assert count <= maxPointsInLeafNode;
    // Compute common prefixes
    Arrays.fill(commonPrefixLengths, bytesPerDim);
    reader.getValue(from, scratchBytesRef1);
    for (int i = from + 1; i < to; ++i) {
      reader.getValue(i, scratchBytesRef2);
      for (int dim = 0; dim < numDims; dim++) {
        final int offset = dim * bytesPerDim;
        for (int j = 0; j < commonPrefixLengths[dim]; j++) {
          if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j] != scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
            commonPrefixLengths[dim] = j;
            break;
          }
        }
      }
    }
    // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
    FixedBitSet[] usedBytes = new FixedBitSet[numDims];
    for (int dim = 0; dim < numDims; ++dim) {
      if (commonPrefixLengths[dim] < bytesPerDim) {
        usedBytes[dim] = new FixedBitSet(256);
      }
    }
    for (int i = from + 1; i < to; ++i) {
      for (int dim = 0; dim < numDims; dim++) {
        if (usedBytes[dim] != null) {
          byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
          usedBytes[dim].set(Byte.toUnsignedInt(b));
        }
      }
    }
    int sortedDim = 0;
    int sortedDimCardinality = Integer.MAX_VALUE;
    for (int dim = 0; dim < numDims; ++dim) {
      if (usedBytes[dim] != null) {
        final int cardinality = usedBytes[dim].cardinality();
        if (cardinality < sortedDimCardinality) {
          sortedDim = dim;
          sortedDimCardinality = cardinality;
        }
      }
    }
    // sort by sortedDim
    MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2);
    // Save the block file pointer:
    leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
    assert scratchOut.getPosition() == 0;
    // Write doc IDs
    int[] docIDs = spareDocIds;
    for (int i = from; i < to; ++i) {
      docIDs[i - from] = reader.getDocID(i);
    }
    //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
    writeLeafBlockDocs(scratchOut, docIDs, 0, count);
    // Write the common prefixes:
    reader.getValue(from, scratchBytesRef1);
    System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
    writeCommonPrefixes(scratchOut, commonPrefixLengths, scratch1);
    // Write the full values:
    IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
      @Override
      public BytesRef apply(int i) {
        reader.getValue(from + i, scratchBytesRef1);
        return scratchBytesRef1;
      }
    };
    assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
    writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
    out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
    scratchOut.reset();
  } else {
    // inner node
    // compute the split dimension and partition around it
    final int splitDim = split(minPackedValue, maxPackedValue, parentSplits);
    final int mid = (from + to + 1) >>> 1;
    int commonPrefixLen = bytesPerDim;
    for (int i = 0; i < bytesPerDim; ++i) {
      if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
        commonPrefixLen = i;
        break;
      }
    }
    MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
    // set the split value
    final int address = nodeID * (1 + bytesPerDim);
    splitPackedValues[address] = (byte) splitDim;
    reader.getValue(mid, scratchBytesRef1);
    System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
    byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
    byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
    System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
    System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
    // recurse
    parentSplits[splitDim]++;
    build(nodeID * 2, leafNodeOffset, reader, from, mid, out, minPackedValue, maxSplitPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
    build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, minSplitPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
    parentSplits[splitDim]--;
  }
}
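In the leaf branch, FixedBitSet(256) serves as a tiny set of distinct byte values: for each dimension it marks the first byte past that dimension's common prefix across all points, then picks the dimension whose cardinality() is smallest, since sorting by the dimension with the fewest distinct bytes compresses best. A self-contained sketch of that selection step follows; the byte[][] input is a hypothetical stand-in for the per-dimension bytes the real code reads via reader.getByteAt.

import org.apache.lucene.util.FixedBitSet;

public class SortedDimSketch {

  // Returns the dimension with the fewest distinct byte values, mirroring the
  // usedBytes/cardinality() logic in build(). divergentBytes[dim] holds, for
  // every point, the first byte after that dimension's common prefix.
  static int pickSortedDim(byte[][] divergentBytes) {
    int sortedDim = 0;
    int sortedDimCardinality = Integer.MAX_VALUE;
    for (int dim = 0; dim < divergentBytes.length; ++dim) {
      FixedBitSet usedBytes = new FixedBitSet(256);  // one bit per possible byte value
      for (byte b : divergentBytes[dim]) {
        usedBytes.set(Byte.toUnsignedInt(b));        // bytes are signed; index as unsigned
      }
      int cardinality = usedBytes.cardinality();     // number of distinct bytes seen
      if (cardinality < sortedDimCardinality) {
        sortedDim = dim;
        sortedDimCardinality = cardinality;
      }
    }
    return sortedDim;
  }
}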
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class TestIndexSorting, method testRandom1.
public void testRandom1() throws IOException {
  boolean withDeletes = random().nextBoolean();
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);
  final int numDocs = atLeast(1000);
  final FixedBitSet deleted = new FixedBitSet(numDocs);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
    doc.add(new StringField("id", Integer.toString(i), Store.YES));
    doc.add(new NumericDocValuesField("id", i));
    w.addDocument(doc);
    if (random().nextInt(5) == 0) {
      w.getReader().close();
    } else if (random().nextInt(30) == 0) {
      w.forceMerge(2);
    } else if (random().nextInt(4) == 0) {
      final int id = TestUtil.nextInt(random(), 0, i);
      deleted.set(id);
      w.deleteDocuments(new Term("id", Integer.toString(id)));
    }
  }
  // Check that segments are sorted
  DirectoryReader reader = w.getReader();
  for (LeafReaderContext ctx : reader.leaves()) {
    final SegmentReader leaf = (SegmentReader) ctx.reader();
    SegmentInfo info = leaf.getSegmentInfo().info;
    switch (info.getDiagnostics().get(IndexWriter.SOURCE)) {
      case IndexWriter.SOURCE_FLUSH:
      case IndexWriter.SOURCE_MERGE:
        assertEquals(indexSort, info.getIndexSort());
        final NumericDocValues values = leaf.getNumericDocValues("foo");
        long previous = Long.MIN_VALUE;
        for (int i = 0; i < leaf.maxDoc(); ++i) {
          assertEquals(i, values.nextDoc());
          final long value = values.longValue();
          assertTrue(value >= previous);
          previous = value;
        }
        break;
      default:
        fail();
    }
  }
  // Now check that the index is consistent
  IndexSearcher searcher = newSearcher(reader);
  for (int i = 0; i < numDocs; ++i) {
    TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
    final TopDocs topDocs = searcher.search(termQuery, 1);
    if (deleted.get(i)) {
      assertEquals(0, topDocs.totalHits);
    } else {
      assertEquals(1, topDocs.totalHits);
      NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
      assertEquals(topDocs.scoreDocs[0].doc, values.advance(topDocs.scoreDocs[0].doc));
      assertEquals(i, values.longValue());
      Document document = reader.document(topDocs.scoreDocs[0].doc);
      assertEquals(Integer.toString(i), document.get("id"));
    }
  }
  reader.close();
  w.close();
  dir.close();
}
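Here the FixedBitSet acts as a deletion ledger: each randomly deleted id is recorded with set(id), and because set() is idempotent it does not matter if the same id is picked twice. The verification loop then branches on deleted.get(i) to decide whether a query for that id should match. A reduced sketch of the pattern, with java.util.Random standing in for the test framework's random():

import java.util.Random;
import org.apache.lucene.util.FixedBitSet;

public class DeletionLedgerSketch {

  static FixedBitSet recordDeletions(int numDocs, long seed) {
    FixedBitSet deleted = new FixedBitSet(numDocs);
    Random random = new Random(seed);
    for (int i = 0; i < numDocs; ++i) {
      if (random.nextInt(4) == 0) {
        int id = random.nextInt(i + 1); // delete some doc added so far
        deleted.set(id);                // idempotent: re-deleting the same id is harmless
      }
    }
    return deleted;
  }
}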
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class TestIndexSorting, method testMultiValuedRandom1.
public void testMultiValuedRandom1() throws IOException {
  boolean withDeletes = random().nextBoolean();
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);
  final int numDocs = atLeast(1000);
  final FixedBitSet deleted = new FixedBitSet(numDocs);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    int num = random().nextInt(10);
    for (int j = 0; j < num; j++) {
      doc.add(new SortedNumericDocValuesField("foo", random().nextInt(2000)));
    }
    doc.add(new StringField("id", Integer.toString(i), Store.YES));
    doc.add(new NumericDocValuesField("id", i));
    w.addDocument(doc);
    if (random().nextInt(5) == 0) {
      w.getReader().close();
    } else if (random().nextInt(30) == 0) {
      w.forceMerge(2);
    } else if (random().nextInt(4) == 0) {
      final int id = TestUtil.nextInt(random(), 0, i);
      deleted.set(id);
      w.deleteDocuments(new Term("id", Integer.toString(id)));
    }
  }
  DirectoryReader reader = w.getReader();
  // Now check that the index is consistent
  IndexSearcher searcher = newSearcher(reader);
  for (int i = 0; i < numDocs; ++i) {
    TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
    final TopDocs topDocs = searcher.search(termQuery, 1);
    if (deleted.get(i)) {
      assertEquals(0, topDocs.totalHits);
    } else {
      assertEquals(1, topDocs.totalHits);
      NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
      assertEquals(topDocs.scoreDocs[0].doc, values.advance(topDocs.scoreDocs[0].doc));
      assertEquals(i, values.longValue());
      Document document = reader.document(topDocs.scoreDocs[0].doc);
      assertEquals(Integer.toString(i), document.get("id"));
    }
  }
  reader.close();
  w.close();
  dir.close();
}
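This test follows the same deletion-ledger pattern as testRandom1, just with a multi-valued sort field. When such a test fails, a complementary FixedBitSet facet can help with diagnosis: nextSetBit enumerates the recorded ids, returning DocIdSetIterator.NO_MORE_DOCS once no further bit is set. A hypothetical helper along those lines:

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class DeletedIdsSketch {

  // Hypothetical debugging aid: list every id marked in the deletion ledger.
  static void printDeleted(FixedBitSet deleted) {
    int id = deleted.length() == 0 ? DocIdSetIterator.NO_MORE_DOCS : deleted.nextSetBit(0);
    while (id != DocIdSetIterator.NO_MORE_DOCS) {
      System.out.println("deleted id=" + id);
      if (id + 1 >= deleted.length()) {
        break;                          // nextSetBit requires an index below length()
      }
      id = deleted.nextSetBit(id + 1);
    }
  }
}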
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class SortingLeafReader, method getBinaryDocValues.
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  final BinaryDocValues oldDocValues = in.getBinaryDocValues(field);
  if (oldDocValues == null) {
    return null;
  }
  CachedBinaryDVs dvs;
  synchronized (cachedBinaryDVs) {
    dvs = cachedBinaryDVs.get(field);
    if (dvs == null) {
      FixedBitSet docsWithField = new FixedBitSet(maxDoc());
      BytesRef[] values = new BytesRef[maxDoc()];
      while (true) {
        int docID = oldDocValues.nextDoc();
        if (docID == NO_MORE_DOCS) {
          break;
        }
        int newDocID = docMap.oldToNew(docID);
        docsWithField.set(newDocID);
        values[newDocID] = BytesRef.deepCopyOf(oldDocValues.binaryValue());
      }
      dvs = new CachedBinaryDVs(values, docsWithField);
      cachedBinaryDVs.put(field, dvs);
    }
  }
  return new SortingBinaryDocValues(dvs);
}
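The cache fill above walks the unsorted doc-values iterator once, translating every old doc ID through docMap.oldToNew and marking the translated position in a FixedBitSet so the sorted view knows which documents carry a value. A simplified sketch of that remapping step, with a plain int[] standing in for Sorter.DocMap:

import org.apache.lucene.util.FixedBitSet;

public class DocsWithFieldSketch {

  static FixedBitSet remap(int[] oldDocsWithValue, int[] oldToNew, int maxDoc) {
    FixedBitSet docsWithField = new FixedBitSet(maxDoc);
    for (int oldDoc : oldDocsWithValue) {
      docsWithField.set(oldToNew[oldDoc]);  // record the value at its sorted position
    }
    return docsWithField;
  }
}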