use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestSegmentMerger method testBuildDocMap.
/**
 * Marks a random subset of documents as live, builds the doc map with
 * {@code MergeState.removeDeletes}, and verifies that every live document is
 * mapped to its original id minus the number of deleted documents before it.
 */
public void testBuildDocMap() {
  final int docCount = TestUtil.nextInt(random(), 1, 128);
  final int liveCount = TestUtil.nextInt(random(), 0, docCount);
  final FixedBitSet live = new FixedBitSet(docCount);

  // Keep drawing random ids until liveCount distinct bits have been set.
  int remaining = liveCount;
  while (remaining > 0) {
    final int candidate = random().nextInt(docCount);
    if (live.get(candidate)) {
      continue;
    }
    live.set(candidate);
    remaining--;
  }

  final PackedLongValues docMap = MergeState.removeDeletes(docCount, live);

  // The mapping must be compact: each live doc shifts down by the deletions seen so far.
  int deletedSoFar = 0;
  for (int doc = 0; doc < docCount; doc++) {
    if (live.get(doc)) {
      assertEquals(doc - deletedSoFar, docMap.get(doc));
    } else {
      deletedSoFar++;
    }
  }
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class SimpleTextBKDWriter method build.
/**
 * Recursively reorders the provided reader and writes the bkd-tree on the fly.
 *
 * <p>When {@code nodeID >= leafNodeOffset} the range {@code [from, to)} is written as one leaf
 * block; otherwise the range is partitioned around a middle index and the method recurses into
 * the children {@code nodeID * 2} and {@code nodeID * 2 + 1}.
 *
 * @param nodeID id of the node being built; compared against {@code leafNodeOffset} to tell
 *     leaves from inner nodes
 * @param leafNodeOffset first node id that is treated as a leaf; also subtracted from
 *     {@code nodeID} to index {@code leafBlockFPs}
 * @param reader the points to reorder in place and to read values and doc IDs from
 * @param from first index (inclusive) of the range handled by this node
 * @param to end index (exclusive) of the range handled by this node
 * @param out output that receives the leaf blocks; its file pointer is recorded per leaf
 * @param minPackedValue packed lower bound handed to {@code split} and to the ordering assertion
 * @param maxPackedValue packed upper bound handed to {@code split} and to the ordering assertion
 * @param splitPackedValues receives, at {@code nodeID * (1 + bytesPerDim)}, one byte holding the
 *     split dimension followed by {@code bytesPerDim} bytes of the split value
 * @param leafBlockFPs receives the file pointer at which each leaf block starts
 * @param spareDocIds scratch array reused for the doc IDs of a leaf; indexed from 0 to
 *     {@code to - from - 1}
 * @throws IOException declared for the I/O performed through {@code out} and the write helpers
 */
private void build(int nodeID, int leafNodeOffset, MutablePointValues reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) {
// leaf node
final int count = to - from;
assert count <= maxPointsInLeafNode;
// Compute common prefixes: start from the full width per dimension and shrink each
// entry to the first byte position where any value differs from the value at 'from'.
// NOTE(review): reader.getValue(from, ...) is called unconditionally; presumably callers
// never pass an empty range (from == to) -- confirm.
Arrays.fill(commonPrefixLengths, bytesPerDim);
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim = 0; dim < numDims; dim++) {
final int offset = dim * bytesPerDim;
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j] != scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
// Dimensions whose values are all identical (prefix == bytesPerDim) get no bit set and are
// skipped below.
FixedBitSet[] usedBytes = new FixedBitSet[numDims];
for (int dim = 0; dim < numDims; ++dim) {
if (commonPrefixLengths[dim] < bytesPerDim) {
usedBytes[dim] = new FixedBitSet(256);
}
}
// NOTE(review): this scan starts at from + 1, so the byte of the first point is never
// recorded. The cardinality is only used to pick sortedDim below -- confirm that skipping
// the first point is intentional.
for (int i = from + 1; i < to; ++i) {
for (int dim = 0; dim < numDims; dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
// Pick the dimension with the fewest distinct bytes; stays 0 when every dimension was skipped.
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < numDims; ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
sortedDim = dim;
sortedDimCardinality = cardinality;
}
}
}
// sort by sortedDim
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
// Write doc IDs (collected into the caller-supplied scratch array, starting at index 0)
int[] docIDs = spareDocIds;
for (int i = from; i < to; ++i) {
docIDs[i - from] = reader.getDocID(i);
}
writeLeafBlockDocs(out, docIDs, 0, count);
// Write the common prefixes:
// Only a copy of the first (post-sort) value into scratch1 happens here; nothing is written
// to 'out' at this point. scratch1 is not read again in this method -- presumably a helper
// consumes it; verify.
reader.getValue(from, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
// Write the full values:
// The accessor loads value (from + i) into the shared scratchBytesRef1 and returns that same
// object, so a returned reference is overwritten by the next apply() call.
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
@Override
public BytesRef apply(int i) {
reader.getValue(from + i, scratchBytesRef1);
return scratchBytesRef1;
}
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
// inner node
// compute the split dimension and partition around it
final int splitDim = split(minPackedValue, maxPackedValue);
// midpoint of [from, to), rounded up
final int mid = (from + to + 1) >>> 1;
// Length of the byte prefix shared by the min and max bounds in the split dimension.
int commonPrefixLen = bytesPerDim;
for (int i = 0; i < bytesPerDim; ++i) {
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
commonPrefixLen = i;
break;
}
}
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value: one byte for the dimension, then the split-dimension bytes of the
// value found at 'mid' after partitioning
final int address = nodeID * (1 + bytesPerDim);
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
// Child bounds: copies of the parent bounds with the split dimension replaced by the split value.
byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
// recurse: left child takes [from, mid) with the tightened max, right child takes [mid, to)
// with the tightened min
build(nodeID * 2, leafNodeOffset, reader, from, mid, out, minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
}
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestScorerPerf method addClause.
/**
 * Picks one of the pre-built bit sets at random, adds it to {@code bq} as a MUST clause, and,
 * when validation is enabled, folds it into the running expected intersection.
 *
 * @param bq builder that receives the new MUST clause
 * @param result expected intersection accumulated so far, or {@code null} if none yet
 * @return the updated expected intersection when validating; otherwise {@code result} unchanged
 */
FixedBitSet addClause(BooleanQuery.Builder bq, FixedBitSet result) {
  final FixedBitSet chosen = sets[random().nextInt(sets.length)];
  bq.add(new BitSetQuery(chosen), BooleanClause.Occur.MUST);

  if (!validate) {
    return result;
  }
  if (result != null) {
    // Intersect in place with the newly added clause.
    result.and(chosen);
    return result;
  }
  // First clause: start from a private copy so the shared set is never mutated.
  return chosen.clone();
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestScorerPerf method doConjunctions.
/**
 * Runs {@code iter} random conjunction queries, each with between 2 and {@code maxClauses}
 * MUST clauses, and accumulates the collectors' sums.
 *
 * @param iter number of queries to run
 * @param maxClauses upper bound (inclusive) on the number of clauses per query
 * @return the accumulated {@code getSum()} of all collectors
 * @throws IOException if the search fails
 */
public int doConjunctions(int iter, int maxClauses) throws IOException {
  int total = 0;
  for (int round = 0; round < iter; round++) {
    // at least two clauses
    final int clauseCount = 2 + random().nextInt(maxClauses - 1);
    final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();

    FixedBitSet expected = null;
    for (int added = 0; added < clauseCount; added++) {
      expected = addClause(conjunction, expected);
    }

    final CountingHitCollector collector;
    if (validate) {
      collector = new MatchingHitCollector(expected);
    } else {
      collector = new CountingHitCollector();
    }

    s.search(conjunction.build(), collector);
    total += collector.getSum();

    if (validate) {
      assertEquals(expected.cardinality(), collector.getCount());
    }
    // System.out.println(hc.getCount());
  }
  return total;
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestScorerPerf method doNestedConjunctions.
/**
 * Runs {@code iter} random nested conjunction queries: an outer query of between 2 and
 * {@code maxOuterClauses} MUST clauses, each of which is itself a conjunction of between 2 and
 * {@code maxClauses} MUST clauses. When validation is enabled, the hit count is checked against
 * the intersection of every bit set that was added.
 *
 * @param iter number of queries to run
 * @param maxOuterClauses upper bound (inclusive) on the number of outer clauses
 * @param maxClauses upper bound (inclusive) on the number of clauses per inner query
 * @return the accumulated {@code getSum()} of all collectors
 * @throws IOException if the search fails
 */
public int doNestedConjunctions(int iter, int maxOuterClauses, int maxClauses) throws IOException {
  int ret = 0;
  long nMatches = 0;
  for (int i = 0; i < iter; i++) {
    // min 2 outer clauses
    int oClauses = random().nextInt(maxOuterClauses - 1) + 2;
    BooleanQuery.Builder oq = new BooleanQuery.Builder();
    // The expected result spans all inner queries, since every clause is a MUST.
    FixedBitSet result = null;
    for (int o = 0; o < oClauses; o++) {
      // min 2 clauses
      int nClauses = random().nextInt(maxClauses - 1) + 2;
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
      for (int j = 0; j < nClauses; j++) {
        result = addClause(bq, result);
      }
      oq.add(bq.build(), BooleanClause.Occur.MUST);
    }
    // outer
    CountingHitCollector hc = validate ? new MatchingHitCollector(result) : new CountingHitCollector();
    s.search(oq.build(), hc);
    nMatches += hc.getCount();
    ret += hc.getSum();
    if (validate) {
      assertEquals(result.cardinality(), hc.getCount());
    }
    // System.out.println(hc.getCount());
  }
  if (VERBOSE) {
    // Guard the average: with no iterations the integer division would throw ArithmeticException.
    final long average = iter > 0 ? nMatches / iter : 0;
    System.out.println("Average number of matches=" + average);
  }
  return ret;
}
Aggregations