Use of org.apache.lucene.util.LongBitSet in project elasticsearch by elastic.
The class IncludeExcludeTests, method testEmptyTermsWithOrds.
public void testEmptyTermsWithOrds() throws IOException {
    IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
    OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
    assertEquals(0, acceptedOrds.length());

    inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
    assertEquals(0, acceptedOrds.length());
}
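The test relies on the fact that the accepted-ordinals result is a LongBitSet sized to the number of global ordinals, so an empty SortedSetDocValues always yields a bit set of length 0. The following is a minimal, self-contained sketch of the LongBitSet calls these tests depend on (the class name and values are illustrative, not from the project):

import org.apache.lucene.util.LongBitSet;

public class LongBitSetBasics {
    public static void main(String[] args) {
        // A LongBitSet is sized up front; length() reports that fixed capacity,
        // which is why an empty ordinal set above produces a bit set of length 0.
        LongBitSet accepted = new LongBitSet(4);     // room for global ordinals 0..3
        accepted.set(0);
        accepted.set(2);

        System.out.println(accepted.length());       // 4 (capacity, not number of set bits)
        System.out.println(accepted.get(1));         // false
        System.out.println(accepted.cardinality());  // 2
    }
}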
Use of org.apache.lucene.util.LongBitSet in project lucene-solr by apache.
The class DocValuesConsumer, method mergeSortedSetField.
/**
 * Merges the sortedset docvalues from <code>toMerge</code>.
 * <p>
 * The default implementation calls {@link #addSortedSetField}, passing
 * an Iterable that merges ordinals and values and filters deleted documents.
 */
public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
  List<SortedSetDocValues> toMerge = new ArrayList<>();
  for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
    SortedSetDocValues values = null;
    DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
    if (docValuesProducer != null) {
      FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
      if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
        values = docValuesProducer.getSortedSet(fieldInfo);
      }
    }
    if (values == null) {
      values = DocValues.emptySortedSet();
    }
    toMerge.add(values);
  }

  // step 1: iterate thru each sub and mark terms still in use
  TermsEnum[] liveTerms = new TermsEnum[toMerge.size()];
  long[] weights = new long[liveTerms.length];
  for (int sub = 0; sub < liveTerms.length; sub++) {
    SortedSetDocValues dv = toMerge.get(sub);
    Bits liveDocs = mergeState.liveDocs[sub];
    if (liveDocs == null) {
      liveTerms[sub] = dv.termsEnum();
      weights[sub] = dv.getValueCount();
    } else {
      LongBitSet bitset = new LongBitSet(dv.getValueCount());
      int docID;
      while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
        if (liveDocs.get(docID)) {
          long ord;
          while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            bitset.set(ord);
          }
        }
      }
      liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      weights[sub] = bitset.cardinality();
    }
  }

  // step 2: create ordinal map (this conceptually does the "merging")
  final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);

  // step 3: add field
  addSortedSetField(mergeFieldInfo, new EmptyDocValuesProducer() {
      @Override
      public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
        if (fieldInfo != mergeFieldInfo) {
          throw new IllegalArgumentException("wrong FieldInfo");
        }

        // We must make new iterators + DocIDMerger for each iterator:
        List<SortedSetDocValuesSub> subs = new ArrayList<>();
        long cost = 0;
        for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
          SortedSetDocValues values = null;
          DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
          if (docValuesProducer != null) {
            FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
            if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
              values = docValuesProducer.getSortedSet(readerFieldInfo);
            }
          }
          if (values == null) {
            values = DocValues.emptySortedSet();
          }
          cost += values.cost();
          subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
        }

        final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
        final long finalCost = cost;

        return new SortedSetDocValues() {
          private int docID = -1;
          private SortedSetDocValuesSub currentSub;

          @Override
          public int docID() {
            return docID;
          }

          @Override
          public int nextDoc() throws IOException {
            currentSub = docIDMerger.next();
            if (currentSub == null) {
              docID = NO_MORE_DOCS;
            } else {
              docID = currentSub.mappedDocID;
            }
            return docID;
          }

          @Override
          public int advance(int target) throws IOException {
            throw new UnsupportedOperationException();
          }

          @Override
          public boolean advanceExact(int target) throws IOException {
            throw new UnsupportedOperationException();
          }

          @Override
          public long nextOrd() throws IOException {
            long subOrd = currentSub.values.nextOrd();
            if (subOrd == NO_MORE_ORDS) {
              return NO_MORE_ORDS;
            }
            return currentSub.map.get(subOrd);
          }

          @Override
          public long cost() {
            return finalCost;
          }

          @Override
          public BytesRef lookupOrd(long ord) throws IOException {
            int segmentNumber = map.getFirstSegmentNumber(ord);
            long segmentOrd = map.getFirstSegmentOrd(ord);
            return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
          }

          @Override
          public long getValueCount() {
            return map.getValueCount();
          }
        };
      }
    });
}
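In step 1 above, each segment gets a LongBitSet sized to its value count; the ordinal of every value that still belongs to a live document is set, and cardinality() becomes that segment's weight for OrdinalMap.build. The sketch below replays just that marking pattern with plain arrays standing in for the segment's doc values; it is illustrative only, and the class and helper names are not from Lucene:

import org.apache.lucene.util.LongBitSet;

public class LiveOrdMarkingSketch {
    // Hypothetical inputs standing in for a segment's SortedSetDocValues:
    // ordsPerDoc[doc] holds the ordinals of that doc, liveDocs[doc] says whether it survived deletes.
    static long markLiveOrds(long valueCount, long[][] ordsPerDoc, boolean[] liveDocs) {
        LongBitSet bitset = new LongBitSet(valueCount);  // one bit per ordinal in this segment
        for (int doc = 0; doc < ordsPerDoc.length; doc++) {
            if (liveDocs[doc]) {
                for (long ord : ordsPerDoc[doc]) {
                    bitset.set(ord);                     // this term is still referenced by a live doc
                }
            }
        }
        return bitset.cardinality();                     // the "weight" handed to OrdinalMap.build
    }

    public static void main(String[] args) {
        long[][] ords = { {0, 2}, {1}, {2} };
        boolean[] live = { true, false, true };
        System.out.println(markLiveOrds(3, ords, live)); // 2: ordinals 0 and 2 survive, ordinal 1 does not
    }
}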
Use of org.apache.lucene.util.LongBitSet in project lucene-solr by apache.
The class BKDWriter, method finish.
/** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where the index was written. */
public long finish(IndexOutput out) throws IOException {

  // Catch user silliness:
  if (heapPointWriter == null && tempInput == null) {
    throw new IllegalStateException("already finished");
  }

  if (offlinePointWriter != null) {
    offlinePointWriter.close();
  }

  if (pointCount == 0) {
    throw new IllegalStateException("must index at least one point");
  }

  LongBitSet ordBitSet;
  if (numDims > 1) {
    if (singleValuePerDoc) {
      ordBitSet = new LongBitSet(maxDoc);
    } else {
      ordBitSet = new LongBitSet(pointCount);
    }
  } else {
    ordBitSet = null;
  }

  long countPerLeaf = pointCount;
  long innerNodeCount = 1;

  while (countPerLeaf > maxPointsInLeafNode) {
    countPerLeaf = (countPerLeaf + 1) / 2;
    innerNodeCount *= 2;
  }

  int numLeaves = (int) innerNodeCount;

  checkMaxLeafNodeCount(numLeaves);

  // NOTE: we could save the 1+ here, to use a bit less heap at search time, but then we'd need a somewhat costly check at each
  // step of the recursion to recompute the split dim:

  // Indexed by nodeID, but first (root) nodeID is 1. We do 1+ because the lead byte at each recursion says which dim we split on.
  byte[] splitPackedValues = new byte[Math.toIntExact(numLeaves * (1 + bytesPerDim))];

  // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
  long[] leafBlockFPs = new long[numLeaves];

  // Make sure the math above "worked":
  assert pointCount / numLeaves <= maxPointsInLeafNode : "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;

  // Sort all docs once by each dimension:
  PathSlice[] sortedPointWriters = new PathSlice[numDims];

  // This is only used on exception; on normal code paths we close all files we opened:
  List<Closeable> toCloseHeroically = new ArrayList<>();

  boolean success = false;
  try {
    //long t0 = System.nanoTime();
    for (int dim = 0; dim < numDims; dim++) {
      sortedPointWriters[dim] = new PathSlice(sort(dim), 0, pointCount);
    }

    if (tempInput != null) {
      tempDir.deleteFile(tempInput.getName());
      tempInput = null;
    } else {
      assert heapPointWriter != null;
      heapPointWriter = null;
    }

    final int[] parentSplits = new int[numDims];
    build(1, numLeaves, sortedPointWriters, ordBitSet, out, minPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically);
    assert Arrays.equals(parentSplits, new int[numDims]);

    for (PathSlice slice : sortedPointWriters) {
      slice.writer.destroy();
    }

    // If no exception, we should have cleaned everything up:
    assert tempDir.getCreatedFiles().isEmpty();
    //long t2 = System.nanoTime();
    //System.out.println("write time: " + ((t2-t1)/1000000.0) + " msec");

    success = true;
  } finally {
    if (success == false) {
      IOUtils.deleteFilesIgnoringExceptions(tempDir, tempDir.getCreatedFiles());
      IOUtils.closeWhileHandlingException(toCloseHeroically);
    }
  }

  //System.out.println("Total nodes: " + innerNodeCount);

  // Write index:
  long indexFP = out.getFilePointer();
  writeIndex(out, Math.toIntExact(countPerLeaf), leafBlockFPs, splitPackedValues);
  return indexFP;
}
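Two details are easy to miss here: the ordinal LongBitSet is only allocated in the multi-dimensional case, sized to maxDoc when each document carries a single point and to pointCount otherwise; and the leaf count comes from a simple halving loop that keeps splitting until every leaf holds at most maxPointsInLeafNode points. A small sketch of just that arithmetic, with illustrative names and values not taken from BKDWriter:

public class LeafCountSketch {
    // Mirrors the halving loop in finish(): each pass halves the per-leaf count
    // (rounding up) and doubles the leaf count.
    static long[] leafMath(long pointCount, int maxPointsInLeafNode) {
        long countPerLeaf = pointCount;
        long innerNodeCount = 1;
        while (countPerLeaf > maxPointsInLeafNode) {
            countPerLeaf = (countPerLeaf + 1) / 2;
            innerNodeCount *= 2;
        }
        return new long[] { innerNodeCount, countPerLeaf }; // {numLeaves, points per leaf}
    }

    public static void main(String[] args) {
        long[] r = leafMath(10_000, 1024);
        System.out.println(r[0] + " leaves, <= " + r[1] + " points each"); // 16 leaves, <= 625 points each
    }
}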
Use of org.apache.lucene.util.LongBitSet in project elasticsearch by elastic.
The class IncludeExcludeTests, method testSingleTermWithOrds.
public void testSingleTermWithOrds() throws IOException {
    RandomAccessOrds ords = new RandomAccessOrds() {

        boolean consumed = true;

        @Override
        public void setDocument(int docID) {
            consumed = false;
        }

        @Override
        public long nextOrd() {
            if (consumed) {
                return SortedSetDocValues.NO_MORE_ORDS;
            } else {
                consumed = true;
                return 0;
            }
        }

        @Override
        public BytesRef lookupOrd(long ord) {
            assertEquals(0, ord);
            return new BytesRef("foo");
        }

        @Override
        public long getValueCount() {
            return 1;
        }

        @Override
        public long ordAt(int index) {
            return 0;
        }

        @Override
        public int cardinality() {
            return 1;
        }
    };

    IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
    OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertTrue(acceptedOrds.get(0));

    inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertFalse(acceptedOrds.get(0));

    inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertFalse(acceptedOrds.get(0));

    inexcl = new IncludeExclude(
        null, // means everything included
        new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertFalse(acceptedOrds.get(0));
}
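The four cases exercise the same rule: the result bit set is sized to getValueCount(), and an ordinal's bit is set only when its term passes the include set (or there is no include set) and is not in the exclude set. The sketch below reproduces that accept logic with plain Java sets; it is a stand-in for illustration, not the actual Elasticsearch OrdinalsFilter implementation, and all names are hypothetical:

import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;

public class AcceptedOrdsSketch {
    // Hypothetical stand-in for acceptedGlobalOrdinals: terms[ord] is the term for
    // global ordinal ord; include == null means "everything included".
    static LongBitSet acceptedOrds(BytesRef[] terms, Set<BytesRef> include, Set<BytesRef> exclude) {
        LongBitSet accepted = new LongBitSet(terms.length);   // one bit per global ordinal
        for (int ord = 0; ord < terms.length; ord++) {
            boolean included = include == null || include.contains(terms[ord]);
            boolean excluded = exclude != null && exclude.contains(terms[ord]);
            if (included && !excluded) {
                accepted.set(ord);
            }
        }
        return accepted;
    }

    public static void main(String[] args) {
        BytesRef[] terms = { new BytesRef("foo") };
        LongBitSet ords = acceptedOrds(terms, null, Set.of(new BytesRef("foo")));
        System.out.println(ords.length()); // 1: length always follows the ordinal count
        System.out.println(ords.get(0));   // false: "foo" is excluded, mirroring the last assertion above
    }
}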
Use of org.apache.lucene.util.LongBitSet in project lucene-solr by apache.
The class TestLegacyNumericUtils, method assertLongRangeSplit.
// INFO: Tests for trieCodeLong()/trieCodeInt() not needed because implicitly tested by range filter tests
/** Note: The neededBounds Iterable must be unsigned (unsigned values make it easier to follow what's happening) */
private void assertLongRangeSplit(final long lower, final long upper, int precisionStep, final boolean useBitSet, final Iterable<Long> expectedBounds, final Iterable<Integer> expectedShifts) {
    // Cannot use FixedBitSet since the range could be long:
    final LongBitSet bits = useBitSet ? new LongBitSet(upper - lower + 1) : null;
    final Iterator<Long> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator();
    final Iterator<Integer> neededShifts = (expectedShifts == null) ? null : expectedShifts.iterator();

    LegacyNumericUtils.splitLongRange(new LegacyNumericUtils.LongRangeBuilder() {

        @Override
        public void addRange(long min, long max, int shift) {
            assertTrue("min, max should be inside bounds", min >= lower && min <= upper && max >= lower && max <= upper);
            if (useBitSet) {
                for (long l = min; l <= max; l++) {
                    assertFalse("ranges should not overlap", bits.getAndSet(l - lower));
                    // extra exit condition to prevent overflow on MAX_VALUE
                    if (l == max) {
                        break;
                    }
                }
            }
            if (neededBounds == null || neededShifts == null) {
                return;
            }
            // make unsigned longs for easier display and understanding
            min ^= 0x8000000000000000L;
            max ^= 0x8000000000000000L;
            //System.out.println("0x"+Long.toHexString(min>>>shift)+"L,0x"+Long.toHexString(max>>>shift)+"L)/*shift="+shift+"*/,");
            assertEquals("shift", neededShifts.next().intValue(), shift);
            assertEquals("inner min bound", neededBounds.next().longValue(), min >>> shift);
            assertEquals("inner max bound", neededBounds.next().longValue(), max >>> shift);
        }
    }, precisionStep, lower, upper);

    if (useBitSet) {
        // after flipping all bits in the range, the cardinality should be zero
        bits.flip(0, upper - lower + 1);
        assertEquals("The sub-ranges concatenated should match the whole range", 0, bits.cardinality());
    }
}
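The coverage check rests on three LongBitSet operations: getAndSet detects overlap (it returns the previous bit), flip over the whole range inverts coverage, and cardinality() == 0 then proves the sub-ranges tile [lower, upper] exactly. A minimal sketch of just that verification pattern, using hard-coded illustrative bounds and sub-ranges rather than values produced by splitLongRange:

import org.apache.lucene.util.LongBitSet;

public class RangeCoverageSketch {
    public static void main(String[] args) {
        long lower = 10, upper = 25;                        // illustrative bounds, not from the test
        long[][] subRanges = { {10, 13}, {14, 21}, {22, 25} };

        LongBitSet bits = new LongBitSet(upper - lower + 1);
        for (long[] r : subRanges) {
            for (long l = r[0]; l <= r[1]; l++) {
                // getAndSet returns the previous value, so 'true' would mean this value
                // was already covered by an earlier sub-range (an overlap).
                if (bits.getAndSet(l - lower)) {
                    throw new AssertionError("ranges overlap at " + l);
                }
            }
        }

        // Flipping the whole range turns covered bits off and uncovered bits on,
        // so a cardinality of 0 proves the sub-ranges cover [lower, upper] with no gaps.
        bits.flip(0, upper - lower + 1);
        System.out.println(bits.cardinality() == 0 ? "full coverage" : "gaps remain");
    }
}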