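The examples below collect uses of org.apache.lucene.util.LongBitSet, Lucene's fixed-capacity bit set addressable by long indices (the comment in Example 5 notes why it is needed: FixedBitSet is limited to int-sized ranges), from the elasticsearch and lucene-solr projects. For orientation, here is a minimal sketch of the LongBitSet calls the examples rely on; the class name, capacity, and indices are illustrative, not taken from any of the projects:

import org.apache.lucene.util.LongBitSet;

public class LongBitSetSketch {
    public static void main(String[] args) {
        LongBitSet bits = new LongBitSet(128);      // fixed capacity of 128 bits, indexed by long
        bits.set(42);                               // mark a single bit
        boolean wasSet = bits.getAndSet(42);        // true: bit 42 was already set, and it stays set
        bits.flip(0, bits.length());                // invert every bit in [0, 128)
        System.out.println(bits.cardinality());     // 127: all bits are set except bit 42
        System.out.println(bits.get(42));           // false: the flip cleared bit 42
    }
}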

Example 1 with LongBitSet

Use of org.apache.lucene.util.LongBitSet in project elasticsearch by elastic: class IncludeExcludeTests, method testEmptyTermsWithOrds.

public void testEmptyTermsWithOrds() throws IOException {
    IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
    OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
    assertEquals(0, acceptedOrds.length());
    inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
    assertEquals(0, acceptedOrds.length());
}
Also used: TreeSet (java.util.TreeSet), IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude), OrdinalsFilter (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.OrdinalsFilter), LongBitSet (org.apache.lucene.util.LongBitSet), BytesRef (org.apache.lucene.util.BytesRef)
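The LongBitSet returned by acceptedGlobalOrdinals holds one bit per global ordinal: length() is the ordinal count and get(ord) reports whether that term survived the filter. A hedged sketch of how such a result could be consumed; the loop and method name are illustrative, not the actual Elasticsearch aggregator code:

import org.apache.lucene.util.LongBitSet;

class AcceptedOrdsSketch {
    // Illustrative only: visit every global ordinal accepted by an include/exclude filter.
    static void forEachAcceptedOrd(LongBitSet acceptedOrds) {
        for (long ord = 0; ord < acceptedOrds.length(); ord++) {
            if (acceptedOrds.get(ord)) {
                // this term's global ordinal passed the filter; a real caller would collect its bucket here
            }
        }
    }
}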

Example 2 with LongBitSet

Use of org.apache.lucene.util.LongBitSet in project lucene-solr by apache: class DocValuesConsumer, method mergeSortedSetField.

/**
   * Merges the sortedset docvalues from <code>toMerge</code>.
   * <p>
   * The default implementation calls {@link #addSortedSetField}, passing
   * an Iterable that merges ordinals and values and filters deleted documents.
   */
public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
    List<SortedSetDocValues> toMerge = new ArrayList<>();
    for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedSetDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
            FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
            if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                values = docValuesProducer.getSortedSet(fieldInfo);
            }
        }
        if (values == null) {
            values = DocValues.emptySortedSet();
        }
        toMerge.add(values);
    }
    // step 1: iterate through each sub and mark terms still in use
    TermsEnum[] liveTerms = new TermsEnum[toMerge.size()];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
        SortedSetDocValues dv = toMerge.get(sub);
        Bits liveDocs = mergeState.liveDocs[sub];
        if (liveDocs == null) {
            liveTerms[sub] = dv.termsEnum();
            weights[sub] = dv.getValueCount();
        } else {
            LongBitSet bitset = new LongBitSet(dv.getValueCount());
            int docID;
            while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
                if (liveDocs.get(docID)) {
                    long ord;
                    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                        bitset.set(ord);
                    }
                }
            }
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
            weights[sub] = bitset.cardinality();
        }
    }
    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);
    // step 3: add field
    addSortedSetField(mergeFieldInfo, new EmptyDocValuesProducer() {

        @Override
        public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
            if (fieldInfo != mergeFieldInfo) {
                throw new IllegalArgumentException("wrong FieldInfo");
            }
            // We must make new iterators + DocIDMerger for each iterator:
            List<SortedSetDocValuesSub> subs = new ArrayList<>();
            long cost = 0;
            for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
                SortedSetDocValues values = null;
                DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
                if (docValuesProducer != null) {
                    FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
                    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                        values = docValuesProducer.getSortedSet(readerFieldInfo);
                    }
                }
                if (values == null) {
                    values = DocValues.emptySortedSet();
                }
                cost += values.cost();
                subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
            }
            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
            final long finalCost = cost;
            return new SortedSetDocValues() {

                private int docID = -1;

                private SortedSetDocValuesSub currentSub;

                @Override
                public int docID() {
                    return docID;
                }

                @Override
                public int nextDoc() throws IOException {
                    currentSub = docIDMerger.next();
                    if (currentSub == null) {
                        docID = NO_MORE_DOCS;
                    } else {
                        docID = currentSub.mappedDocID;
                    }
                    return docID;
                }

                @Override
                public int advance(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public long nextOrd() throws IOException {
                    long subOrd = currentSub.values.nextOrd();
                    if (subOrd == NO_MORE_ORDS) {
                        return NO_MORE_ORDS;
                    }
                    return currentSub.map.get(subOrd);
                }

                @Override
                public long cost() {
                    return finalCost;
                }

                @Override
                public BytesRef lookupOrd(long ord) throws IOException {
                    int segmentNumber = map.getFirstSegmentNumber(ord);
                    long segmentOrd = map.getFirstSegmentOrd(ord);
                    return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
                }

                @Override
                public long getValueCount() {
                    return map.getValueCount();
                }
            };
        }
    });
}
Also used: ArrayList (java.util.ArrayList), EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer), LongBitSet (org.apache.lucene.util.LongBitSet), IOException (java.io.IOException), OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap), TermsEnum (org.apache.lucene.index.TermsEnum), FilteredTermsEnum (org.apache.lucene.index.FilteredTermsEnum), SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues), DocIDMerger (org.apache.lucene.index.DocIDMerger), Bits (org.apache.lucene.util.Bits), List (java.util.List), FieldInfo (org.apache.lucene.index.FieldInfo), BytesRef (org.apache.lucene.util.BytesRef)
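The OrdinalMap built in step 2 is what conceptually performs the merge: every distinct live term across the segments receives a single global ordinal. A hedged sketch of the two lookup directions the merged iterator above uses (nextOrd remaps a segment-local ordinal to its global ordinal; lookupOrd resolves a global ordinal back through the first segment that holds the term); the class and method names here are illustrative:

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;

class OrdinalRemapSketch {
    // Illustrative only: remap a segment-local ordinal to its global ordinal,
    // then resolve the global ordinal back to the term bytes.
    static BytesRef remapAndResolve(OrdinalMap map, List<SortedSetDocValues> toMerge) throws IOException {
        LongValues toGlobal = map.getGlobalOrds(0);       // remapper for segment 0
        long globalOrd = toGlobal.get(0);                 // segment-local ord 0 -> merged global ord
        int seg = map.getFirstSegmentNumber(globalOrd);   // first segment that contains this term
        long segOrd = map.getFirstSegmentOrd(globalOrd);  // the term's ordinal within that segment
        return toMerge.get(seg).lookupOrd(segOrd);        // term bytes for the global ordinal
    }
}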

Example 3 with LongBitSet

Use of org.apache.lucene.util.LongBitSet in project lucene-solr by apache: class BKDWriter, method finish.

/** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
public long finish(IndexOutput out) throws IOException {
    // Catch user silliness:
    if (heapPointWriter == null && tempInput == null) {
        throw new IllegalStateException("already finished");
    }
    if (offlinePointWriter != null) {
        offlinePointWriter.close();
    }
    if (pointCount == 0) {
        throw new IllegalStateException("must index at least one point");
    }
    LongBitSet ordBitSet;
    if (numDims > 1) {
        if (singleValuePerDoc) {
            ordBitSet = new LongBitSet(maxDoc);
        } else {
            ordBitSet = new LongBitSet(pointCount);
        }
    } else {
        ordBitSet = null;
    }
    long countPerLeaf = pointCount;
    long innerNodeCount = 1;
    while (countPerLeaf > maxPointsInLeafNode) {
        countPerLeaf = (countPerLeaf + 1) / 2;
        innerNodeCount *= 2;
    }
    int numLeaves = (int) innerNodeCount;
    checkMaxLeafNodeCount(numLeaves);
    // NOTE: we could save the 1+ here, to use a bit less heap at search time, but then we'd need a somewhat costly check at each
    // step of the recursion to recompute the split dim:
    // Indexed by nodeID, but first (root) nodeID is 1.  We do 1+ because the lead byte at each recursion says which dim we split on.
    byte[] splitPackedValues = new byte[Math.toIntExact(numLeaves * (1 + bytesPerDim))];
    // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
    long[] leafBlockFPs = new long[numLeaves];
    // Make sure the math above "worked":
    assert pointCount / numLeaves <= maxPointsInLeafNode : "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
    // Sort all docs once by each dimension:
    PathSlice[] sortedPointWriters = new PathSlice[numDims];
    // This is only used on exception; on normal code paths we close all files we opened:
    List<Closeable> toCloseHeroically = new ArrayList<>();
    boolean success = false;
    try {
        //long t0 = System.nanoTime();
        for (int dim = 0; dim < numDims; dim++) {
            sortedPointWriters[dim] = new PathSlice(sort(dim), 0, pointCount);
        }
        if (tempInput != null) {
            tempDir.deleteFile(tempInput.getName());
            tempInput = null;
        } else {
            assert heapPointWriter != null;
            heapPointWriter = null;
        }
        final int[] parentSplits = new int[numDims];
        build(1, numLeaves, sortedPointWriters, ordBitSet, out, minPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, toCloseHeroically);
        assert Arrays.equals(parentSplits, new int[numDims]);
        for (PathSlice slice : sortedPointWriters) {
            slice.writer.destroy();
        }
        // If no exception, we should have cleaned everything up:
        assert tempDir.getCreatedFiles().isEmpty();
        //long t2 = System.nanoTime();
        //System.out.println("write time: " + ((t2-t1)/1000000.0) + " msec");
        success = true;
    } finally {
        if (success == false) {
            IOUtils.deleteFilesIgnoringExceptions(tempDir, tempDir.getCreatedFiles());
            IOUtils.closeWhileHandlingException(toCloseHeroically);
        }
    }
    //System.out.println("Total nodes: " + innerNodeCount);
    // Write index:
    long indexFP = out.getFilePointer();
    writeIndex(out, Math.toIntExact(countPerLeaf), leafBlockFPs, splitPackedValues);
    return indexFP;
}
Also used: Closeable (java.io.Closeable), ArrayList (java.util.ArrayList), LongBitSet (org.apache.lucene.util.LongBitSet)
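For intuition about the leaf-count loop in finish(): with illustrative values pointCount = 2000 and maxPointsInLeafNode = 512, countPerLeaf halves from 2000 to 1000 to 500 while innerNodeCount doubles from 1 to 2 to 4, so numLeaves = 4 and 2000 / 4 = 500 <= 512, which is exactly the invariant the assert checks.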

Example 4 with LongBitSet

Use of org.apache.lucene.util.LongBitSet in project elasticsearch by elastic: class IncludeExcludeTests, method testSingleTermWithOrds.

public void testSingleTermWithOrds() throws IOException {
    RandomAccessOrds ords = new RandomAccessOrds() {

        boolean consumed = true;

        @Override
        public void setDocument(int docID) {
            consumed = false;
        }

        @Override
        public long nextOrd() {
            if (consumed) {
                return SortedSetDocValues.NO_MORE_ORDS;
            } else {
                consumed = true;
                return 0;
            }
        }

        @Override
        public BytesRef lookupOrd(long ord) {
            assertEquals(0, ord);
            return new BytesRef("foo");
        }

        @Override
        public long getValueCount() {
            return 1;
        }

        @Override
        public long ordAt(int index) {
            return 0;
        }

        @Override
        public int cardinality() {
            return 1;
        }
    };
    IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
    OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertTrue(acceptedOrds.get(0));
    inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertFalse(acceptedOrds.get(0));
    inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertFalse(acceptedOrds.get(0));
    inexcl = new IncludeExclude(
        null, // a null include set means everything is included
        new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
    filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertFalse(acceptedOrds.get(0));
}
Also used: RandomAccessOrds (org.apache.lucene.index.RandomAccessOrds), TreeSet (java.util.TreeSet), IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude), OrdinalsFilter (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.OrdinalsFilter), LongBitSet (org.apache.lucene.util.LongBitSet), BytesRef (org.apache.lucene.util.BytesRef)
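Note what these four cases pin down about IncludeExclude's semantics: a term in the include set is accepted, a term missing from the include set is rejected, a term in both the include and the exclude set is rejected (exclusion wins), and a null include set accepts everything except the excluded terms.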

Example 5 with LongBitSet

Use of org.apache.lucene.util.LongBitSet in project lucene-solr by apache: class TestLegacyNumericUtils, method assertLongRangeSplit.

// INFO: Tests for trieCodeLong()/trieCodeInt() are not needed because they are implicitly tested by the range filter tests
/** Note: The neededBounds Iterable must be unsigned (makes it easier to understand what's happening) */
private void assertLongRangeSplit(final long lower, final long upper, int precisionStep, final boolean useBitSet, final Iterable<Long> expectedBounds, final Iterable<Integer> expectedShifts) {
    // Cannot use FixedBitSet since the range could be long:
    final LongBitSet bits = useBitSet ? new LongBitSet(upper - lower + 1) : null;
    final Iterator<Long> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator();
    final Iterator<Integer> neededShifts = (expectedShifts == null) ? null : expectedShifts.iterator();
    LegacyNumericUtils.splitLongRange(new LegacyNumericUtils.LongRangeBuilder() {

        @Override
        public void addRange(long min, long max, int shift) {
            assertTrue("min, max should be inside bounds", min >= lower && min <= upper && max >= lower && max <= upper);
            if (useBitSet) {
                for (long l = min; l <= max; l++) {
                    assertFalse("ranges should not overlap", bits.getAndSet(l - lower));
                    // extra exit condition to prevent overflow on MAX_VALUE
                    if (l == max) {
                        break;
                    }
                }
            }
            if (neededBounds == null || neededShifts == null) {
                return;
            }
            // make unsigned longs for easier display and understanding
            min ^= 0x8000000000000000L;
            max ^= 0x8000000000000000L;
            //System.out.println("0x"+Long.toHexString(min>>>shift)+"L,0x"+Long.toHexString(max>>>shift)+"L)/*shift="+shift+"*/,");
            assertEquals("shift", neededShifts.next().intValue(), shift);
            assertEquals("inner min bound", neededBounds.next().longValue(), min >>> shift);
            assertEquals("inner max bound", neededBounds.next().longValue(), max >>> shift);
        }
    }, precisionStep, lower, upper);
    if (useBitSet) {
        // every bit in [0, upper - lower] was set exactly once by the non-overlapping sub-ranges,
        // so flipping the whole range leaves zero set bits only if the sub-ranges tile it completely
        bits.flip(0, upper - lower + 1);
        assertEquals("The sub-range concenated should match the whole range", 0, bits.cardinality());
    }
}
Also used: LegacyNumericUtils (org.apache.solr.legacy.LegacyNumericUtils), LongBitSet (org.apache.lucene.util.LongBitSet)

Aggregations (types used together with LongBitSet across all indexed examples, with use counts)

LongBitSet (org.apache.lucene.util.LongBitSet): 9
BytesRef (org.apache.lucene.util.BytesRef): 6
ArrayList (java.util.ArrayList): 4
IOException (java.io.IOException): 3
Closeable (java.io.Closeable): 2
List (java.util.List): 2
TreeSet (java.util.TreeSet): 2
DocIDMerger (org.apache.lucene.index.DocIDMerger): 2
EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer): 2
FieldInfo (org.apache.lucene.index.FieldInfo): 2
FilteredTermsEnum (org.apache.lucene.index.FilteredTermsEnum): 2
OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap): 2
SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 2
TermsEnum (org.apache.lucene.index.TermsEnum): 2
Bits (org.apache.lucene.util.Bits): 2
IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude): 2
OrdinalsFilter (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.OrdinalsFilter): 2
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 1
TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator): 1
RandomAccessOrds (org.apache.lucene.index.RandomAccessOrds): 1