Search in sources:

Example 16 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class Lucene54DocValuesProducer method getSortedSetWithAddresses.

/**
 * Builds the sorted-set doc values for {@code field} from three pieces: the term
 * bytes, the stream of ordinals, and the per-document addresses into that stream.
 *
 * @param field the field whose sorted-set values are requested; only {@code field.name}
 *              is read here, the field itself is forwarded to the helper lookups
 * @return a wrapper around an anonymous {@code LegacySortedSetDocValues}, built with {@code maxDoc}
 * @throws IOException declared for the helper lookups invoked while assembling the view
 */
private SortedSetDocValues getSortedSetWithAddresses(FieldInfo field) throws IOException {
    final long termCount = binaries.get(field.name).count;
    // The byte[]s and the list of ords stay on disk, since these could be large ...
    final LongBinaryDocValues termBytes = (LongBinaryDocValues) getLegacyBinary(field);
    final LongValues ordStream = getNumeric(ords.get(field.name));
    // ... whereas the addresses into the ord stream are held in RAM.
    final LongValues ordAddresses = getOrdIndexInstance(field, ordIndexes.get(field.name));

    final LegacySortedSetDocValues legacy = new LegacySortedSetDocValues() {

        // Address of the first ord of the current document (written in setDocument).
        long firstOffset;

        // Address of the next ord that nextOrd() will hand out.
        long cursor;

        // Address one past the last ord of the current document.
        long limit;

        @Override
        public long nextOrd() {
            if (cursor != limit) {
                final long ord = ordStream.get(cursor);
                cursor++;
                return ord;
            }
            return NO_MORE_ORDS;
        }

        @Override
        public void setDocument(int docID) {
            // Ords of docID occupy [address(docID), address(docID + 1)) in the ord stream.
            final long begin = ordAddresses.get(docID);
            firstOffset = begin;
            cursor = begin;
            limit = ordAddresses.get(docID + 1L);
        }

        @Override
        public BytesRef lookupOrd(long ord) {
            return termBytes.get(ord);
        }

        @Override
        public long getValueCount() {
            return termCount;
        }

        @Override
        public long lookupTerm(BytesRef key) {
            // Delegate to the compressed representation's own lookup when it is available.
            if (!(termBytes instanceof CompressedBinaryDocValues)) {
                return super.lookupTerm(key);
            }
            return ((CompressedBinaryDocValues) termBytes).lookupTerm(key);
        }

        @Override
        public TermsEnum termsEnum() throws IOException {
            // Delegate to the compressed representation's own enum when it is available.
            if (!(termBytes instanceof CompressedBinaryDocValues)) {
                return super.termsEnum();
            }
            return ((CompressedBinaryDocValues) termBytes).getTermsEnum();
        }
    };
    return new LegacySortedSetDocValuesWrapper(legacy, maxDoc);
}
Also used : LongValues(org.apache.lucene.util.LongValues) IOException(java.io.IOException) BytesRef(org.apache.lucene.util.BytesRef)

Example 17 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class Lucene54DocValuesProducer method getSparseNumericDocValues.

/**
 * Creates the sparse numeric doc values described by {@code entry}.
 *
 * <p>The doc IDs are read through a monotonic reader over the slice
 * {@code [entry.missingOffset, entry.offset)} of {@code this.data}; the values come from
 * {@code entry.nonMissingValues}.
 *
 * @param entry metadata describing where the doc IDs and the values are stored
 * @return the sparse numeric doc values assembled from the doc IDs and the values
 * @throws IOException declared for slicing the data input and creating the readers
 * @throws ArithmeticException if {@code entry.numDocsWithValue} does not fit in an {@code int}
 */
private SparseNumericDocValues getSparseNumericDocValues(NumericEntry entry) throws IOException {
    final long docIdsLength = entry.offset - entry.missingOffset;
    final RandomAccessInput docIdsSlice = this.data.randomAccessSlice(entry.missingOffset, docIdsLength);
    final LongValues docIdReader = DirectMonotonicReader.getInstance(entry.monotonicMeta, docIdsSlice);
    // Per the original note, the non-missing values entry cannot itself be sparse.
    final LongValues nonMissing = getNumeric(entry.nonMissingValues);
    final int docsWithValue = Math.toIntExact(entry.numDocsWithValue);
    return new SparseNumericDocValues(docsWithValue, docIdReader, nonMissing);
}
Also used : RandomAccessInput(org.apache.lucene.store.RandomAccessInput) LongValues(org.apache.lucene.util.LongValues)

Example 18 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class Lucene54DocValuesProducer method getVariableBinary.

/**
 * Creates variable-length binary doc values backed by two slices of {@code this.data}:
 * an address table (read through a monotonic reader) and the raw value bytes.
 *
 * <p>The returned instance reuses one {@link BytesRef} and its backing array for every
 * {@code get} call, so each call overwrites the bytes returned by the previous one.
 *
 * @param field the field being read (not used by this method's body)
 * @param bytes metadata giving the offsets of the value bytes and of the address table
 * @return binary doc values that look a value up by its {@code long} id
 * @throws IOException declared for creating the slices and the address reader
 */
private LegacyBinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
    final long addressesLength = bytes.addressesEndOffset - bytes.addressesOffset;
    final RandomAccessInput addressesSlice = this.data.randomAccessSlice(bytes.addressesOffset, addressesLength);
    final LongValues addresses = DirectMonotonicReader.getInstance(bytes.addressesMeta, addressesSlice);

    // The value bytes span [bytes.offset, bytes.addressesOffset).
    final long payloadLength = bytes.addressesOffset - bytes.offset;
    final IndexInput payload = this.data.slice("var-binary", bytes.offset, payloadLength);

    // Shared scratch buffer with capacity bytes.maxLength (clamped at zero).
    final BytesRef scratch = new BytesRef(Math.max(0, bytes.maxLength));
    final byte[] scratchBytes = scratch.bytes;

    return new LongBinaryDocValues() {

        @Override
        public BytesRef get(long id) {
            // Value #id occupies [addresses(id), addresses(id + 1)) in the payload slice.
            final long from = addresses.get(id);
            final long to = addresses.get(id + 1);
            final int size = (int) (to - from);
            try {
                payload.seek(from);
                payload.readBytes(scratchBytes, 0, size);
            } catch (IOException e) {
                // get() declares no checked exception, so the I/O failure is rethrown unchecked.
                throw new RuntimeException(e);
            }
            scratch.length = size;
            return scratch;
        }
    };
}
Also used : RandomAccessInput(org.apache.lucene.store.RandomAccessInput) LongValues(org.apache.lucene.util.LongValues) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) IOException(java.io.IOException) BytesRef(org.apache.lucene.util.BytesRef)

Example 19 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class SortedSetDocValuesFacetCounts method countOneSegment.

/**
 * Adds one segment's ordinal occurrences to the enclosing instance's {@code counts} array.
 *
 * <p>Three strategies are used:
 * <ul>
 *   <li>no ordinal map: segment ords index {@code counts} directly;</li>
 *   <li>ordinal map and fewer hits than a tenth of the segment's ord count: every ord is
 *       remapped to its global ord as it is encountered;</li>
 *   <li>ordinal map otherwise: occurrences are first tallied per segment ord, then each
 *       non-zero tally is added to the slot of the corresponding global ord.</li>
 * </ul>
 *
 * @param ordinalMap segment-to-global ord mapping, or {@code null} to count segment ords as-is
 * @param reader     the segment reader supplying the sorted-set doc values of {@code field}
 * @param segOrd     index of this segment, passed to {@code ordinalMap.getGlobalOrds}
 * @param hits       matching documents to restrict counting to, or {@code null} to visit
 *                   every document that the doc values iterator returns
 * @throws IOException declared for reading the doc values and advancing the iterators
 */
private void countOneSegment(MultiDocValues.OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException {
    final SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
    if (segValues == null) {
        // No values for this field in the segment: nothing to count.
        return;
    }

    final DocIdSetIterator it;
    if (hits != null) {
        it = ConjunctionDISI.intersectIterators(Arrays.asList(hits.bits.iterator(), segValues));
    } else {
        it = segValues;
    }

    if (ordinalMap == null) {
        // No mapping available: aggregate the segment ords directly into counts.
        while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int ord = (int) segValues.nextOrd();
            while (ord != SortedSetDocValues.NO_MORE_ORDS) {
                counts[ord]++;
                ord = (int) segValues.nextOrd();
            }
        }
        return;
    }

    final LongValues toGlobal = ordinalMap.getGlobalOrds(segOrd);
    final int numSegOrds = (int) segValues.getValueCount();
    final boolean fewHits = hits != null && hits.totalHits < numSegOrds / 10;

    if (fewHits) {
        // Remap every ord to its global ord while iterating.
        while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int ord = (int) segValues.nextOrd();
            while (ord != SortedSetDocValues.NO_MORE_ORDS) {
                counts[(int) toGlobal.get(ord)]++;
                ord = (int) segValues.nextOrd();
            }
        }
        return;
    }

    // Tally in segment-ord space first ...
    final int[] segCounts = new int[numSegOrds];
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int ord = (int) segValues.nextOrd();
        while (ord != SortedSetDocValues.NO_MORE_ORDS) {
            segCounts[ord]++;
            ord = (int) segValues.nextOrd();
        }
    }

    // ... then migrate every non-zero tally to its global ord.
    for (int ord = 0; ord < numSegOrds; ord++) {
        final int tally = segCounts[ord];
        if (tally == 0) {
            continue;
        }
        counts[(int) toGlobal.get(ord)] += tally;
    }
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) LongValues(org.apache.lucene.util.LongValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 20 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class DocValuesStats method accumSingle.

/**
 * Accumulates per-segment single-valued stats.
 *
 * <p>For every document returned by {@code disi}: if {@code si} has a value for it, the
 * value's ord (translated through {@code map} when one is given) is counted in
 * {@code counts} and reported to each {@code facetStats} entry; otherwise the document
 * is reported as missing to each entry.
 *
 * @param counts     per-ord counters, incremented in place
 * @param docBase    added to each segment-local doc ID before it is passed to the facet stats
 * @param facetStats facet stats to notify for every visited document
 * @param si         the segment's sorted doc values, advanced in step with {@code disi}
 * @param disi       iterator over the documents to visit
 * @param subIndex   index passed to {@code map.getGlobalOrds}
 * @param map        ordinal map used to translate ords, or {@code null} to use ords unchanged
 * @return the number of visited documents for which {@code si} had no value
 * @throws IOException declared for advancing the iterators and reading ords
 */
static int accumSingle(int[] counts, int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
    final LongValues ordMap;
    if (map != null) {
        ordMap = map.getGlobalOrds(subIndex);
    } else {
        ordMap = null;
    }

    int missing = 0;
    for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
        // Move the values iterator forward only when it is still behind the current doc.
        if (si.docID() < doc) {
            si.advance(doc);
        }

        if (si.docID() != doc) {
            // The values iterator is not positioned on this doc: it has no value.
            for (FieldFacetStats stats : facetStats) {
                stats.facetMissingNum(docBase + doc);
            }
            missing++;
            continue;
        }

        final int segOrd = si.ordValue();
        final int ord = (map != null) ? (int) ordMap.get(segOrd) : segOrd;
        counts[ord]++;
        for (FieldFacetStats stats : facetStats) {
            stats.facetTermNum(docBase + doc, ord);
        }
    }
    return missing;
}
Also used : FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) LongValues(org.apache.lucene.util.LongValues)

Aggregations

LongValues (org.apache.lucene.util.LongValues): 31; IOException (java.io.IOException): 8; RandomAccessInput (org.apache.lucene.store.RandomAccessInput): 8; IndexInput (org.apache.lucene.store.IndexInput): 7; BytesRef (org.apache.lucene.util.BytesRef): 6; IndexOutput (org.apache.lucene.store.IndexOutput): 5; Directory (org.apache.lucene.store.Directory): 4; ArrayList (java.util.ArrayList): 3; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3; NumericDocValues (org.apache.lucene.index.NumericDocValues): 3; SortedDocValues (org.apache.lucene.index.SortedDocValues): 3; Bits (org.apache.lucene.util.Bits): 3; MultiDocValues (org.apache.lucene.index.MultiDocValues): 2; SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 2; DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 2; ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 2; DirectWriter (org.apache.lucene.util.packed.DirectWriter): 2; FieldFacetStats (org.apache.solr.handler.component.FieldFacetStats): 2; IntHashSet (com.carrotsearch.hppc.IntHashSet): 1; IntObjectHashMap (com.carrotsearch.hppc.IntObjectHashMap): 1