Search in sources :

Example 1 with EmptyDocValuesProducer

use of org.apache.lucene.index.EmptyDocValuesProducer in project lucene-solr by apache.

the class DocValuesConsumer method mergeSortedSetField.

/**
   * Merges the sortedset docvalues from <code>toMerge</code>.
   * <p>
   * The default implementation calls {@link #addSortedSetField}, passing
   * an Iterable that merges ordinals and values and filters deleted documents .
   */
public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
    List<SortedSetDocValues> toMerge = new ArrayList<>();
    for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedSetDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
            FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
            if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                values = docValuesProducer.getSortedSet(fieldInfo);
            }
        }
        if (values == null) {
            values = DocValues.emptySortedSet();
        }
        toMerge.add(values);
    }
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum[] liveTerms = new TermsEnum[toMerge.size()];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
        SortedSetDocValues dv = toMerge.get(sub);
        Bits liveDocs = mergeState.liveDocs[sub];
        if (liveDocs == null) {
            liveTerms[sub] = dv.termsEnum();
            weights[sub] = dv.getValueCount();
        } else {
            LongBitSet bitset = new LongBitSet(dv.getValueCount());
            int docID;
            while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
                if (liveDocs.get(docID)) {
                    long ord;
                    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                        bitset.set(ord);
                    }
                }
            }
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
            weights[sub] = bitset.cardinality();
        }
    }
    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);
    // step 3: add field
    addSortedSetField(mergeFieldInfo, new EmptyDocValuesProducer() {

        @Override
        public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
            if (fieldInfo != mergeFieldInfo) {
                throw new IllegalArgumentException("wrong FieldInfo");
            }
            // We must make new iterators + DocIDMerger for each iterator:
            List<SortedSetDocValuesSub> subs = new ArrayList<>();
            long cost = 0;
            for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
                SortedSetDocValues values = null;
                DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
                if (docValuesProducer != null) {
                    FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
                    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                        values = docValuesProducer.getSortedSet(readerFieldInfo);
                    }
                }
                if (values == null) {
                    values = DocValues.emptySortedSet();
                }
                cost += values.cost();
                subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
            }
            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
            final long finalCost = cost;
            return new SortedSetDocValues() {

                private int docID = -1;

                private SortedSetDocValuesSub currentSub;

                @Override
                public int docID() {
                    return docID;
                }

                @Override
                public int nextDoc() throws IOException {
                    currentSub = docIDMerger.next();
                    if (currentSub == null) {
                        docID = NO_MORE_DOCS;
                    } else {
                        docID = currentSub.mappedDocID;
                    }
                    return docID;
                }

                @Override
                public int advance(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public long nextOrd() throws IOException {
                    long subOrd = currentSub.values.nextOrd();
                    if (subOrd == NO_MORE_ORDS) {
                        return NO_MORE_ORDS;
                    }
                    return currentSub.map.get(subOrd);
                }

                @Override
                public long cost() {
                    return finalCost;
                }

                @Override
                public BytesRef lookupOrd(long ord) throws IOException {
                    int segmentNumber = map.getFirstSegmentNumber(ord);
                    long segmentOrd = map.getFirstSegmentOrd(ord);
                    return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
                }

                @Override
                public long getValueCount() {
                    return map.getValueCount();
                }
            };
        }
    });
}
Also used : ArrayList(java.util.ArrayList) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) LongBitSet(org.apache.lucene.util.LongBitSet) IOException(java.io.IOException) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermsEnum(org.apache.lucene.index.TermsEnum) FilteredTermsEnum(org.apache.lucene.index.FilteredTermsEnum) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DocIDMerger(org.apache.lucene.index.DocIDMerger) Bits(org.apache.lucene.util.Bits) ArrayList(java.util.ArrayList) List(java.util.List) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with EmptyDocValuesProducer

use of org.apache.lucene.index.EmptyDocValuesProducer in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method addNumericField.

@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene70DocValuesFormat.NUMERIC);
    writeValues(field, new EmptyDocValuesProducer() {

        @Override
        public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
            return DocValues.singleton(valuesProducer.getNumeric(field));
        }
    });
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) IOException(java.io.IOException) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 3 with EmptyDocValuesProducer

use of org.apache.lucene.index.EmptyDocValuesProducer in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method addSortedSetField.

@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene70DocValuesFormat.SORTED_SET);
    SortedSetDocValues values = valuesProducer.getSortedSet(field);
    int numDocsWithField = 0;
    long numOrds = 0;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        numDocsWithField++;
        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            numOrds++;
        }
    }
    if (numDocsWithField == numOrds) {
        meta.writeByte((byte) 0);
        doAddSortedField(field, new EmptyDocValuesProducer() {

            @Override
            public SortedDocValues getSorted(FieldInfo field) throws IOException {
                return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
            }
        });
        return;
    }
    meta.writeByte((byte) 1);
    assert numDocsWithField != 0;
    if (numDocsWithField == maxDoc) {
        meta.writeLong(-1);
        meta.writeLong(0L);
    } else {
        long offset = data.getFilePointer();
        meta.writeLong(offset);
        values = valuesProducer.getSortedSet(field);
        IndexedDISI.writeBitSet(values, data);
        meta.writeLong(data.getFilePointer() - offset);
    }
    int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
    meta.writeByte((byte) numberOfBitsPerOrd);
    long start = data.getFilePointer();
    meta.writeLong(start);
    DirectWriter writer = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            writer.add(ord);
        }
    }
    writer.finish();
    meta.writeLong(data.getFilePointer() - start);
    meta.writeInt(numDocsWithField);
    start = data.getFilePointer();
    meta.writeLong(start);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
    final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
    long addr = 0;
    addressesWriter.add(addr);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        values.nextOrd();
        addr++;
        while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
            addr++;
        }
        addressesWriter.add(addr);
    }
    addressesWriter.finish();
    meta.writeLong(data.getFilePointer() - start);
    addTermsDict(values);
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DirectWriter(org.apache.lucene.util.packed.DirectWriter) IOException(java.io.IOException) DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter) FieldInfo(org.apache.lucene.index.FieldInfo) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 4 with EmptyDocValuesProducer

use of org.apache.lucene.index.EmptyDocValuesProducer in project lucene-solr by apache.

the class DocValuesConsumer method mergeSortedField.

/**
   * Merges the sorted docvalues from <code>toMerge</code>.
   * <p>
   * The default implementation calls {@link #addSortedField}, passing
   * an Iterable that merges ordinals and values and filters deleted documents .
   */
public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
    List<SortedDocValues> toMerge = new ArrayList<>();
    for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
            FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
            if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
                values = docValuesProducer.getSorted(fieldInfo);
            }
        }
        if (values == null) {
            values = DocValues.emptySorted();
        }
        toMerge.add(values);
    }
    final int numReaders = toMerge.size();
    final SortedDocValues[] dvs = toMerge.toArray(new SortedDocValues[numReaders]);
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum[] liveTerms = new TermsEnum[dvs.length];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < numReaders; sub++) {
        SortedDocValues dv = dvs[sub];
        Bits liveDocs = mergeState.liveDocs[sub];
        if (liveDocs == null) {
            liveTerms[sub] = dv.termsEnum();
            weights[sub] = dv.getValueCount();
        } else {
            LongBitSet bitset = new LongBitSet(dv.getValueCount());
            int docID;
            while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
                if (liveDocs.get(docID)) {
                    int ord = dv.ordValue();
                    if (ord >= 0) {
                        bitset.set(ord);
                    }
                }
            }
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
            weights[sub] = bitset.cardinality();
        }
    }
    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);
    // step 3: add field
    addSortedField(fieldInfo, new EmptyDocValuesProducer() {

        @Override
        public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException {
            if (fieldInfoIn != fieldInfo) {
                throw new IllegalArgumentException("wrong FieldInfo");
            }
            // We must make new iterators + DocIDMerger for each iterator:
            List<SortedDocValuesSub> subs = new ArrayList<>();
            long cost = 0;
            for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
                SortedDocValues values = null;
                DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
                if (docValuesProducer != null) {
                    FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
                    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
                        values = docValuesProducer.getSorted(readerFieldInfo);
                    }
                }
                if (values == null) {
                    values = DocValues.emptySorted();
                }
                cost += values.cost();
                subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
            }
            final long finalCost = cost;
            final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
            return new SortedDocValues() {

                private int docID = -1;

                private int ord;

                @Override
                public int docID() {
                    return docID;
                }

                @Override
                public int nextDoc() throws IOException {
                    SortedDocValuesSub sub = docIDMerger.next();
                    if (sub == null) {
                        return docID = NO_MORE_DOCS;
                    }
                    int subOrd = sub.values.ordValue();
                    assert subOrd != -1;
                    ord = (int) sub.map.get(subOrd);
                    docID = sub.mappedDocID;
                    return docID;
                }

                @Override
                public int ordValue() {
                    return ord;
                }

                @Override
                public int advance(int target) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public long cost() {
                    return finalCost;
                }

                @Override
                public int getValueCount() {
                    return (int) map.getValueCount();
                }

                @Override
                public BytesRef lookupOrd(int ord) throws IOException {
                    int segmentNumber = map.getFirstSegmentNumber(ord);
                    int segmentOrd = (int) map.getFirstSegmentOrd(ord);
                    return dvs[segmentNumber].lookupOrd(segmentOrd);
                }
            };
        }
    });
}
Also used : ArrayList(java.util.ArrayList) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) LongBitSet(org.apache.lucene.util.LongBitSet) IOException(java.io.IOException) SortedDocValues(org.apache.lucene.index.SortedDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermsEnum(org.apache.lucene.index.TermsEnum) FilteredTermsEnum(org.apache.lucene.index.FilteredTermsEnum) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DocIDMerger(org.apache.lucene.index.DocIDMerger) Bits(org.apache.lucene.util.Bits) ArrayList(java.util.ArrayList) List(java.util.List) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

IOException (java.io.IOException)4 EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer)4 FieldInfo (org.apache.lucene.index.FieldInfo)4 ArrayList (java.util.ArrayList)2 List (java.util.List)2 DocIDMerger (org.apache.lucene.index.DocIDMerger)2 FilteredTermsEnum (org.apache.lucene.index.FilteredTermsEnum)2 OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)2 SortedDocValues (org.apache.lucene.index.SortedDocValues)2 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)2 TermsEnum (org.apache.lucene.index.TermsEnum)2 Bits (org.apache.lucene.util.Bits)2 BytesRef (org.apache.lucene.util.BytesRef)2 LongBitSet (org.apache.lucene.util.LongBitSet)2 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)1 DirectMonotonicWriter (org.apache.lucene.util.packed.DirectMonotonicWriter)1 DirectWriter (org.apache.lucene.util.packed.DirectWriter)1