Search in sources :

Example 1 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TrieFloatField method getSingleValueSource.

@Override
protected ValueSource getSingleValueSource(SortedSetSelector.Type choice, SchemaField f) {
    return new SortedSetFieldSource(f.getName(), choice) {

        @Override
        public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
            // needed for nested anon class ref
            SortedSetFieldSource thisAsSortedSetFieldSource = this;
            SortedSetDocValues sortedSet = DocValues.getSortedSet(readerContext.reader(), field);
            SortedDocValues view = SortedSetSelector.wrap(sortedSet, selector);
            return new FloatDocValues(thisAsSortedSetFieldSource) {

                private int lastDocID;

                private boolean setDoc(int docID) throws IOException {
                    if (docID < lastDocID) {
                        throw new IllegalArgumentException("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
                    }
                    if (docID > view.docID()) {
                        return docID == view.advance(docID);
                    } else {
                        return docID == view.docID();
                    }
                }

                @Override
                public float floatVal(int doc) throws IOException {
                    if (setDoc(doc)) {
                        BytesRef bytes = view.binaryValue();
                        assert bytes.length > 0;
                        return NumericUtils.sortableIntToFloat(LegacyNumericUtils.prefixCodedToInt(bytes));
                    } else {
                        return 0F;
                    }
                }

                @Override
                public boolean exists(int doc) throws IOException {
                    return setDoc(doc);
                }

                @Override
                public ValueFiller getValueFiller() {
                    return new ValueFiller() {

                        private final MutableValueFloat mval = new MutableValueFloat();

                        @Override
                        public MutableValue getValue() {
                            return mval;
                        }

                        @Override
                        public void fillValue(int doc) throws IOException {
                            if (setDoc(doc)) {
                                mval.exists = true;
                                mval.value = NumericUtils.sortableIntToFloat(LegacyNumericUtils.prefixCodedToInt(view.binaryValue()));
                            } else {
                                mval.exists = false;
                                mval.value = 0F;
                            }
                        }
                    };
                }
            };
        }
    };
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) MutableValueFloat(org.apache.lucene.util.mutable.MutableValueFloat) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) FloatDocValues(org.apache.lucene.queries.function.docvalues.FloatDocValues) Map(java.util.Map) SortedSetFieldSource(org.apache.lucene.queries.function.valuesource.SortedSetFieldSource) SortedDocValues(org.apache.lucene.index.SortedDocValues) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class DocValuesFacets method getCounts.

public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter, FacetDebugInfo fdebug) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<>();
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    final SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    final BytesRefBuilder prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRefBuilder();
        prefixRef.copyChars(prefix);
    }
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = (int) si.lookupTerm(prefixRef.get());
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef.get());
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = -1;
        endTermIndex = (int) si.getValueCount();
    }
    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];
        if (fdebug != null) {
            fdebug.putInfoItem("numBuckets", nTerms);
        }
        Filter filter = docs.getTopFilter();
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
            LeafReaderContext leaf = leaves.get(subIndex);
            // solr docsets already exclude any deleted docs
            DocIdSet dis = filter.getDocIdSet(leaf, null);
            DocIdSetIterator disi = null;
            if (dis != null) {
                disi = dis.iterator();
            }
            if (disi != null) {
                if (multiValued) {
                    SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySortedSet();
                    }
                    final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                    if (singleton != null) {
                        // some codecs may optimize SORTED_SET storage for single-valued fields
                        accumSingle(counts, startTermIndex, singleton, disi, subIndex, ordinalMap);
                    } else {
                        accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                    }
                } else {
                    SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySorted();
                    }
                    accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                }
            }
        }
        if (startTermIndex == -1) {
            missingCount = counts[0];
        }
        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'
        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);
            // the smallest value in the top 'N' values
            int min = mincount - 1;
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    if (termFilter != null) {
                        final BytesRef term = si.lookupOrd(startTermIndex + i);
                        if (!termFilter.test(term)) {
                            continue;
                        }
                    }
                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }
            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;
            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                final BytesRef term = si.lookupOrd(startTermIndex + tnum);
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0 && termFilter == null) {
                // if mincount<=0 and we're not examining the values for the term filter, then
                // we won't discard any terms and we know exactly where to start.
                i += off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount)
                    continue;
                BytesRef term = null;
                if (termFilter != null) {
                    term = si.lookupOrd(startTermIndex + i);
                    if (!termFilter.test(term)) {
                        continue;
                    }
                }
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                if (term == null) {
                    term = si.lookupOrd(startTermIndex + i);
                }
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }
    return finalize(res, searcher, schemaField, docs, missingCount, missing);
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) LongPriorityQueue(org.apache.solr.util.LongPriorityQueue) NamedList(org.apache.solr.common.util.NamedList) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 3 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class DocValuesStats method getCounts.

public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {
    final SchemaField schemaField = statsField.getSchemaField();
    assert null != statsField.getSchemaField() : "DocValuesStats requires a StatsField using a SchemaField";
    final String fieldName = schemaField.getName();
    final FieldType ft = schemaField.getType();
    final StatsValues res = StatsValuesFactory.createStatsValues(statsField);
    //Initialize facetstats, if facets have been passed in
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;
    for (String facetField : facet) {
        SchemaField fsf = searcher.getSchema().getField(facetField);
        if (fsf.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField);
        }
        SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        si = DocValues.emptySortedSet();
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];
    Filter filter = docs.getTopFilter();
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        // solr docsets already exclude any deleted docs
        DocIdSet dis = filter.getDocIdSet(leaf, null);
        DocIdSetIterator disi = null;
        if (dis != null) {
            disi = dis.iterator();
        }
        if (disi != null) {
            int docBase = leaf.docBase;
            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];
        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);
    if (facetStats.length > 0) {
        for (FieldFacetStats f : facetStats) {
            Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
            f.accumulateMissing();
            res.addFacet(f.name, facetStatsValues);
        }
    }
    return res;
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) StatsValues(org.apache.solr.handler.component.StatsValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class IntervalFacets method getCountString.

private void getCountString() throws IOException {
    Filter filter = docs.getTopFilter();
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        // solr docsets already exclude any deleted docs
        DocIdSet dis = filter.getDocIdSet(leaf, null);
        if (dis == null) {
            continue;
        }
        DocIdSetIterator disi = dis.iterator();
        if (disi != null) {
            if (schemaField.multiValued()) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(schemaField.getName());
                if (sub == null) {
                    continue;
                }
                final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    accumIntervalsSingle(singleton, disi, dis.bits());
                } else {
                    accumIntervalsMulti(sub, disi, dis.bits());
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(schemaField.getName());
                if (sub == null) {
                    continue;
                }
                accumIntervalsSingle(sub, disi, dis.bits());
            }
        }
    }
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSet(org.apache.lucene.search.DocIdSet) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 5 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class UninvertingReader method getSortedSetDocValues.

@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    SortedSetDocValues values = in.getSortedSetDocValues(field);
    if (values != null) {
        return values;
    }
    Type v = getType(field);
    if (v != null) {
        switch(v) {
            case SORTED_SET_INTEGER:
            case SORTED_SET_FLOAT:
                return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
            case SORTED_SET_LONG:
            case SORTED_SET_DOUBLE:
                return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
            case SORTED_SET_BINARY:
                return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
        }
    }
    return null;
}
Also used : DocValuesType(org.apache.lucene.index.DocValuesType) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues)

Aggregations

SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)55 BytesRef (org.apache.lucene.util.BytesRef)34 Document (org.apache.lucene.document.Document)25 LeafReader (org.apache.lucene.index.LeafReader)24 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)23 SortedDocValues (org.apache.lucene.index.SortedDocValues)23 Directory (org.apache.lucene.store.Directory)22 DirectoryReader (org.apache.lucene.index.DirectoryReader)19 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)17 IndexWriter (org.apache.lucene.index.IndexWriter)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)11 NumericDocValues (org.apache.lucene.index.NumericDocValues)10 ArrayList (java.util.ArrayList)9 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)9 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)9 TermsEnum (org.apache.lucene.index.TermsEnum)9 IOException (java.io.IOException)8 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)8 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)7