Search in sources :

Example 1 with FieldFacetStats

use of org.apache.solr.handler.component.FieldFacetStats in project lucene-solr by apache.

the class DocValuesStats method getCounts.

public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {
    final SchemaField schemaField = statsField.getSchemaField();
    assert null != statsField.getSchemaField() : "DocValuesStats requires a StatsField using a SchemaField";
    final String fieldName = schemaField.getName();
    final FieldType ft = schemaField.getType();
    final StatsValues res = StatsValuesFactory.createStatsValues(statsField);
    //Initialize facetstats, if facets have been passed in
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;
    for (String facetField : facet) {
        SchemaField fsf = searcher.getSchema().getField(facetField);
        if (fsf.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField);
        }
        SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        si = DocValues.emptySortedSet();
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];
    Filter filter = docs.getTopFilter();
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        // solr docsets already exclude any deleted docs
        DocIdSet dis = filter.getDocIdSet(leaf, null);
        DocIdSetIterator disi = null;
        if (dis != null) {
            disi = dis.iterator();
        }
        if (disi != null) {
            int docBase = leaf.docBase;
            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];
        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);
    if (facetStats.length > 0) {
        for (FieldFacetStats f : facetStats) {
            Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
            f.accumulateMissing();
            res.addFacet(f.name, facetStatsValues);
        }
    }
    return res;
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) StatsValues(org.apache.solr.handler.component.StatsValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with FieldFacetStats

use of org.apache.solr.handler.component.FieldFacetStats in project lucene-solr by apache.

the class DocValuesStats method accumSingle.

/** accumulates per-segment single-valued stats */
static int accumSingle(int[] counts, int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
    final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
    int missingDocCount = 0;
    int doc;
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (doc > si.docID()) {
            si.advance(doc);
        }
        if (doc == si.docID()) {
            int term = si.ordValue();
            if (map != null) {
                term = (int) ordMap.get(term);
            }
            counts[term]++;
            for (FieldFacetStats f : facetStats) {
                f.facetTermNum(docBase + doc, term);
            }
        } else {
            for (FieldFacetStats f : facetStats) {
                f.facetMissingNum(docBase + doc);
            }
            missingDocCount++;
        }
    }
    return missingDocCount;
}
Also used : FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) LongValues(org.apache.lucene.util.LongValues)

Example 3 with FieldFacetStats

use of org.apache.solr.handler.component.FieldFacetStats in project lucene-solr by apache.

the class DocValuesStats method accumMulti.

/** accumulates per-segment multi-valued stats */
static int accumMulti(int[] counts, int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
    final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
    int missingDocCount = 0;
    int doc;
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (doc > si.docID()) {
            si.advance(doc);
        }
        if (doc == si.docID()) {
            long ord;
            while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                int term = (int) ord;
                if (map != null) {
                    term = (int) ordMap.get(term);
                }
                counts[term]++;
                for (FieldFacetStats f : facetStats) {
                    f.facetTermNum(docBase + doc, term);
                }
            }
        } else {
            for (FieldFacetStats f : facetStats) {
                f.facetMissingNum(docBase + doc);
            }
            missingDocCount++;
        }
    }
    return missingDocCount;
}
Also used : FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) LongValues(org.apache.lucene.util.LongValues)

Aggregations

FieldFacetStats (org.apache.solr.handler.component.FieldFacetStats)3 LongValues (org.apache.lucene.util.LongValues)2 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1 MultiDocValues (org.apache.lucene.index.MultiDocValues)1 MultiSortedSetDocValues (org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues)1 OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)1 SortedDocValues (org.apache.lucene.index.SortedDocValues)1 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)1 DocIdSet (org.apache.lucene.search.DocIdSet)1 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)1 BytesRef (org.apache.lucene.util.BytesRef)1 SolrException (org.apache.solr.common.SolrException)1 StatsValues (org.apache.solr.handler.component.StatsValues)1 FieldType (org.apache.solr.schema.FieldType)1 SchemaField (org.apache.solr.schema.SchemaField)1 Filter (org.apache.solr.search.Filter)1