Search in sources :

Example 1 with MultiSortedSetDocValues

use of org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues in project lucene-solr by apache.

the class ConcurrentSortedSetDocValuesFacetCounts method count.

/** Does all the "real work" of tallying up the counts. */
private final void count(List<MatchingDocs> matchingDocs) throws IOException, InterruptedException {
    MultiDocValues.OrdinalMap ordinalMap;
    // matchingDocs...
    if (dv instanceof MultiDocValues.MultiSortedSetDocValues && matchingDocs.size() > 1) {
        ordinalMap = ((MultiSortedSetDocValues) dv).mapping;
    } else {
        ordinalMap = null;
    }
    IndexReader reader = state.getReader();
    List<Future<Void>> results = new ArrayList<>();
    for (MatchingDocs hits : matchingDocs) {
        // AIOOBE can happen:
        if (ReaderUtil.getTopLevelContext(hits.context).reader() != reader) {
            throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
        }
        results.add(exec.submit(new CountOneSegment(hits.context.reader(), hits, ordinalMap, hits.context.ord)));
    }
    for (Future<Void> result : results) {
        try {
            result.get();
        } catch (ExecutionException ee) {
            // Theoretically cause can be null; guard against that.
            Throwable cause = ee.getCause();
            throw IOUtils.rethrowAlways(cause != null ? cause : ee);
        }
    }
}
Also used : MatchingDocs(org.apache.lucene.facet.FacetsCollector.MatchingDocs) ArrayList(java.util.ArrayList) MultiDocValues(org.apache.lucene.index.MultiDocValues) IndexReader(org.apache.lucene.index.IndexReader) Future(java.util.concurrent.Future) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with MultiSortedSetDocValues

use of org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues in project lucene-solr by apache.

the class ConcurrentSortedSetDocValuesFacetCounts method countAll.

/** Does all the "real work" of tallying up the counts. */
private final void countAll() throws IOException, InterruptedException {
    //System.out.println("ssdv count");
    MultiDocValues.OrdinalMap ordinalMap;
    // matchingDocs...
    if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
        ordinalMap = ((MultiSortedSetDocValues) dv).mapping;
    } else {
        ordinalMap = null;
    }
    List<Future<Void>> results = new ArrayList<>();
    for (LeafReaderContext context : state.getReader().leaves()) {
        results.add(exec.submit(new CountOneSegment(context.reader(), null, ordinalMap, context.ord)));
    }
    for (Future<Void> result : results) {
        try {
            result.get();
        } catch (ExecutionException ee) {
            // Theoretically cause can be null; guard against that.
            Throwable cause = ee.getCause();
            throw IOUtils.rethrowAlways(cause != null ? cause : ee);
        }
    }
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) MultiDocValues(org.apache.lucene.index.MultiDocValues) ExecutionException(java.util.concurrent.ExecutionException)

Example 3 with MultiSortedSetDocValues

use of org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues in project lucene-solr by apache.

the class DocValuesFacets method getCounts.

public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter, FacetDebugInfo fdebug) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<>();
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    final SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    final BytesRefBuilder prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRefBuilder();
        prefixRef.copyChars(prefix);
    }
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = (int) si.lookupTerm(prefixRef.get());
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef.get());
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = -1;
        endTermIndex = (int) si.getValueCount();
    }
    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];
        if (fdebug != null) {
            fdebug.putInfoItem("numBuckets", nTerms);
        }
        Filter filter = docs.getTopFilter();
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
            LeafReaderContext leaf = leaves.get(subIndex);
            // solr docsets already exclude any deleted docs
            DocIdSet dis = filter.getDocIdSet(leaf, null);
            DocIdSetIterator disi = null;
            if (dis != null) {
                disi = dis.iterator();
            }
            if (disi != null) {
                if (multiValued) {
                    SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySortedSet();
                    }
                    final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                    if (singleton != null) {
                        // some codecs may optimize SORTED_SET storage for single-valued fields
                        accumSingle(counts, startTermIndex, singleton, disi, subIndex, ordinalMap);
                    } else {
                        accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                    }
                } else {
                    SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySorted();
                    }
                    accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                }
            }
        }
        if (startTermIndex == -1) {
            missingCount = counts[0];
        }
        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'
        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);
            // the smallest value in the top 'N' values
            int min = mincount - 1;
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    if (termFilter != null) {
                        final BytesRef term = si.lookupOrd(startTermIndex + i);
                        if (!termFilter.test(term)) {
                            continue;
                        }
                    }
                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }
            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;
            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                final BytesRef term = si.lookupOrd(startTermIndex + tnum);
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0 && termFilter == null) {
                // if mincount<=0 and we're not examining the values for the term filter, then
                // we won't discard any terms and we know exactly where to start.
                i += off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount)
                    continue;
                BytesRef term = null;
                if (termFilter != null) {
                    term = si.lookupOrd(startTermIndex + i);
                    if (!termFilter.test(term)) {
                        continue;
                    }
                }
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                if (term == null) {
                    term = si.lookupOrd(startTermIndex + i);
                }
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }
    return finalize(res, searcher, schemaField, docs, missingCount, missing);
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) LongPriorityQueue(org.apache.solr.util.LongPriorityQueue) NamedList(org.apache.solr.common.util.NamedList) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 4 with MultiSortedSetDocValues

use of org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues in project lucene-solr by apache.

the class DocValuesStats method getCounts.

public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {
    final SchemaField schemaField = statsField.getSchemaField();
    assert null != statsField.getSchemaField() : "DocValuesStats requires a StatsField using a SchemaField";
    final String fieldName = schemaField.getName();
    final FieldType ft = schemaField.getType();
    final StatsValues res = StatsValuesFactory.createStatsValues(statsField);
    //Initialize facetstats, if facets have been passed in
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;
    for (String facetField : facet) {
        SchemaField fsf = searcher.getSchema().getField(facetField);
        if (fsf.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField);
        }
        SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        si = DocValues.emptySortedSet();
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];
    Filter filter = docs.getTopFilter();
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        // solr docsets already exclude any deleted docs
        DocIdSet dis = filter.getDocIdSet(leaf, null);
        DocIdSetIterator disi = null;
        if (dis != null) {
            disi = dis.iterator();
        }
        if (disi != null) {
            int docBase = leaf.docBase;
            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];
        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);
    if (facetStats.length > 0) {
        for (FieldFacetStats f : facetStats) {
            Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
            f.accumulateMissing();
            res.addFacet(f.name, facetStatsValues);
        }
    }
    return res;
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) StatsValues(org.apache.solr.handler.component.StatsValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef)

Example 5 with MultiSortedSetDocValues

use of org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues in project lucene-solr by apache.

the class DefaultSortedSetDocValuesReaderState method getDocValues.

/** Return top-level doc values. */
@Override
public SortedSetDocValues getDocValues() throws IOException {
    // TODO: this is dup'd from slow composite reader wrapper ... can we factor it out to share?
    OrdinalMap map = null;
    // why are we using a map?
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(reader, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    assert map != null;
    int size = reader.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = reader.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = reader.maxDoc();
    return new MultiSortedSetDocValues(values, starts, map, cost);
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

MultiDocValues (org.apache.lucene.index.MultiDocValues)6 MultiSortedSetDocValues (org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)4 IndexReader (org.apache.lucene.index.IndexReader)3 OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)3 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)3 ArrayList (java.util.ArrayList)2 ExecutionException (java.util.concurrent.ExecutionException)2 Future (java.util.concurrent.Future)2 MatchingDocs (org.apache.lucene.facet.FacetsCollector.MatchingDocs)2 SortedDocValues (org.apache.lucene.index.SortedDocValues)2 DocIdSet (org.apache.lucene.search.DocIdSet)2 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)2 BytesRef (org.apache.lucene.util.BytesRef)2 FieldType (org.apache.solr.schema.FieldType)2 SchemaField (org.apache.solr.schema.SchemaField)2 Filter (org.apache.solr.search.Filter)2 FieldInfo (org.apache.lucene.index.FieldInfo)1 LeafReader (org.apache.lucene.index.LeafReader)1 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)1