Search in sources :

Example 1 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project elasticsearch by elastic.

the class ParentChildIndexFieldData method localGlobalDirect.

@Override
public IndexParentChildFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
    final long startTime = System.nanoTime();
    long ramBytesUsed = 0;
    final Map<String, OrdinalMapAndAtomicFieldData> perType = new HashMap<>();
    for (String type : parentTypes) {
        final AtomicParentChildFieldData[] fieldData = new AtomicParentChildFieldData[indexReader.leaves().size()];
        for (LeafReaderContext context : indexReader.leaves()) {
            fieldData[context.ord] = load(context);
        }
        final OrdinalMap ordMap = buildOrdinalMap(fieldData, type);
        ramBytesUsed += ordMap.ramBytesUsed();
        perType.put(type, new OrdinalMapAndAtomicFieldData(ordMap, fieldData));
    }
    final AtomicParentChildFieldData[] fielddata = new AtomicParentChildFieldData[indexReader.leaves().size()];
    for (int i = 0; i < fielddata.length; ++i) {
        fielddata[i] = new GlobalAtomicFieldData(parentTypes, perType, i);
    }
    breakerService.getBreaker(CircuitBreaker.FIELDDATA).addWithoutBreaking(ramBytesUsed);
    if (logger.isDebugEnabled()) {
        logger.debug("global-ordinals [_parent] took [{}]", new TimeValue(System.nanoTime() - startTime, TimeUnit.NANOSECONDS));
    }
    return new GlobalFieldData(indexReader, fielddata, ramBytesUsed, perType);
}
Also used : HashMap(java.util.HashMap) AtomicParentChildFieldData(org.elasticsearch.index.fielddata.AtomicParentChildFieldData) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TimeValue(org.elasticsearch.common.unit.TimeValue)

Example 2 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class DocValuesFacets method getCounts.

public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter, FacetDebugInfo fdebug) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<>();
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    final SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    final BytesRefBuilder prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRefBuilder();
        prefixRef.copyChars(prefix);
    }
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = (int) si.lookupTerm(prefixRef.get());
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef.get());
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = -1;
        endTermIndex = (int) si.getValueCount();
    }
    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];
        if (fdebug != null) {
            fdebug.putInfoItem("numBuckets", nTerms);
        }
        Filter filter = docs.getTopFilter();
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
            LeafReaderContext leaf = leaves.get(subIndex);
            // solr docsets already exclude any deleted docs
            DocIdSet dis = filter.getDocIdSet(leaf, null);
            DocIdSetIterator disi = null;
            if (dis != null) {
                disi = dis.iterator();
            }
            if (disi != null) {
                if (multiValued) {
                    SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySortedSet();
                    }
                    final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                    if (singleton != null) {
                        // some codecs may optimize SORTED_SET storage for single-valued fields
                        accumSingle(counts, startTermIndex, singleton, disi, subIndex, ordinalMap);
                    } else {
                        accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                    }
                } else {
                    SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySorted();
                    }
                    accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                }
            }
        }
        if (startTermIndex == -1) {
            missingCount = counts[0];
        }
        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'
        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);
            // the smallest value in the top 'N' values
            int min = mincount - 1;
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    if (termFilter != null) {
                        final BytesRef term = si.lookupOrd(startTermIndex + i);
                        if (!termFilter.test(term)) {
                            continue;
                        }
                    }
                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }
            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;
            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                final BytesRef term = si.lookupOrd(startTermIndex + tnum);
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0 && termFilter == null) {
                // if mincount<=0 and we're not examining the values for the term filter, then
                // we won't discard any terms and we know exactly where to start.
                i += off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount)
                    continue;
                BytesRef term = null;
                if (termFilter != null) {
                    term = si.lookupOrd(startTermIndex + i);
                    if (!termFilter.test(term)) {
                        continue;
                    }
                }
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                if (term == null) {
                    term = si.lookupOrd(startTermIndex + i);
                }
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }
    return finalize(res, searcher, schemaField, docs, missingCount, missing);
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) LongPriorityQueue(org.apache.solr.util.LongPriorityQueue) NamedList(org.apache.solr.common.util.NamedList) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 3 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class DocValuesStats method getCounts.

public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {
    final SchemaField schemaField = statsField.getSchemaField();
    assert null != statsField.getSchemaField() : "DocValuesStats requires a StatsField using a SchemaField";
    final String fieldName = schemaField.getName();
    final FieldType ft = schemaField.getType();
    final StatsValues res = StatsValuesFactory.createStatsValues(statsField);
    //Initialize facetstats, if facets have been passed in
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;
    for (String facetField : facet) {
        SchemaField fsf = searcher.getSchema().getField(facetField);
        if (fsf.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField);
        }
        SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        si = DocValues.emptySortedSet();
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];
    Filter filter = docs.getTopFilter();
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        // solr docsets already exclude any deleted docs
        DocIdSet dis = filter.getDocIdSet(leaf, null);
        DocIdSetIterator disi = null;
        if (dis != null) {
            disi = dis.iterator();
        }
        if (disi != null) {
            int docBase = leaf.docBase;
            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];
        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);
    if (facetStats.length > 0) {
        for (FieldFacetStats f : facetStats) {
            Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
            f.accumulateMissing();
            res.addFacet(f.name, facetStatsValues);
        }
    }
    return res;
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) StatsValues(org.apache.solr.handler.component.StatsValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class SlowCompositeReaderWrapper method getSortedDocValues.

@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
            if (dv instanceof MultiSortedDocValues) {
                map = ((MultiSortedDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    int size = in.leaves().size();
    final SortedDocValues[] values = new SortedDocValues[size];
    final int[] starts = new int[size + 1];
    long totalCost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
            return null;
        }
        SortedDocValues v = reader.getSortedDocValues(field);
        if (v == null) {
            v = DocValues.emptySorted();
        }
        totalCost += v.cost();
        values[i] = v;
        starts[i] = context.docBase;
    }
    starts[size] = maxDoc();
    return new MultiSortedDocValues(values, starts, map, totalCost);
}
Also used : MultiSortedDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) MultiSortedDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedDocValues)

Example 5 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class SlowCompositeReaderWrapper method getSortedSetDocValues.

@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}
Also used : OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap)

Aggregations

OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)15 BytesRef (org.apache.lucene.util.BytesRef)11 MultiDocValues (org.apache.lucene.index.MultiDocValues)8 SortedDocValues (org.apache.lucene.index.SortedDocValues)8 Document (org.apache.lucene.document.Document)7 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)7 Directory (org.apache.lucene.store.Directory)7 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)6 BooleanQuery (org.apache.lucene.search.BooleanQuery)6 FieldValueQuery (org.apache.lucene.search.FieldValueQuery)6 IndexSearcher (org.apache.lucene.search.IndexSearcher)6 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)6 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)6 Query (org.apache.lucene.search.Query)6 TermQuery (org.apache.lucene.search.TermQuery)6 DoublePoint (org.apache.lucene.document.DoublePoint)5 FloatPoint (org.apache.lucene.document.FloatPoint)5 IntPoint (org.apache.lucene.document.IntPoint)5