Search in sources:

Example 1 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class DocValuesFacets method migrateGlobal.

/**
 * Adds the per-segment facet counts in {@code segCounts} onto the global
 * accumulator {@code counts}.
 *
 * <p>Slot 0 of both arrays is the "missing" bucket and is added straight across.
 * Every other slot {@code i} of {@code segCounts} belongs to segment ordinal
 * {@code i - 1}; that ordinal is translated through the mapping obtained from
 * {@code map.getGlobalOrds(subIndex)} and its count is added to the matching
 * global slot (global ordinal + 1).
 *
 * @param counts    accumulator indexed by global ordinal + 1 (slot 0 = missing)
 * @param segCounts counts indexed by segment ordinal + 1 (slot 0 = missing)
 * @param subIndex  index passed to {@code map.getGlobalOrds}
 * @param map       ordinal map supplying the segment-to-global translation
 */
static void migrateGlobal(int[] counts, int[] segCounts, int subIndex, OrdinalMap map) {
    final LongValues segToGlobal = map.getGlobalOrds(subIndex);

    // The "missing" bucket needs no translation.
    counts[0] += segCounts[0];

    // Walk the real segment ordinals (0-based); their counts live one slot higher.
    final int numSegOrds = segCounts.length - 1;
    for (int segOrd = 0; segOrd < numSegOrds; segOrd++) {
        final int hits = segCounts[segOrd + 1];
        if (hits == 0) {
            // Nothing to fold in; skip the ordinal lookup entirely.
            continue;
        }
        final int globalOrd = (int) segToGlobal.get(segOrd);
        counts[globalOrd + 1] += hits;
    }
}
Also used : LongValues(org.apache.lucene.util.LongValues)

Example 2 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class DocValuesFacets method accumMultiGeneric.

/**
 * Accumulates multi-valued facet counts for one segment, translating each
 * segment ordinal to the global ordinal space on the fly when an ordinal map
 * is supplied.
 *
 * <p>For every document produced by {@code disi}: if the document has values,
 * each (optionally translated) ordinal is shifted by {@code startTermIndex} and,
 * when the result falls inside {@code counts}, that slot is incremented. If the
 * document has no value and {@code startTermIndex == -1}, slot 0 is incremented
 * as the "missing" count.
 *
 * @param counts         accumulator, indexed by (ordinal - startTermIndex)
 * @param startTermIndex offset subtracted from each ordinal; -1 enables the missing bucket
 * @param si             per-segment multi-valued doc values
 * @param disi           iterator over the documents to count
 * @param subIndex       index passed to {@code map.getGlobalOrds}
 * @param map            ordinal map, or {@code null} to use the ordinals untranslated
 * @throws IOException if reading the doc values or advancing the iterator fails
 */
static void accumMultiGeneric(int[] counts, int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
    final LongValues segToGlobal = (map != null) ? map.getGlobalOrds(subIndex) : null;

    for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
        if (!si.advanceExact(docId)) {
            // Document has no value for this field: count it as missing if requested.
            if (startTermIndex == -1) {
                counts[0]++;
            }
            continue;
        }

        // The first ordinal is processed unconditionally and the sign is only
        // tested afterwards, so the loop body always runs at least once.
        int ord = (int) si.nextOrd();
        do {
            final int effectiveOrd = (map != null) ? (int) segToGlobal.get(ord) : ord;
            final int slot = effectiveOrd - startTermIndex;
            if (slot >= 0 && slot < counts.length) {
                counts[slot]++;
            }
            ord = (int) si.nextOrd();
        } while (ord >= 0);
    }
}
Also used : LongValues(org.apache.lucene.util.LongValues)

Example 3 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class FacetFieldProcessorByHashDV method collectDocs.

/**
 * Runs the first collection phase over {@code fcontext.base}, feeding one value
 * per matching document to {@code collectValFirstPhase}.
 *
 * <p>When {@code calc} is a {@code TermOrdCalc} the field is treated as a string
 * field and the value handed on is the global ordinal of the document's term;
 * otherwise the field is read as single-valued numeric doc values and the raw
 * long is handed on. Documents without a value are skipped in both cases.
 *
 * @throws IOException if reading doc values fails
 */
private void collectDocs() throws IOException {
    if (!(calc instanceof TermOrdCalc)) {
        // Numeric:
        // TODO support SortedNumericDocValues
        DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {

            // doc values of the current segment; assigned in doSetNextReader before any collect() call
            NumericDocValues segValues = null;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
                setNextReaderFirstPhase(ctx);
                segValues = DocValues.getNumeric(ctx.reader(), sf.getName());
            }

            @Override
            public void collect(int segDoc) throws IOException {
                // Move the iterator forward only when it is behind the collected doc.
                int current = segValues.docID();
                if (current < segDoc) {
                    current = segValues.advance(segDoc);
                }
                if (current == segDoc) {
                    collectValFirstPhase(segDoc, segValues.longValue());
                }
            }
        });
        return;
    }

    // Strings
    // TODO support SortedSetDocValues
    final SortedDocValues globalDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
    final TermOrdCalc ordCalc = (TermOrdCalc) calc;
    // Let the calc resolve global ordinals back to terms; the checked exception is rethrown unchecked.
    ordCalc.lookupOrdFunction = ord -> {
        try {
            return globalDocValues.lookupOrd(ord);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    };

    DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {

        // doc values for the current segment/leaf (falls back to the global view)
        SortedDocValues segValues = globalDocValues;

        // maps the current segment's ordinals to global ordinals (identity unless a multi-segment view is in use)
        LongValues segToGlobal = LongValues.IDENTITY;

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
            setNextReaderFirstPhase(ctx);
            if (globalDocValues instanceof MultiDocValues.MultiSortedDocValues) {
                // Switch to this leaf's own doc values and its ordinal translation table.
                final MultiDocValues.MultiSortedDocValues multi = (MultiDocValues.MultiSortedDocValues) globalDocValues;
                segValues = multi.values[ctx.ord];
                segToGlobal = multi.mapping.getGlobalOrds(ctx.ord);
            }
        }

        @Override
        public void collect(int segDoc) throws IOException {
            // Move the iterator forward only when it is behind the collected doc.
            int current = segValues.docID();
            if (current < segDoc) {
                current = segValues.advance(segDoc);
            }
            if (current == segDoc) {
                collectValFirstPhase(segDoc, segToGlobal.get(segValues.ordValue()));
            }
        }
    });
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) IOException(java.io.IOException) MultiDocValues(org.apache.lucene.index.MultiDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) SimpleCollector(org.apache.lucene.search.SimpleCollector) LongValues(org.apache.lucene.util.LongValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 4 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class ExpandComponent method process.

/**
 * Expands the groups represented on the current page of results and adds them
 * to the response under the key {@code "expanded"}.
 *
 * <p>Outline of the work done here:
 * <ol>
 *   <li>Return immediately unless {@code rb.doExpand} is set.</li>
 *   <li>Resolve the expand field from the {@code ExpandParams.EXPAND_FIELD} request
 *       parameter, falling back to the field (and hint) of a
 *       {@code CollapsingPostFilter} found among the request filters.</li>
 *   <li>Build the expand query, sort, row limit and filter list from the
 *       {@code ExpandParams} request parameters, defaulting to the main query and
 *       the non-collapsing main filters.</li>
 *   <li>Walk the documents of the current page (sorted by doc id) and record the
 *       group value of each: ordinals in a bit set for {@code StrField}, raw longs
 *       in a hash set for every other field type.</li>
 *   <li>Run the expand query with a group-expanding collector and emit one
 *       {@code DocSlice} per non-empty group.</li>
 * </ol>
 *
 * @param rb the response builder carrying the request, the main results and the response
 * @throws IOException if no expand field can be determined, if query/filter parsing or
 *         group-query construction fails (the cause is wrapped), or on index read errors
 */
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doExpand) {
        return;
    }
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String field = params.get(ExpandParams.EXPAND_FIELD);
    String hint = null;
    if (field == null) {
        // No explicit expand field: borrow field and hint from a collapse post filter, if any.
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                    hint = cp.hint;
                }
            }
        }
    }
    if (field == null) {
        throw new IOException("Expand field is null.");
    }
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
    }
    // Expand query: explicit parameter if given, otherwise the main query.
    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    // Expand filters: explicit parameters if given, otherwise the main filters minus the collapse filter.
    List<Query> newFilters = new ArrayList<>();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                // Blank and match-all filters add nothing and are skipped.
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    SolrIndexSearcher searcher = req.getSearcher();
    LeafReader reader = searcher.getSlowAtomicReader();
    SchemaField schemaField = searcher.getSchema().getField(field);
    FieldType fieldType = schemaField.getType();
    // values stays null for non-string fields; that null is what selects the numeric code path below.
    SortedDocValues values = null;
    long nullValue = 0L;
    if (fieldType instanceof StrField) {
        //Get The Top Level SortedDocValues
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap<>();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
    } else {
        //Get the nullValue for the numeric collapse field
        String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
        final NumberType numType = fieldType.getNumberType();
        // we don't need to handle invalid 64-bit field types here.
        if (defaultValue != null) {
            if (numType == NumberType.INTEGER) {
                nullValue = Long.parseLong(defaultValue);
            } else if (numType == NumberType.FLOAT) {
                nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
            }
        } else if (NumberType.FLOAT.equals(numType)) {
            // Integer case already handled by nullValue defaulting to 0
            nullValue = Float.floatToIntBits(0.0f);
        }
    }
    FixedBitSet groupBits = null;
    LongHashSet groupSet = null;
    DocList docList = rb.getResults().docList;
    IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
    //Gather the groups for the current page of documents
    DocIterator idit = docList.iterator();
    int[] globalDocs = new int[docList.size()];
    int docsIndex = -1;
    while (idit.hasNext()) {
        globalDocs[++docsIndex] = idit.nextDoc();
    }
    // Ascending doc-id order lets the loops below move through segments and doc values forward-only.
    Arrays.sort(globalDocs);
    Query groupQuery = null;
    /*
    * This code gathers the group information for the current page.
    */
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    if (contexts.size() == 0) {
        //When no context is available we can skip the expanding
        return;
    }
    int currentContext = 0;
    int currentDocBase = contexts.get(currentContext).docBase;
    int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
    IntObjectHashMap<BytesRef> ordBytes = null;
    if (values != null) {
        // String field: groups are identified by (global) ordinal.
        groupBits = new FixedBitSet(values.getValueCount());
        MultiDocValues.OrdinalMap ordinalMap = null;
        SortedDocValues[] sortedDocValues = null;
        LongValues segmentOrdinalMap = null;
        SortedDocValues currentValues = null;
        if (values instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
            sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
            currentValues = sortedDocValues[currentContext];
            segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
        }
        int count = 0;
        ordBytes = new IntObjectHashMap<>();
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            // Advance to the segment that contains this document.
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                if (ordinalMap != null) {
                    currentValues = sortedDocValues[currentContext];
                    segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
                }
            }
            int contextDoc = globalDoc - currentDocBase;
            if (ordinalMap != null) {
                // Multi-segment view: read the segment ordinal, then translate it to a global ordinal.
                if (contextDoc > currentValues.docID()) {
                    currentValues.advance(contextDoc);
                }
                if (contextDoc == currentValues.docID()) {
                    int ord = currentValues.ordValue();
                    ++count;
                    BytesRef ref = currentValues.lookupOrd(ord);
                    ord = (int) segmentOrdinalMap.get(ord);
                    // Deep copy: the looked-up BytesRef is stored beyond this iteration.
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            } else {
                // Top-level doc values: ordinals are already global and addressed by global doc id.
                if (globalDoc > values.docID()) {
                    values.advance(globalDoc);
                }
                if (globalDoc == values.docID()) {
                    int ord = values.ordValue();
                    ++count;
                    BytesRef ref = values.lookupOrd(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            }
        }
        // A group filter query is only built when 1..199 page documents carried a group value.
        if (count > 0 && count < 200) {
            try {
                groupQuery = getGroupQuery(field, count, ordBytes);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    } else {
        // Numeric field: groups are identified by the raw long value.
        groupSet = new LongHashSet(docList.size());
        // DocValues.getNumeric returns an empty iterator instead of null when the segment
        // has no values for the field, so the docID()/advance() calls below cannot NPE.
        NumericDocValues collapseValues = DocValues.getNumeric(contexts.get(currentContext).reader(), field);
        int count = 0;
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            // Advance to the segment that contains this document.
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                collapseValues = DocValues.getNumeric(contexts.get(currentContext).reader(), field);
            }
            int contextDoc = globalDoc - currentDocBase;
            int valueDocID = collapseValues.docID();
            if (valueDocID < contextDoc) {
                valueDocID = collapseValues.advance(contextDoc);
            }
            // Documents without a value are treated as holding 0.
            long value;
            if (valueDocID == contextDoc) {
                value = collapseValues.longValue();
            } else {
                value = 0;
            }
            if (value != nullValue) {
                ++count;
                groupSet.add(value);
                collapsedSet.add(globalDoc);
            }
        }
        // A group filter query is only built when 1..199 page documents carried a group value.
        if (count > 0 && count < 200) {
            if (fieldType.isPointField()) {
                groupQuery = getPointGroupQuery(schemaField, count, groupSet);
            } else {
                groupQuery = getGroupQuery(field, fieldType, count, groupSet);
            }
        }
    }
    Collector collector;
    if (sort != null) {
        sort = sort.rewrite(searcher);
    }
    Collector groupExpandCollector = null;
    if (values != null) {
        //Get The Top Level SortedDocValues again so we can re-iterate:
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap<>();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
        groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
    } else {
        groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
    }
    if (groupQuery != null) {
        //Limits the results to documents that are in the same group as the documents in the page.
        newFilters.add(groupQuery);
    }
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        // Post filters run first and delegate surviving documents to the expand collector.
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }
    if (pfilter.filter == null) {
        searcher.search(query, collector);
    } else {
        Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
        searcher.search(q, collector);
    }
    LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
    NamedList<DocSlice> outMap = new SimpleOrderedMap<>();
    CharsRefBuilder charsRef = new CharsRefBuilder();
    for (LongObjectCursor<Collector> cursor : groups) {
        long groupValue = cursor.key;
        TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        // Groups that collected no documents are left out of the response.
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
            if (fieldType instanceof StrField) {
                // For string fields the group key is the global ordinal recorded while scanning the page.
                final BytesRef bytesRef = ordBytes.get((int) groupValue);
                fieldType.indexedToReadable(bytesRef, charsRef);
                String group = charsRef.toString();
                outMap.add(group, slice);
            } else {
                outMap.add(numericToString(fieldType, groupValue), slice);
            }
        }
    }
    rb.rsp.add("expanded", outMap);
}
Also used : StrField(org.apache.solr.schema.StrField) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) LongObjectHashMap(com.carrotsearch.hppc.LongObjectHashMap) IntObjectHashMap(com.carrotsearch.hppc.IntObjectHashMap) ArrayList(java.util.ArrayList) IntHashSet(com.carrotsearch.hppc.IntHashSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) DocSlice(org.apache.solr.search.DocSlice) ScoreDoc(org.apache.lucene.search.ScoreDoc) FixedBitSet(org.apache.lucene.util.FixedBitSet) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) SortedDocValues(org.apache.lucene.index.SortedDocValues) LongHashSet(com.carrotsearch.hppc.LongHashSet) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) NumberType(org.apache.solr.schema.NumberType) QParser(org.apache.solr.search.QParser) SolrParams(org.apache.solr.common.params.SolrParams) NumericDocValues(org.apache.lucene.index.NumericDocValues) DocIterator(org.apache.solr.search.DocIterator) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) UninvertingReader(org.apache.solr.uninverting.UninvertingReader) TopDocs(org.apache.lucene.search.TopDocs) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) LeafCollector(org.apache.lucene.search.LeafCollector) Collector(org.apache.lucene.search.Collector) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) BytesRef(org.apache.lucene.util.BytesRef) LeafReader(org.apache.lucene.index.LeafReader) FilterLeafReader(org.apache.lucene.index.FilterLeafReader) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) 
SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IOException(java.io.IOException) FieldType(org.apache.solr.schema.FieldType) CollapsingQParserPlugin(org.apache.solr.search.CollapsingQParserPlugin) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) FieldType(org.apache.solr.schema.FieldType) DocValuesType(org.apache.lucene.index.DocValuesType) LongValues(org.apache.lucene.util.LongValues) DocList(org.apache.solr.search.DocList)

Example 5 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class TestDirectMonotonic method testSimple.

/**
 * Writes a small monotonic sequence with {@code DirectMonotonicWriter}, reads it
 * back through {@code DirectMonotonicReader}, and checks that every value
 * round-trips unchanged.
 *
 * <p>The directory is held in try-with-resources so that it is closed even when
 * an assertion or I/O call fails part-way through the test.
 *
 * @throws IOException if writing or reading the test files fails
 */
public void testSimple() throws IOException {
    final int blockShift = 2;
    final List<Long> actualValues = Arrays.asList(1L, 2L, 5L, 7L, 8L, 100L);
    final int numValues = actualValues.size();
    try (Directory dir = newDirectory()) {
        final long dataLength;
        // Write phase: metadata and packed data go to two separate outputs.
        try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
            IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
            DirectMonotonicWriter w = DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
            for (long v : actualValues) {
                w.add(v);
            }
            w.finish();
            // Remember how many data bytes were written so the reader can slice exactly that range.
            dataLength = dataOut.getFilePointer();
        }
        // Read phase: load the metadata, then verify each value by index.
        try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
            IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
            DirectMonotonicReader.Meta meta = DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
            LongValues values = DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
            for (int i = 0; i < numValues; ++i) {
                final long v = values.get(i);
                assertEquals(actualValues.get(i).longValue(), v);
            }
        }
    }
}
Also used : IndexInput(org.apache.lucene.store.IndexInput) LongValues(org.apache.lucene.util.LongValues) IndexOutput(org.apache.lucene.store.IndexOutput) Directory(org.apache.lucene.store.Directory)

Aggregations

LongValues (org.apache.lucene.util.LongValues): 31, IOException (java.io.IOException): 8, RandomAccessInput (org.apache.lucene.store.RandomAccessInput): 8, IndexInput (org.apache.lucene.store.IndexInput): 7, BytesRef (org.apache.lucene.util.BytesRef): 6, IndexOutput (org.apache.lucene.store.IndexOutput): 5, Directory (org.apache.lucene.store.Directory): 4, ArrayList (java.util.ArrayList): 3, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3, NumericDocValues (org.apache.lucene.index.NumericDocValues): 3, SortedDocValues (org.apache.lucene.index.SortedDocValues): 3, Bits (org.apache.lucene.util.Bits): 3, MultiDocValues (org.apache.lucene.index.MultiDocValues): 2, SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 2, DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 2, ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 2, DirectWriter (org.apache.lucene.util.packed.DirectWriter): 2, FieldFacetStats (org.apache.solr.handler.component.FieldFacetStats): 2, IntHashSet (com.carrotsearch.hppc.IntHashSet): 1, IntObjectHashMap (com.carrotsearch.hppc.IntObjectHashMap): 1