Search in sources :

Example 61 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class FacetFieldProcessorByArrayDV method findStartAndEndOrds.

@Override
protected void findStartAndEndOrds() throws IOException {
    if (multiValuedField) {
        si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    } else {
        // multi-valued view
        SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
        si = DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
    }
    if (prefixRef != null) {
        startTermIndex = (int) si.lookupTerm(prefixRef.get());
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef.get());
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 0;
        endTermIndex = (int) si.getValueCount();
    }
    nTerms = endTermIndex - startTermIndex;
}
Also used : MultiDocValues(org.apache.lucene.index.MultiDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) SolrException(org.apache.solr.common.SolrException)

Example 62 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class FacetFieldProcessorByArrayDV method collectDocs.

@Override
protected void collectDocs() throws IOException {
    int domainSize = fcontext.base.size();
    if (nTerms <= 0 || domainSize < effectiveMincount) {
        // TODO: what about allBuckets? missing bucket?
        return;
    }
    // TODO: refactor some of this logic into a base class
    boolean countOnly = collectAcc == null && allBucketsAcc == null;
    boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();
    // Are we expecting many hits per bucket?
    // FUTURE: pro-rate for nTerms?
    // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
    // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
    // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
    // than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets had 6-7 matches each.
    // this was for heap docvalues produced by UninvertingReader
    // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
    long domainMultiplier = multiValuedField ? 4L : 2L;
    // +3 to increase test coverage with small tests
    boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);
    // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
    // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
    // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
    // the docid is not used)
    boolean canDoPerSeg = countOnly && fullRange;
    boolean accumSeg = manyHitsPerBucket && canDoPerSeg;
    // internal - override perSeg heuristic
    if (freq.perSeg != null)
        accumSeg = canDoPerSeg && freq.perSeg;
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    Filter filter = fcontext.base.getTopFilter();
    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
        LeafReaderContext subCtx = leaves.get(subIdx);
        setNextReaderFirstPhase(subCtx);
        // solr docsets already exclude any deleted docs
        DocIdSet dis = filter.getDocIdSet(subCtx, null);
        DocIdSetIterator disi = dis.iterator();
        SortedDocValues singleDv = null;
        SortedSetDocValues multiDv = null;
        if (multiValuedField) {
            // TODO: get sub from multi?
            multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
            if (multiDv == null) {
                multiDv = DocValues.emptySortedSet();
            }
            // this will be null if this is not a wrapped single valued docvalues.
            if (unwrap_singleValued_multiDv) {
                singleDv = DocValues.unwrapSingleton(multiDv);
            }
        } else {
            singleDv = subCtx.reader().getSortedDocValues(sf.getName());
            if (singleDv == null) {
                singleDv = DocValues.emptySorted();
            }
        }
        LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
        if (singleDv != null) {
            if (accumSeg) {
                collectPerSeg(singleDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(singleDv, disi, toGlobal);
                } else {
                    collectDocs(singleDv, disi, toGlobal);
                }
            }
        } else {
            if (accumSeg) {
                collectPerSeg(multiDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(multiDv, disi, toGlobal);
                } else {
                    collectDocs(multiDv, disi, toGlobal);
                }
            }
        }
    }
    // better GC
    reuse = null;
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSet(org.apache.lucene.search.DocIdSet) LongValues(org.apache.lucene.util.LongValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 63 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class TestMemoryIndex method testDocValues.

public void testDocValues() throws Exception {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("numeric", 29L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
    doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
    doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));
    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
    LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
    NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
    assertEquals(0, numericDocValues.nextDoc());
    assertEquals(29L, numericDocValues.longValue());
    SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
    assertEquals(0, sortedNumericDocValues.nextDoc());
    assertEquals(5, sortedNumericDocValues.docValueCount());
    assertEquals(30L, sortedNumericDocValues.nextValue());
    assertEquals(31L, sortedNumericDocValues.nextValue());
    assertEquals(32L, sortedNumericDocValues.nextValue());
    assertEquals(32L, sortedNumericDocValues.nextValue());
    assertEquals(33L, sortedNumericDocValues.nextValue());
    BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
    assertEquals(0, binaryDocValues.nextDoc());
    assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
    SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
    assertEquals(0, sortedDocValues.nextDoc());
    assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
    assertEquals(0, sortedDocValues.ordValue());
    assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
    SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
    assertEquals(3, sortedSetDocValues.getValueCount());
    assertEquals(0, sortedSetDocValues.nextDoc());
    assertEquals(0L, sortedSetDocValues.nextOrd());
    assertEquals(1L, sortedSetDocValues.nextOrd());
    assertEquals(2L, sortedSetDocValues.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
    assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
    assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
    assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 64 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class SortedSetFieldSource method getValues.

@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    SortedSetDocValues sortedSet = DocValues.getSortedSet(readerContext.reader(), field);
    SortedDocValues view = SortedSetSelector.wrap(sortedSet, selector);
    return new DocTermsIndexDocValues(this.field, this, view) {

        @Override
        protected String toTerm(String readableValue) {
            return readableValue;
        }

        @Override
        public Object objectVal(int doc) throws IOException {
            return strVal(doc);
        }
    };
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DocTermsIndexDocValues(org.apache.lucene.queries.function.docvalues.DocTermsIndexDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 65 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class OrdFieldSource method getValues.

@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final int off = readerContext.docBase;
    final LeafReader r;
    Object o = context.get("searcher");
    if (o instanceof SolrIndexSearcher) {
        SolrIndexSearcher is = (SolrIndexSearcher) o;
        SchemaField sf = is.getSchema().getFieldOrNull(field);
        if (sf != null && sf.hasDocValues() == false && sf.multiValued() == false && sf.getType().getNumberType() != null) {
            // it's a single-valued numeric field: we must currently create insanity :(
            List<LeafReaderContext> leaves = is.getIndexReader().leaves();
            LeafReader[] insaneLeaves = new LeafReader[leaves.size()];
            int upto = 0;
            for (LeafReaderContext raw : leaves) {
                insaneLeaves[upto++] = Insanity.wrapInsanity(raw.reader(), field);
            }
            r = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves));
        } else {
            // reuse ordinalmap
            r = ((SolrIndexSearcher) o).getSlowAtomicReader();
        }
    } else {
        IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
        r = SlowCompositeReaderWrapper.wrap(topReader);
    }
    // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
    final SortedDocValues sindex = SortedSetSelector.wrap(DocValues.getSortedSet(r, field), SortedSetSelector.Type.MIN);
    return new IntDocValues(this) {

        private int lastDocID;

        private int getOrdForDoc(int docID) throws IOException {
            if (docID < lastDocID) {
                throw new IllegalArgumentException("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
            }
            if (docID > sindex.docID()) {
                sindex.advance(docID);
            }
            if (docID == sindex.docID()) {
                return sindex.ordValue();
            } else {
                return -1;
            }
        }

        protected String toTerm(String readableValue) {
            return readableValue;
        }

        @Override
        public int intVal(int doc) throws IOException {
            return getOrdForDoc(doc + off);
        }

        @Override
        public int ordVal(int doc) throws IOException {
            return getOrdForDoc(doc + off);
        }

        @Override
        public int numOrd() {
            return sindex.getValueCount();
        }

        @Override
        public boolean exists(int doc) throws IOException {
            return getOrdForDoc(doc + off) != 0;
        }

        @Override
        public ValueFiller getValueFiller() {
            return new ValueFiller() {

                private final MutableValueInt mval = new MutableValueInt();

                @Override
                public MutableValue getValue() {
                    return mval;
                }

                @Override
                public void fillValue(int doc) throws IOException {
                    mval.value = getOrdForDoc(doc);
                    mval.exists = mval.value != 0;
                }
            };
        }
    };
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) MultiReader(org.apache.lucene.index.MultiReader) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SortedDocValues(org.apache.lucene.index.SortedDocValues) SchemaField(org.apache.solr.schema.SchemaField) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) MutableValueInt(org.apache.lucene.util.mutable.MutableValueInt) IntDocValues(org.apache.lucene.queries.function.docvalues.IntDocValues)

Aggregations

SortedDocValues (org.apache.lucene.index.SortedDocValues)65 BytesRef (org.apache.lucene.util.BytesRef)32 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)27 LeafReader (org.apache.lucene.index.LeafReader)22 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)22 Document (org.apache.lucene.document.Document)21 Directory (org.apache.lucene.store.Directory)15 NumericDocValues (org.apache.lucene.index.NumericDocValues)14 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)14 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)13 IOException (java.io.IOException)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)11 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)10 MultiDocValues (org.apache.lucene.index.MultiDocValues)10 IndexReader (org.apache.lucene.index.IndexReader)9 OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)9 ArrayList (java.util.ArrayList)8 DoublePoint (org.apache.lucene.document.DoublePoint)8 FloatPoint (org.apache.lucene.document.FloatPoint)8 IntPoint (org.apache.lucene.document.IntPoint)8