use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class FacetFieldProcessorByArrayDV method findStartAndEndOrds.
/**
 * Loads the doc values for the faceted field into {@code si} and derives the ordinal window
 * {@code [startTermIndex, endTermIndex)} plus its size {@code nTerms}.
 *
 * <p>A single-valued field is wrapped with {@code DocValues.singleton} so the rest of the
 * processor can treat {@code si} uniformly. When the underlying doc values are one of the
 * {@code MultiDocValues} composite implementations, their {@code mapping} is remembered in
 * {@code ordinalMap}.
 *
 * @throws IOException if reading doc values or looking up a term fails
 * @throws SolrException (BAD_REQUEST) if the field reports {@code Integer.MAX_VALUE} or more
 *     unique values, since ordinals are narrowed to {@code int} below
 */
@Override
protected void findStartAndEndOrds() throws IOException {
  if (!multiValuedField) {
    // Single-valued field: expose it through a multi-valued view.
    SortedDocValues singleValued = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
    si = DocValues.singleton(singleValued);
    if (singleValued instanceof MultiDocValues.MultiSortedDocValues) {
      ordinalMap = ((MultiDocValues.MultiSortedDocValues) singleValued).mapping;
    }
  } else {
    si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
    if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
      ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
    }
  }

  final long uniqueValues = si.getValueCount();
  if (uniqueValues >= Integer.MAX_VALUE) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Field has too many unique values. field=" + sf + " nterms= " + uniqueValues);
  }

  if (prefixRef == null) {
    // No prefix restriction: the window spans every ordinal.
    startTermIndex = 0;
    endTermIndex = (int) uniqueValues;
  } else {
    // A negative lookup result is turned into a non-negative index via -(result) - 1
    // (presumably the insertion point of the missing term -- confirm against lookupTerm's contract).
    final int lowerLookup = (int) si.lookupTerm(prefixRef.get());
    startTermIndex = lowerLookup < 0 ? -lowerLookup - 1 : lowerLookup;

    // Extend the prefix with BIG_TERM and look it up again to find the end of the window;
    // the extended key is expected never to be an actual term, hence the assertion.
    prefixRef.append(UnicodeUtil.BIG_TERM);
    endTermIndex = (int) si.lookupTerm(prefixRef.get());
    assert endTermIndex < 0;
    endTermIndex = -endTermIndex - 1;
  }

  nTerms = endTermIndex - startTermIndex;
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class FacetFieldProcessorByArrayDV method collectDocs.
/**
 * Walks every leaf of the searcher's index reader and feeds the matching documents of the base
 * domain into one of the per-leaf collectors ({@code collectPerSeg}, {@code collectCounts} or
 * {@code collectDocs}), using either the single-valued or the multi-valued doc values of the
 * faceted field.
 *
 * <p>Returns without collecting anything when there are no terms in range or when the base
 * domain is smaller than {@code effectiveMincount}.
 *
 * @throws IOException if reading the filter, the doc values or collecting fails
 */
@Override
protected void collectDocs() throws IOException {
  final int baseSize = fcontext.base.size();
  if (nTerms <= 0 || baseSize < effectiveMincount) {
    // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  final boolean onlyCounts = collectAcc == null && allBucketsAcc == null;
  final boolean wholeOrdRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Heuristic: do we expect many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields. This info is
  //         available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values,
  // crossover point where global counting was slower than per-segment counting was a domain of
  // 658k docs. At that point, top 10 buckets had 6-7 matches each. This was for heap docvalues
  // produced by UninvertingReader. Since these values were randomly distributed, the domain
  // multiplier is rounded up to account for less random real world data.
  final long multiplier = multiValuedField ? 4L : 2L;
  // +3 to increase test coverage with small tests
  final boolean expectManyHits = baseSize * multiplier > (si.getValueCount() + 3);

  // If we're only calculating counts, we're not prefixing, and we expect to collect many
  // documents per unique value, then collect per-segment before mapping to global ords at the
  // end. This saves redundant seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where we can use this as well
  //         (i.e. those where the docid is not used)
  final boolean canDoPerSeg = onlyCounts && wholeOrdRange;
  // freq.perSeg, when set, is an internal override of the heuristic above.
  final boolean accumulatePerSeg =
      freq.perSeg != null ? (canDoPerSeg && freq.perSeg) : (expectManyHits && canDoPerSeg);

  final List<LeafReaderContext> leafContexts = fcontext.searcher.getIndexReader().leaves();
  final Filter baseFilter = fcontext.base.getTopFilter();
  final int leafCount = leafContexts.size();

  for (int leafOrd = 0; leafOrd < leafCount; leafOrd++) {
    final LeafReaderContext leaf = leafContexts.get(leafOrd);
    setNextReaderFirstPhase(leaf);

    // solr docsets already exclude any deleted docs
    final DocIdSetIterator docs = baseFilter.getDocIdSet(leaf, null).iterator();

    SortedDocValues singleValued = null;
    SortedSetDocValues multiValued = null;
    if (!multiValuedField) {
      singleValued = leaf.reader().getSortedDocValues(sf.getName());
      if (singleValued == null) {
        singleValued = DocValues.emptySorted();
      }
    } else {
      // TODO: get sub from multi?
      multiValued = leaf.reader().getSortedSetDocValues(sf.getName());
      if (multiValued == null) {
        multiValued = DocValues.emptySortedSet();
      }
      // Stays null unless the multi-valued doc values are a wrapped single-valued instance.
      if (unwrap_singleValued_multiDv) {
        singleValued = DocValues.unwrapSingleton(multiValued);
      }
    }

    final LongValues segToGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(leafOrd);
    final boolean countsWithMapping = canDoPerSeg && segToGlobal != null;

    if (singleValued != null) {
      if (accumulatePerSeg) {
        collectPerSeg(singleValued, docs, segToGlobal);
      } else if (countsWithMapping) {
        collectCounts(singleValued, docs, segToGlobal);
      } else {
        collectDocs(singleValued, docs, segToGlobal);
      }
    } else if (accumulatePerSeg) {
      collectPerSeg(multiValued, docs, segToGlobal);
    } else if (countsWithMapping) {
      collectCounts(multiValued, docs, segToGlobal);
    } else {
      collectDocs(multiValued, docs, segToGlobal);
    }
  }

  // better GC
  reuse = null;
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestMemoryIndex method testDocValues.
/**
 * Builds a {@code MemoryIndex} from one document carrying every doc-values field type and checks
 * what the resulting leaf reader exposes for each of them:
 * numeric (single value), sorted-numeric (five values returned in ascending order, duplicate
 * kept), binary, sorted (single term at ordinal 0) and sorted-set (four added terms yielding
 * three distinct ordinals in term order).
 */
public void testDocValues() throws Exception {
  final Document document = new Document();
  document.add(new NumericDocValuesField("numeric", 29L));
  for (long value : new long[] {33L, 32L, 32L, 31L, 30L}) {
    document.add(new SortedNumericDocValuesField("sorted_numeric", value));
  }
  document.add(new BinaryDocValuesField("binary", new BytesRef("a")));
  document.add(new SortedDocValuesField("sorted", new BytesRef("b")));
  for (String term : new String[] {"f", "d", "d", "c"}) {
    document.add(new SortedSetDocValuesField("sorted_set", new BytesRef(term)));
  }

  final MemoryIndex index = MemoryIndex.fromDocument(document, analyzer);
  final LeafReader reader = index.createSearcher().getIndexReader().leaves().get(0).reader();

  // numeric
  final NumericDocValues numeric = reader.getNumericDocValues("numeric");
  assertEquals(0, numeric.nextDoc());
  assertEquals(29L, numeric.longValue());

  // sorted numeric: values come back ascending, with the duplicate 32 retained
  final SortedNumericDocValues sortedNumeric = reader.getSortedNumericDocValues("sorted_numeric");
  assertEquals(0, sortedNumeric.nextDoc());
  assertEquals(5, sortedNumeric.docValueCount());
  for (long expected : new long[] {30L, 31L, 32L, 32L, 33L}) {
    assertEquals(expected, sortedNumeric.nextValue());
  }

  // binary
  final BinaryDocValues binary = reader.getBinaryDocValues("binary");
  assertEquals(0, binary.nextDoc());
  assertEquals("a", binary.binaryValue().utf8ToString());

  // sorted
  final SortedDocValues sorted = reader.getSortedDocValues("sorted");
  assertEquals(0, sorted.nextDoc());
  assertEquals("b", sorted.binaryValue().utf8ToString());
  assertEquals(0, sorted.ordValue());
  assertEquals("b", sorted.lookupOrd(0).utf8ToString());

  // sorted set: the duplicate "d" collapses, leaving three ordinals
  final SortedSetDocValues sortedSet = reader.getSortedSetDocValues("sorted_set");
  assertEquals(3, sortedSet.getValueCount());
  assertEquals(0, sortedSet.nextDoc());
  for (long expectedOrd = 0L; expectedOrd < 3L; expectedOrd++) {
    assertEquals(expectedOrd, sortedSet.nextOrd());
  }
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
  final String[] expectedTerms = {"c", "d", "f"};
  for (int ord = 0; ord < expectedTerms.length; ord++) {
    assertEquals(expectedTerms[ord], sortedSet.lookupOrd((long) ord).utf8ToString());
  }
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class SortedSetFieldSource method getValues.
/**
 * Returns function values for this source's field on the given segment.
 *
 * <p>The segment's sorted-set doc values are reduced to a single-valued view with
 * {@code SortedSetSelector.wrap} using this source's {@code selector}; the view is then exposed
 * through a {@code DocTermsIndexDocValues} whose object value is the string value and whose
 * {@code toTerm} is the identity.
 *
 * @param context the function-query context (unused here)
 * @param readerContext the segment to read doc values from
 * @throws IOException if the doc values cannot be read
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final SortedDocValues selected =
      SortedSetSelector.wrap(DocValues.getSortedSet(readerContext.reader(), field), selector);

  return new DocTermsIndexDocValues(this.field, this, selected) {
    @Override
    public Object objectVal(int doc) throws IOException {
      // The object form of a value is simply its string form.
      return strVal(doc);
    }

    @Override
    protected String toTerm(String readableValue) {
      // Readable values are used as terms unchanged.
      return readableValue;
    }
  };
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class OrdFieldSource method getValues.
/**
 * Returns function values that yield, for each document of the given segment, the ordinal of the
 * field's (minimum) term in a top-level, whole-index view of the field.
 *
 * <p>The top-level reader is chosen as follows:
 * <ul>
 *   <li>if the context holds a {@code SolrIndexSearcher} under {@code "searcher"} and the schema
 *       field is a single-valued numeric field without doc values, every leaf is wrapped with
 *       {@code Insanity.wrapInsanity} and the leaves are recombined;</li>
 *   <li>otherwise, with a {@code SolrIndexSearcher}, its slow atomic reader is reused;</li>
 *   <li>without one, the top-level reader of {@code readerContext} is wrapped.</li>
 * </ul>
 *
 * <p>Because ordinals are read from a top-level view, segment-local doc ids are shifted by the
 * segment's {@code docBase} before lookup. A document without a value has ordinal {@code -1} and
 * is reported as not existing. Documents must be requested in non-decreasing doc id order.
 *
 * @param context function-query context; consulted for the {@code "searcher"} entry
 * @param readerContext the segment for which values are requested
 * @throws IOException if the underlying readers or doc values cannot be accessed
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final int off = readerContext.docBase;
  final LeafReader r;
  Object o = context.get("searcher");
  if (o instanceof SolrIndexSearcher) {
    SolrIndexSearcher is = (SolrIndexSearcher) o;
    SchemaField sf = is.getSchema().getFieldOrNull(field);
    if (sf != null && sf.hasDocValues() == false && sf.multiValued() == false && sf.getType().getNumberType() != null) {
      // it's a single-valued numeric field: we must currently create insanity :(
      List<LeafReaderContext> leaves = is.getIndexReader().leaves();
      LeafReader[] insaneLeaves = new LeafReader[leaves.size()];
      int upto = 0;
      for (LeafReaderContext raw : leaves) {
        insaneLeaves[upto++] = Insanity.wrapInsanity(raw.reader(), field);
      }
      r = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves));
    } else {
      // reuse ordinalmap
      r = is.getSlowAtomicReader();
    }
  } else {
    IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
    r = SlowCompositeReaderWrapper.wrap(topReader);
  }
  // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
  final SortedDocValues sindex = SortedSetSelector.wrap(DocValues.getSortedSet(r, field), SortedSetSelector.Type.MIN);
  return new IntDocValues(this) {
    /** Highest top-level doc id requested so far; used to reject backwards access. */
    private int lastDocID;

    /**
     * Returns the ordinal for the given top-level doc id, or -1 if the document has no value.
     * The underlying iterator only moves forward, so ids must not decrease between calls.
     */
    private int getOrdForDoc(int docID) throws IOException {
      if (docID < lastDocID) {
        throw new IllegalArgumentException("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
      }
      // Remember the position so the out-of-order guard above is actually effective.
      lastDocID = docID;
      if (docID > sindex.docID()) {
        sindex.advance(docID);
      }
      if (docID == sindex.docID()) {
        return sindex.ordValue();
      } else {
        return -1;
      }
    }

    // Not an override of anything visible in this block; kept unchanged from the original.
    protected String toTerm(String readableValue) {
      return readableValue;
    }

    @Override
    public int intVal(int doc) throws IOException {
      return getOrdForDoc(doc + off);
    }

    @Override
    public int ordVal(int doc) throws IOException {
      return getOrdForDoc(doc + off);
    }

    @Override
    public int numOrd() {
      return sindex.getValueCount();
    }

    @Override
    public boolean exists(int doc) throws IOException {
      // Missing values are reported as -1; ordinal 0 is a legitimate (first) term.
      return getOrdForDoc(doc + off) >= 0;
    }

    @Override
    public ValueFiller getValueFiller() {
      return new ValueFiller() {
        private final MutableValueInt mval = new MutableValueInt();

        @Override
        public MutableValue getValue() {
          return mval;
        }

        @Override
        public void fillValue(int doc) throws IOException {
          // Apply the same docBase shift as intVal/ordVal/exists: sindex is a top-level view.
          mval.value = getOrdForDoc(doc + off);
          mval.exists = mval.value >= 0;
        }
      };
    }
  };
}
Aggregations