use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class TestMemoryIndex method testDocValues.
public void testDocValues() throws Exception {
Document doc = new Document();
doc.add(new NumericDocValuesField("numeric", 29L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
assertEquals(0, numericDocValues.nextDoc());
assertEquals(29L, numericDocValues.longValue());
SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
assertEquals(0, sortedNumericDocValues.nextDoc());
assertEquals(5, sortedNumericDocValues.docValueCount());
assertEquals(30L, sortedNumericDocValues.nextValue());
assertEquals(31L, sortedNumericDocValues.nextValue());
assertEquals(32L, sortedNumericDocValues.nextValue());
assertEquals(32L, sortedNumericDocValues.nextValue());
assertEquals(33L, sortedNumericDocValues.nextValue());
BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
assertEquals(0, binaryDocValues.nextDoc());
assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
assertEquals(0, sortedDocValues.nextDoc());
assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
assertEquals(0, sortedDocValues.ordValue());
assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
assertEquals(3, sortedSetDocValues.getValueCount());
assertEquals(0, sortedSetDocValues.nextDoc());
assertEquals(0L, sortedSetDocValues.nextOrd());
assertEquals(1L, sortedSetDocValues.nextOrd());
assertEquals(2L, sortedSetDocValues.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class JoinUtil method createJoinQuery.
/**
* Method for query time joining for numeric fields. It supports multi- and single- values longs, ints, floats and longs.
* All considerations from {@link JoinUtil#createJoinQuery(String, boolean, String, Query, IndexSearcher, ScoreMode)} are applicable here too,
* though memory consumption might be higher.
* <p>
*
* @param fromField The from field to join from
* @param multipleValuesPerDocument Whether the from field has multiple terms per document
* when true fromField might be {@link DocValuesType#SORTED_NUMERIC},
* otherwise fromField should be {@link DocValuesType#NUMERIC}
* @param toField The to field to join to, should be {@link IntPoint}, {@link LongPoint}, {@link FloatPoint}
* or {@link DoublePoint}.
* @param numericType either {@link java.lang.Integer}, {@link java.lang.Long}, {@link java.lang.Float}
* or {@link java.lang.Double} it should correspond to toField types
* @param fromQuery The query to match documents on the from side
* @param fromSearcher The searcher that executed the specified fromQuery
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
* @return a {@link Query} instance that can be used to join documents based on the
* terms in the from and to field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String fromField, boolean multipleValuesPerDocument, String toField, Class<? extends Number> numericType, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode) throws IOException {
TreeSet<Long> joinValues = new TreeSet<>();
Map<Long, Float> aggregatedScores = new HashMap<>();
Map<Long, Integer> occurrences = new HashMap<>();
boolean needsScore = scoreMode != ScoreMode.None;
BiConsumer<Long, Float> scoreAggregator;
if (scoreMode == ScoreMode.Max) {
scoreAggregator = (key, score) -> {
Float currentValue = aggregatedScores.putIfAbsent(key, score);
if (currentValue != null) {
aggregatedScores.put(key, Math.max(currentValue, score));
}
};
} else if (scoreMode == ScoreMode.Min) {
scoreAggregator = (key, score) -> {
Float currentValue = aggregatedScores.putIfAbsent(key, score);
if (currentValue != null) {
aggregatedScores.put(key, Math.min(currentValue, score));
}
};
} else if (scoreMode == ScoreMode.Total) {
scoreAggregator = (key, score) -> {
Float currentValue = aggregatedScores.putIfAbsent(key, score);
if (currentValue != null) {
aggregatedScores.put(key, currentValue + score);
}
};
} else if (scoreMode == ScoreMode.Avg) {
scoreAggregator = (key, score) -> {
Float currentSore = aggregatedScores.putIfAbsent(key, score);
if (currentSore != null) {
aggregatedScores.put(key, currentSore + score);
}
Integer currentOccurrence = occurrences.putIfAbsent(key, 1);
if (currentOccurrence != null) {
occurrences.put(key, ++currentOccurrence);
}
};
} else {
scoreAggregator = (key, score) -> {
throw new UnsupportedOperationException();
};
}
LongFunction<Float> joinScorer;
if (scoreMode == ScoreMode.Avg) {
joinScorer = (joinValue) -> {
Float aggregatedScore = aggregatedScores.get(joinValue);
Integer occurrence = occurrences.get(joinValue);
return aggregatedScore / occurrence;
};
} else {
joinScorer = aggregatedScores::get;
}
Collector collector;
if (multipleValuesPerDocument) {
collector = new SimpleCollector() {
SortedNumericDocValues sortedNumericDocValues;
Scorer scorer;
@Override
public void collect(int doc) throws IOException {
if (doc > sortedNumericDocValues.docID()) {
sortedNumericDocValues.advance(doc);
}
if (doc == sortedNumericDocValues.docID()) {
for (int i = 0; i < sortedNumericDocValues.docValueCount(); i++) {
long value = sortedNumericDocValues.nextValue();
joinValues.add(value);
if (needsScore) {
scoreAggregator.accept(value, scorer.score());
}
}
}
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
sortedNumericDocValues = DocValues.getSortedNumeric(context.reader(), fromField);
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public boolean needsScores() {
return needsScore;
}
};
} else {
collector = new SimpleCollector() {
NumericDocValues numericDocValues;
Scorer scorer;
private int lastDocID = -1;
private boolean docsInOrder(int docID) {
if (docID < lastDocID) {
throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
}
lastDocID = docID;
return true;
}
@Override
public void collect(int doc) throws IOException {
assert docsInOrder(doc);
int dvDocID = numericDocValues.docID();
if (dvDocID < doc) {
dvDocID = numericDocValues.advance(doc);
}
long value;
if (dvDocID == doc) {
value = numericDocValues.longValue();
} else {
value = 0;
}
joinValues.add(value);
if (needsScore) {
scoreAggregator.accept(value, scorer.score());
}
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
numericDocValues = DocValues.getNumeric(context.reader(), fromField);
lastDocID = -1;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public boolean needsScores() {
return needsScore;
}
};
}
fromSearcher.search(fromQuery, collector);
Iterator<Long> iterator = joinValues.iterator();
final int bytesPerDim;
final BytesRef encoded = new BytesRef();
final PointInSetIncludingScoreQuery.Stream stream;
if (Integer.class.equals(numericType)) {
bytesPerDim = Integer.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
IntPoint.encodeDimension((int) value, encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else if (Long.class.equals(numericType)) {
bytesPerDim = Long.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
LongPoint.encodeDimension(value, encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else if (Float.class.equals(numericType)) {
bytesPerDim = Float.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
FloatPoint.encodeDimension(Float.intBitsToFloat((int) value), encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else if (Double.class.equals(numericType)) {
bytesPerDim = Double.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
DoublePoint.encodeDimension(Double.longBitsToDouble(value), encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else {
throw new IllegalArgumentException("unsupported numeric type, only Integer, Long, Float and Double are supported");
}
encoded.bytes = new byte[bytesPerDim];
encoded.length = bytesPerDim;
if (needsScore) {
return new PointInSetIncludingScoreQuery(scoreMode, fromQuery, multipleValuesPerDocument, toField, bytesPerDim, stream) {
@Override
protected String toString(byte[] value) {
return toString.apply(value, numericType);
}
};
} else {
return new PointInSetQuery(toField, 1, bytesPerDim, stream) {
@Override
protected String toString(byte[] value) {
return PointInSetIncludingScoreQuery.toString.apply(value, numericType);
}
};
}
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class ToParentBlockJoinSortField method getFloatComparator.
private FieldComparator<?> getFloatComparator(int numHits) {
return new FieldComparator.FloatComparator(numHits, getField(), (Float) missingValue) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
final BlockJoinSelector.Type type = order ? BlockJoinSelector.Type.MAX : BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptyNumeric();
}
return new FilterNumericDocValues(BlockJoinSelector.wrap(sortedNumeric, type, parents, children)) {
@Override
public long longValue() throws IOException {
// undo the numericutils sortability
return NumericUtils.sortableFloatBits((int) super.longValue());
}
};
}
};
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class IntervalFacets method getCountMultiValuedNumeric.
private void getCountMultiValuedNumeric() throws IOException {
final FieldType ft = schemaField.getType();
final String fieldName = schemaField.getName();
if (ft.getNumberType() == null) {
throw new IllegalStateException();
}
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
SortedNumericDocValues longs = null;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
longs = DocValues.getSortedNumeric(ctx.reader(), fieldName);
}
int valuesDocID = longs.docID();
if (valuesDocID < doc - ctx.docBase) {
valuesDocID = longs.advance(doc - ctx.docBase);
}
if (valuesDocID == doc - ctx.docBase) {
accumIntervalWithMultipleValues(longs);
}
}
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class NumericFacets method getCountsMultiValued.
private static NamedList<Integer> getCountsMultiValued(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
// If facet.mincount=0 with PointFields the only option is to get the values from DocValues
// not currently supported. See SOLR-10033
mincount = Math.max(mincount, 1);
final SchemaField sf = searcher.getSchema().getField(fieldName);
final FieldType ft = sf.getType();
assert sf.multiValued();
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
// 1. accumulate
final HashTable hashTable = new HashTable(false);
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
SortedNumericDocValues longs = null;
int missingCount = 0;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
longs = DocValues.getSortedNumeric(ctx.reader(), fieldName);
}
int valuesDocID = longs.docID();
if (valuesDocID < doc - ctx.docBase) {
valuesDocID = longs.advance(doc - ctx.docBase);
}
if (valuesDocID == doc - ctx.docBase) {
// This document must have at least one value
long l = longs.nextValue();
hashTable.add(l, 1);
for (int i = 1; i < longs.docValueCount(); i++) {
long lnew = longs.nextValue();
if (lnew > l) {
// Skip the value if it's equal to the last one, we don't want to double-count it
hashTable.add(lnew, 1);
}
l = lnew;
}
} else {
++missingCount;
}
}
// 2. select top-k facet values
final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
final PriorityQueue<Entry> pq;
if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
return true;
} else {
return false;
}
}
};
} else {
// sort=index
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
return a.bits > b.bits;
}
};
}
Entry e = null;
for (int i = 0; i < hashTable.bits.length; ++i) {
if (hashTable.counts[i] >= mincount) {
if (e == null) {
e = new Entry();
}
e.bits = hashTable.bits[i];
e.count = hashTable.counts[i];
e = pq.insertWithOverflow(e);
}
}
// 4. build the NamedList
final NamedList<Integer> result = new NamedList<>(Math.max(pq.size() - offset + 1, 1));
final Deque<Entry> counts = new ArrayDeque<>(pq.size() - offset);
while (pq.size() > offset) {
counts.addFirst(pq.pop());
}
for (Entry entry : counts) {
// TODO: convert to correct value
result.add(bitsToStringValue(ft, entry.bits), entry.count);
}
if (missing) {
result.add(null, missingCount);
}
return result;
}
Aggregations