Use of org.apache.lucene.index.NumericDocValues in the Apache lucene-solr project.
From class TestFieldCacheReopen, method testFieldCacheReuseAfterReopen.
// TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
// LUCENE-1579: Ensure that on a reopened reader, that any
// shared segments reuse the doc values arrays in
// FieldCache
//
// Flow: index one doc, read its value through FieldCache, add a second
// segment, reopen, and read the value for the first leaf of the reopened reader
// again -- after both readers have already been closed.
public void testFieldCacheReuseAfterReopen() throws Exception {
  final Directory directory = newDirectory();
  final IndexWriter indexWriter =
      new IndexWriter(
          directory,
          newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(10)));

  // First segment: a single document carrying the point value 17.
  final Document document = new Document();
  document.add(new IntPoint("number", 17));
  indexWriter.addDocument(document);
  indexWriter.commit();

  // Initial reader over the single-segment index.
  final DirectoryReader initialReader = DirectoryReader.open(directory);
  final LeafReader onlyLeaf = getOnlyLeafReader(initialReader);
  final NumericDocValues valuesBeforeReopen =
      FieldCache.DEFAULT.getNumerics(onlyLeaf, "number", FieldCache.INT_POINT_PARSER);
  assertEquals(0, valuesBeforeReopen.nextDoc());
  assertEquals(17, valuesBeforeReopen.longValue());

  // Second segment: the same document again, in its own commit.
  indexWriter.addDocument(document);
  indexWriter.commit();

  // Reopen; a non-null result means the index change was picked up.
  final DirectoryReader reopenedReader = DirectoryReader.openIfChanged(initialReader);
  assertNotNull(reopenedReader);
  initialReader.close();

  final LeafReader firstLeaf = reopenedReader.leaves().get(0).reader();
  final NumericDocValues valuesAfterReopen =
      FieldCache.DEFAULT.getNumerics(firstLeaf, "number", FieldCache.INT_POINT_PARSER);
  // The reopened reader is closed BEFORE the values are consumed, on purpose:
  // the values obtained from FieldCache must remain readable afterwards.
  reopenedReader.close();
  assertEquals(0, valuesAfterReopen.nextDoc());
  assertEquals(17, valuesAfterReopen.longValue());

  indexWriter.close();
  directory.close();
}
Use of org.apache.lucene.index.NumericDocValues in the Apache lucene-solr project.
From class JoinUtil, method createJoinQuery.
/**
 * Method for query time joining for numeric fields. It supports multi- and single-valued ints, longs, floats and doubles.
 * All considerations from {@link JoinUtil#createJoinQuery(String, boolean, String, Query, IndexSearcher, ScoreMode)} are applicable here too,
 * though memory consumption might be higher.
 * <p>
 * The from side is searched eagerly: every distinct from-field value matched by <code>fromQuery</code> is collected
 * in memory (together with its aggregated score when <code>scoreMode</code> is not {@link ScoreMode#None}) before the
 * returned query is built.
 * <p>
 * Doc values are read as raw <code>long</code>s and converted to the point encoding of <code>numericType</code>:
 * for {@link java.lang.Integer} the value is narrowed to <code>int</code>; for {@link java.lang.Float} the low 32 bits
 * are reinterpreted with {@link Float#intBitsToFloat(int)}; for {@link java.lang.Double} the 64 bits are reinterpreted
 * with {@link Double#longBitsToDouble(long)}.
 * <p>
 * When <code>multipleValuesPerDocument</code> is <code>false</code>, a matching from-document that has no value for
 * <code>fromField</code> contributes the join value <code>0</code>.
 *
 * @param fromField                 The from field to join from
 * @param multipleValuesPerDocument Whether the from field has multiple terms per document
 *                                  when true fromField might be {@link DocValuesType#SORTED_NUMERIC},
 *                                  otherwise fromField should be {@link DocValuesType#NUMERIC}
 * @param toField                   The to field to join to, should be {@link IntPoint}, {@link LongPoint}, {@link FloatPoint}
 *                                  or {@link DoublePoint}.
 * @param numericType               either {@link java.lang.Integer}, {@link java.lang.Long}, {@link java.lang.Float}
 *                                  or {@link java.lang.Double} it should correspond to toField types
 * @param fromQuery                 The query to match documents on the from side
 * @param fromSearcher              The searcher that executed the specified fromQuery
 * @param scoreMode                 Instructs how scores from the fromQuery are mapped to the returned query
 * @return a {@link Query} instance that can be used to join documents based on the
 *         terms in the from and to field
 * @throws IOException If I/O related errors occur
 * @throws IllegalArgumentException If <code>numericType</code> is not one of the four supported classes
 */
public static Query createJoinQuery(String fromField, boolean multipleValuesPerDocument, String toField, Class<? extends Number> numericType, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode) throws IOException {
  // Fail fast: reject an unsupported numeric type before paying for the from-side search.
  if (!Integer.class.equals(numericType)
      && !Long.class.equals(numericType)
      && !Float.class.equals(numericType)
      && !Double.class.equals(numericType)) {
    throw new IllegalArgumentException("unsupported numeric type, only Integer, Long, Float and Double are supported");
  }

  // Sorted so that the values are later streamed to the point query in ascending order.
  TreeSet<Long> joinValues = new TreeSet<>();
  Map<Long, Float> aggregatedScores = new HashMap<>();
  // Only populated for ScoreMode.Avg: number of scores summed per join value.
  Map<Long, Integer> occurrences = new HashMap<>();
  boolean needsScore = scoreMode != ScoreMode.None;

  // Folds one (join value, score) observation into aggregatedScores according to scoreMode.
  // Map.merge stores the score for a new key and combines it with the existing one otherwise.
  final BiConsumer<Long, Float> scoreAggregator;
  if (scoreMode == ScoreMode.Max) {
    scoreAggregator = (key, score) -> aggregatedScores.merge(key, score, Float::max);
  } else if (scoreMode == ScoreMode.Min) {
    scoreAggregator = (key, score) -> aggregatedScores.merge(key, score, Float::min);
  } else if (scoreMode == ScoreMode.Total) {
    scoreAggregator = (key, score) -> aggregatedScores.merge(key, score, Float::sum);
  } else if (scoreMode == ScoreMode.Avg) {
    scoreAggregator = (key, score) -> {
      aggregatedScores.merge(key, score, Float::sum);
      occurrences.merge(key, 1, Integer::sum);
    };
  } else {
    // ScoreMode.None: the collectors never call the aggregator because needsScore is false.
    scoreAggregator = (key, score) -> {
      throw new UnsupportedOperationException();
    };
  }

  // Produces the final score of a join value once collection is finished.
  LongFunction<Float> joinScorer;
  if (scoreMode == ScoreMode.Avg) {
    joinScorer = (joinValue) -> {
      Float aggregatedScore = aggregatedScores.get(joinValue);
      Integer occurrence = occurrences.get(joinValue);
      return aggregatedScore / occurrence;
    };
  } else {
    joinScorer = aggregatedScores::get;
  }

  Collector collector;
  if (multipleValuesPerDocument) {
    collector = new SimpleCollector() {
      SortedNumericDocValues sortedNumericDocValues;
      Scorer scorer;

      @Override
      public void collect(int doc) throws IOException {
        // Doc values are forward-only iterators: only advance when behind the collected doc.
        if (doc > sortedNumericDocValues.docID()) {
          sortedNumericDocValues.advance(doc);
        }
        if (doc == sortedNumericDocValues.docID()) {
          // The score belongs to the document, not to the individual value, so read it once.
          final float docScore = needsScore ? scorer.score() : 0f;
          for (int i = 0; i < sortedNumericDocValues.docValueCount(); i++) {
            long value = sortedNumericDocValues.nextValue();
            joinValues.add(value);
            if (needsScore) {
              scoreAggregator.accept(value, docScore);
            }
          }
        }
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        sortedNumericDocValues = DocValues.getSortedNumeric(context.reader(), fromField);
      }

      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
      }

      @Override
      public boolean needsScores() {
        return needsScore;
      }
    };
  } else {
    collector = new SimpleCollector() {
      NumericDocValues numericDocValues;
      Scorer scorer;
      private int lastDocID = -1;

      // Used only from an assert: verifies that docs arrive in non-decreasing order within a segment.
      private boolean docsInOrder(int docID) {
        if (docID < lastDocID) {
          throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
        }
        lastDocID = docID;
        return true;
      }

      @Override
      public void collect(int doc) throws IOException {
        assert docsInOrder(doc);
        int dvDocID = numericDocValues.docID();
        if (dvDocID < doc) {
          dvDocID = numericDocValues.advance(doc);
        }
        long value;
        if (dvDocID == doc) {
          value = numericDocValues.longValue();
        } else {
          // No value for this document: it joins on 0.
          value = 0;
        }
        joinValues.add(value);
        if (needsScore) {
          scoreAggregator.accept(value, scorer.score());
        }
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        numericDocValues = DocValues.getNumeric(context.reader(), fromField);
        lastDocID = -1;
      }

      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
      }

      @Override
      public boolean needsScores() {
        return needsScore;
      }
    };
  }
  fromSearcher.search(fromQuery, collector);

  // The iterator must be created after the search, once joinValues is no longer being modified.
  Iterator<Long> iterator = joinValues.iterator();
  final int bytesPerDim;
  // Single reusable buffer; every stream below overwrites it on each next() call.
  // Its backing array is sized further down, once bytesPerDim is known.
  final BytesRef encoded = new BytesRef();
  final PointInSetIncludingScoreQuery.Stream stream;
  if (Integer.class.equals(numericType)) {
    bytesPerDim = Integer.BYTES;
    stream = new PointInSetIncludingScoreQuery.Stream() {
      @Override
      public BytesRef next() {
        if (iterator.hasNext()) {
          long value = iterator.next();
          IntPoint.encodeDimension((int) value, encoded.bytes, 0);
          if (needsScore) {
            score = joinScorer.apply(value);
          }
          return encoded;
        } else {
          return null;
        }
      }
    };
  } else if (Long.class.equals(numericType)) {
    bytesPerDim = Long.BYTES;
    stream = new PointInSetIncludingScoreQuery.Stream() {
      @Override
      public BytesRef next() {
        if (iterator.hasNext()) {
          long value = iterator.next();
          LongPoint.encodeDimension(value, encoded.bytes, 0);
          if (needsScore) {
            score = joinScorer.apply(value);
          }
          return encoded;
        } else {
          return null;
        }
      }
    };
  } else if (Float.class.equals(numericType)) {
    bytesPerDim = Float.BYTES;
    stream = new PointInSetIncludingScoreQuery.Stream() {
      @Override
      public BytesRef next() {
        if (iterator.hasNext()) {
          long value = iterator.next();
          // The doc value holds the float's raw int bits.
          FloatPoint.encodeDimension(Float.intBitsToFloat((int) value), encoded.bytes, 0);
          if (needsScore) {
            score = joinScorer.apply(value);
          }
          return encoded;
        } else {
          return null;
        }
      }
    };
  } else if (Double.class.equals(numericType)) {
    bytesPerDim = Double.BYTES;
    stream = new PointInSetIncludingScoreQuery.Stream() {
      @Override
      public BytesRef next() {
        if (iterator.hasNext()) {
          long value = iterator.next();
          // The doc value holds the double's raw long bits.
          DoublePoint.encodeDimension(Double.longBitsToDouble(value), encoded.bytes, 0);
          if (needsScore) {
            score = joinScorer.apply(value);
          }
          return encoded;
        } else {
          return null;
        }
      }
    };
  } else {
    // Already rejected by the up-front check; kept so that bytesPerDim and stream
    // are definitely assigned on every path that continues.
    throw new IllegalArgumentException("unsupported numeric type, only Integer, Long, Float and Double are supported");
  }
  encoded.bytes = new byte[bytesPerDim];
  encoded.length = bytesPerDim;

  if (needsScore) {
    return new PointInSetIncludingScoreQuery(scoreMode, fromQuery, multipleValuesPerDocument, toField, bytesPerDim, stream) {
      @Override
      protected String toString(byte[] value) {
        return toString.apply(value, numericType);
      }
    };
  } else {
    return new PointInSetQuery(toField, 1, bytesPerDim, stream) {
      @Override
      protected String toString(byte[] value) {
        return PointInSetIncludingScoreQuery.toString.apply(value, numericType);
      }
    };
  }
}
Use of org.apache.lucene.index.NumericDocValues in the Apache lucene-solr project.
From class TestParentChildrenBlockJoinQuery, method testParentChildrenBlockJoinQuery.
/**
 * Indexes a random number of parent/child blocks and checks that
 * {@code ParentChildrenBlockJoinQuery} returns, for each parent, exactly that parent's children.
 * <p>
 * Each block is written as its child documents followed by the parent document. Child {@code j}
 * stores {@code j + 1} in the "score" doc values field; the parent stores its child count in
 * "num_child_docs", which is the expected hit count for that parent.
 */
public void testParentChildrenBlockJoinQuery() throws Exception {
  int numParentDocs = 8 + random().nextInt(8);
  int maxChildDocsPerParent = 8 + random().nextInt(8);

  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  for (int i = 0; i < numParentDocs; i++) {
    // May be 0: parents without any children are part of the test.
    int numChildDocs = random().nextInt(maxChildDocsPerParent);
    List<Document> docs = new ArrayList<>(numChildDocs + 1);
    for (int j = 0; j < numChildDocs; j++) {
      Document childDoc = new Document();
      childDoc.add(new StringField("type", "child", Field.Store.NO));
      childDoc.add(new NumericDocValuesField("score", j + 1));
      docs.add(childDoc);
    }

    // The parent goes last in the block.
    Document parentDoc = new Document();
    parentDoc.add(new StringField("type", "parent", Field.Store.NO));
    parentDoc.add(new NumericDocValuesField("num_child_docs", numChildDocs));
    docs.add(parentDoc);
    writer.addDocuments(docs);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
  // NOTE(review): the score assertions below assume numericDocValuesScoreQuery("score") scores
  // each child with the value of its "score" field -- confirm against TestJoinUtil.
  Query childQuery = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("type", "child")), BooleanClause.Occur.FILTER)
      .add(TestJoinUtil.numericDocValuesScoreQuery("score"), BooleanClause.Occur.SHOULD)
      .build();

  TopDocs parentDocs = searcher.search(new TermQuery(new Term("type", "parent")), numParentDocs);
  // Expected value first, so a failure message reports the numbers the right way round.
  assertEquals(numParentDocs, parentDocs.scoreDocs.length);
  for (ScoreDoc parentScoreDoc : parentDocs.scoreDocs) {
    LeafReaderContext leafReader = reader.leaves().get(ReaderUtil.subIndex(parentScoreDoc.doc, reader.leaves()));
    NumericDocValues numericDocValuesField = leafReader.reader().getNumericDocValues("num_child_docs");
    // Doc values are addressed per segment, so convert the top-level doc id to a leaf-local one.
    int leafDocId = parentScoreDoc.doc - leafReader.docBase;
    // Every parent was indexed with "num_child_docs"; make sure the iterator really landed on
    // this parent before trusting longValue().
    assertEquals(leafDocId, numericDocValuesField.advance(leafDocId));
    long expectedChildDocs = numericDocValuesField.longValue();

    ParentChildrenBlockJoinQuery parentChildrenBlockJoinQuery =
        new ParentChildrenBlockJoinQuery(parentFilter, childQuery, parentScoreDoc.doc);
    TopDocs topDocs = searcher.search(parentChildrenBlockJoinQuery, maxChildDocsPerParent);
    assertEquals(expectedChildDocs, topDocs.totalHits);
    if (expectedChildDocs > 0) {
      // Child scores were indexed as 1..numChildDocs, so the best score equals the child count
      // and the hits are expected in strictly descending score order.
      assertEquals(expectedChildDocs, topDocs.getMaxScore(), 0);
      for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        ScoreDoc childScoreDoc = topDocs.scoreDocs[i];
        assertEquals(expectedChildDocs - i, childScoreDoc.score, 0);
      }
    }
  }

  reader.close();
  dir.close();
}
Use of org.apache.lucene.index.NumericDocValues in the Apache lucene-solr project.
From class TestBlockJoinSelector, method testNumericSelector.
/**
 * Wraps a canned single-valued numeric source with {@code BlockJoinSelector} in MIN and then MAX
 * mode over a fixed 20-document parent/child layout, and checks which parent docs are returned
 * and with which value.
 */
public void testNumericSelector() throws Exception {
  final int docCount = 20;

  final BitSet parents = new FixedBitSet(docCount);
  for (int parentDoc : new int[] {0, 5, 6, 10, 15, 19}) {
    parents.set(parentDoc);
  }

  final BitSet children = new FixedBitSet(docCount);
  for (int childDoc : new int[] {2, 3, 4, 12, 17}) {
    children.set(childDoc);
  }

  // Fixture as (doc, value) pairs. Doc 18 has a value but is not flagged as a child,
  // and child doc 17 has no value.
  final long[][] fixture = {{2, 5}, {3, 7}, {4, 3}, {12, 10}, {18, 10}};
  final long[] longs = new long[docCount];
  final BitSet docsWithValue = new FixedBitSet(docCount);
  for (long[] pair : fixture) {
    final int doc = (int) pair[0];
    docsWithValue.set(doc);
    longs[doc] = pair[1];
  }

  // MIN: parent 5 takes the smallest of children 2..4, parent 15 takes child 12's value.
  final NumericDocValues mins =
      BlockJoinSelector.wrap(
          DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)),
          BlockJoinSelector.Type.MIN,
          parents,
          children);
  assertEquals(5, mins.nextDoc());
  assertEquals(3, mins.longValue());
  assertEquals(15, mins.nextDoc());
  assertEquals(10, mins.longValue());
  assertEquals(NO_MORE_DOCS, mins.nextDoc());

  // MAX: same parents, but parent 5 now takes the largest of children 2..4.
  final NumericDocValues maxs =
      BlockJoinSelector.wrap(
          DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)),
          BlockJoinSelector.Type.MAX,
          parents,
          children);
  assertEquals(5, maxs.nextDoc());
  assertEquals(7, maxs.longValue());
  assertEquals(15, maxs.nextDoc());
  assertEquals(10, maxs.longValue());
  assertEquals(NO_MORE_DOCS, maxs.nextDoc());
}
Use of org.apache.lucene.index.NumericDocValues in the Apache lucene-solr project.
From class BBoxValueSource, method getValues.
/**
 * Builds per-segment {@link FunctionValues} that expose the bounding box stored in the strategy's
 * four numeric doc values fields (min/max X and Y) as a rectangle.
 * <p>
 * The returned object hands out one shared, mutable rectangle instance that is reset on every
 * {@code objectVal} call. A document counts as having a value exactly when the minX field has an
 * entry for it.
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final LeafReader leaf = readerContext.reader();
  final NumericDocValues minX = DocValues.getNumeric(leaf, strategy.field_minX);
  final NumericDocValues minY = DocValues.getNumeric(leaf, strategy.field_minY);
  final NumericDocValues maxX = DocValues.getNumeric(leaf, strategy.field_maxX);
  final NumericDocValues maxY = DocValues.getNumeric(leaf, strategy.field_maxY);

  // Single instance, overwritten for every document that is read.
  final Rectangle rect = strategy.getSpatialContext().makeRectangle(0, 0, 0, 0);

  return new FunctionValues() {

    // Highest doc id seen by objectVal so far; used to detect out-of-order access.
    private int lastDocID = -1;

    /**
     * Moves {@code values} forward to {@code doc} if it is still behind, and decodes the stored
     * long bits as a double. Yields 0.0 when the iterator is not positioned on {@code doc}.
     */
    private double readDouble(NumericDocValues values, int doc) throws IOException {
      int position = values.docID();
      if (position < doc) {
        position = values.advance(doc);
      }
      return position == doc ? Double.longBitsToDouble(values.longValue()) : 0.0;
    }

    @Override
    public Object objectVal(int doc) throws IOException {
      if (doc < lastDocID) {
        throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + doc);
      }
      lastDocID = doc;

      final double left = readDouble(minX, doc);
      // minX decides whether the document has a box at all.
      if (minX.docID() != doc) {
        return null;
      }

      final double bottom = readDouble(minY, doc);
      final double right = readDouble(maxX, doc);
      final double top = readDouble(maxY, doc);
      rect.reset(left, right, bottom, top);
      return rect;
    }

    @Override
    public String strVal(int doc) throws IOException {
      //TODO support WKT output once Spatial4j does
      final Object shape = objectVal(doc);
      if (shape == null) {
        return null;
      }
      return shape.toString();
    }

    @Override
    public boolean exists(int doc) throws IOException {
      // Called only to position minX; the decoded value itself is not needed here.
      readDouble(minX, doc);
      return minX.docID() == doc;
    }

    @Override
    public Explanation explain(int doc) throws IOException {
      return Explanation.match(Float.NaN, toString(doc));
    }

    @Override
    public String toString(int doc) throws IOException {
      return description() + '=' + strVal(doc);
    }
  };
}
Aggregations