Search in sources :

Example 71 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestFieldCacheReopen method testFieldCacheReuseAfterReopen.

// TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
// LUCENE-1579: Ensure that on a reopened reader, that any
// shared segments reuse the doc values arrays in
// FieldCache
public void testFieldCacheReuseAfterReopen() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(10)));
    Document doc = new Document();
    doc.add(new IntPoint("number", 17));
    writer.addDocument(doc);
    writer.commit();
    // Open reader1
    DirectoryReader r = DirectoryReader.open(dir);
    LeafReader r1 = getOnlyLeafReader(r);
    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER);
    assertEquals(0, ints.nextDoc());
    assertEquals(17, ints.longValue());
    // Add new segment
    writer.addDocument(doc);
    writer.commit();
    // Reopen reader1 --> reader2
    DirectoryReader r2 = DirectoryReader.openIfChanged(r);
    assertNotNull(r2);
    r.close();
    LeafReader sub0 = r2.leaves().get(0).reader();
    final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER);
    r2.close();
    assertEquals(0, ints2.nextDoc());
    assertEquals(17, ints2.longValue());
    writer.close();
    dir.close();
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) NumericDocValues(org.apache.lucene.index.NumericDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReader(org.apache.lucene.index.LeafReader) IndexWriter(org.apache.lucene.index.IndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 72 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class JoinUtil method createJoinQuery.

/**
   * Method for query time joining for numeric fields. It supports multi- and single- values longs, ints, floats and longs.
   * All considerations from {@link JoinUtil#createJoinQuery(String, boolean, String, Query, IndexSearcher, ScoreMode)} are applicable here too,
   * though memory consumption might be higher.
   * <p>
   *
   * @param fromField                 The from field to join from
   * @param multipleValuesPerDocument Whether the from field has multiple terms per document
   *                                  when true fromField might be {@link DocValuesType#SORTED_NUMERIC},
   *                                  otherwise fromField should be {@link DocValuesType#NUMERIC}
   * @param toField                   The to field to join to, should be {@link IntPoint}, {@link LongPoint}, {@link FloatPoint}
   *                                  or {@link DoublePoint}.
   * @param numericType               either {@link java.lang.Integer}, {@link java.lang.Long}, {@link java.lang.Float}
   *                                  or {@link java.lang.Double} it should correspond to toField types
   * @param fromQuery                 The query to match documents on the from side
   * @param fromSearcher              The searcher that executed the specified fromQuery
   * @param scoreMode                 Instructs how scores from the fromQuery are mapped to the returned query
   * @return a {@link Query} instance that can be used to join documents based on the
   *         terms in the from and to field
   * @throws IOException If I/O related errors occur
   */
public static Query createJoinQuery(String fromField, boolean multipleValuesPerDocument, String toField, Class<? extends Number> numericType, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode) throws IOException {
    TreeSet<Long> joinValues = new TreeSet<>();
    Map<Long, Float> aggregatedScores = new HashMap<>();
    Map<Long, Integer> occurrences = new HashMap<>();
    boolean needsScore = scoreMode != ScoreMode.None;
    BiConsumer<Long, Float> scoreAggregator;
    if (scoreMode == ScoreMode.Max) {
        scoreAggregator = (key, score) -> {
            Float currentValue = aggregatedScores.putIfAbsent(key, score);
            if (currentValue != null) {
                aggregatedScores.put(key, Math.max(currentValue, score));
            }
        };
    } else if (scoreMode == ScoreMode.Min) {
        scoreAggregator = (key, score) -> {
            Float currentValue = aggregatedScores.putIfAbsent(key, score);
            if (currentValue != null) {
                aggregatedScores.put(key, Math.min(currentValue, score));
            }
        };
    } else if (scoreMode == ScoreMode.Total) {
        scoreAggregator = (key, score) -> {
            Float currentValue = aggregatedScores.putIfAbsent(key, score);
            if (currentValue != null) {
                aggregatedScores.put(key, currentValue + score);
            }
        };
    } else if (scoreMode == ScoreMode.Avg) {
        scoreAggregator = (key, score) -> {
            Float currentSore = aggregatedScores.putIfAbsent(key, score);
            if (currentSore != null) {
                aggregatedScores.put(key, currentSore + score);
            }
            Integer currentOccurrence = occurrences.putIfAbsent(key, 1);
            if (currentOccurrence != null) {
                occurrences.put(key, ++currentOccurrence);
            }
        };
    } else {
        scoreAggregator = (key, score) -> {
            throw new UnsupportedOperationException();
        };
    }
    LongFunction<Float> joinScorer;
    if (scoreMode == ScoreMode.Avg) {
        joinScorer = (joinValue) -> {
            Float aggregatedScore = aggregatedScores.get(joinValue);
            Integer occurrence = occurrences.get(joinValue);
            return aggregatedScore / occurrence;
        };
    } else {
        joinScorer = aggregatedScores::get;
    }
    Collector collector;
    if (multipleValuesPerDocument) {
        collector = new SimpleCollector() {

            SortedNumericDocValues sortedNumericDocValues;

            Scorer scorer;

            @Override
            public void collect(int doc) throws IOException {
                if (doc > sortedNumericDocValues.docID()) {
                    sortedNumericDocValues.advance(doc);
                }
                if (doc == sortedNumericDocValues.docID()) {
                    for (int i = 0; i < sortedNumericDocValues.docValueCount(); i++) {
                        long value = sortedNumericDocValues.nextValue();
                        joinValues.add(value);
                        if (needsScore) {
                            scoreAggregator.accept(value, scorer.score());
                        }
                    }
                }
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                sortedNumericDocValues = DocValues.getSortedNumeric(context.reader(), fromField);
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }

            @Override
            public boolean needsScores() {
                return needsScore;
            }
        };
    } else {
        collector = new SimpleCollector() {

            NumericDocValues numericDocValues;

            Scorer scorer;

            private int lastDocID = -1;

            private boolean docsInOrder(int docID) {
                if (docID < lastDocID) {
                    throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
                }
                lastDocID = docID;
                return true;
            }

            @Override
            public void collect(int doc) throws IOException {
                assert docsInOrder(doc);
                int dvDocID = numericDocValues.docID();
                if (dvDocID < doc) {
                    dvDocID = numericDocValues.advance(doc);
                }
                long value;
                if (dvDocID == doc) {
                    value = numericDocValues.longValue();
                } else {
                    value = 0;
                }
                joinValues.add(value);
                if (needsScore) {
                    scoreAggregator.accept(value, scorer.score());
                }
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                numericDocValues = DocValues.getNumeric(context.reader(), fromField);
                lastDocID = -1;
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }

            @Override
            public boolean needsScores() {
                return needsScore;
            }
        };
    }
    fromSearcher.search(fromQuery, collector);
    Iterator<Long> iterator = joinValues.iterator();
    final int bytesPerDim;
    final BytesRef encoded = new BytesRef();
    final PointInSetIncludingScoreQuery.Stream stream;
    if (Integer.class.equals(numericType)) {
        bytesPerDim = Integer.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    IntPoint.encodeDimension((int) value, encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else if (Long.class.equals(numericType)) {
        bytesPerDim = Long.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    LongPoint.encodeDimension(value, encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else if (Float.class.equals(numericType)) {
        bytesPerDim = Float.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    FloatPoint.encodeDimension(Float.intBitsToFloat((int) value), encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else if (Double.class.equals(numericType)) {
        bytesPerDim = Double.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    DoublePoint.encodeDimension(Double.longBitsToDouble(value), encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else {
        throw new IllegalArgumentException("unsupported numeric type, only Integer, Long, Float and Double are supported");
    }
    encoded.bytes = new byte[bytesPerDim];
    encoded.length = bytesPerDim;
    if (needsScore) {
        return new PointInSetIncludingScoreQuery(scoreMode, fromQuery, multipleValuesPerDocument, toField, bytesPerDim, stream) {

            @Override
            protected String toString(byte[] value) {
                return toString.apply(value, numericType);
            }
        };
    } else {
        return new PointInSetQuery(toField, 1, bytesPerDim, stream) {

            @Override
            protected String toString(byte[] value) {
                return PointInSetIncludingScoreQuery.toString.apply(value, numericType);
            }
        };
    }
}
Also used : Query(org.apache.lucene.search.Query) LongPoint(org.apache.lucene.document.LongPoint) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) NumericDocValues(org.apache.lucene.index.NumericDocValues) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) DoublePoint(org.apache.lucene.document.DoublePoint) PointInSetQuery(org.apache.lucene.search.PointInSetQuery) Locale(java.util.Locale) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IntPoint(org.apache.lucene.document.IntPoint) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SortedDocValues(org.apache.lucene.index.SortedDocValues) SimpleCollector(org.apache.lucene.search.SimpleCollector) Scorer(org.apache.lucene.search.Scorer) Iterator(java.util.Iterator) LongFunction(java.util.function.LongFunction) FloatPoint(org.apache.lucene.document.FloatPoint) MultiDocValues(org.apache.lucene.index.MultiDocValues) BytesRef(org.apache.lucene.util.BytesRef) IOException(java.io.IOException) Collector(org.apache.lucene.search.Collector) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Function(org.apache.lucene.search.join.DocValuesTermsCollector.Function) DocValues(org.apache.lucene.index.DocValues) DocValuesType(org.apache.lucene.index.DocValuesType) LeafReader(org.apache.lucene.index.LeafReader) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) HashMap(java.util.HashMap) Scorer(org.apache.lucene.search.Scorer) SimpleCollector(org.apache.lucene.search.SimpleCollector) TreeSet(java.util.TreeSet) SimpleCollector(org.apache.lucene.search.SimpleCollector) Collector(org.apache.lucene.search.Collector) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) PointInSetQuery(org.apache.lucene.search.PointInSetQuery) IOException(java.io.IOException) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint)

Example 73 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestParentChildrenBlockJoinQuery method testParentChildrenBlockJoinQuery.

public void testParentChildrenBlockJoinQuery() throws Exception {
    int numParentDocs = 8 + random().nextInt(8);
    int maxChildDocsPerParent = 8 + random().nextInt(8);
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numParentDocs; i++) {
        int numChildDocs = random().nextInt(maxChildDocsPerParent);
        List<Document> docs = new ArrayList<>(numChildDocs + 1);
        for (int j = 0; j < numChildDocs; j++) {
            Document childDoc = new Document();
            childDoc.add(new StringField("type", "child", Field.Store.NO));
            childDoc.add(new NumericDocValuesField("score", j + 1));
            docs.add(childDoc);
        }
        Document parenDoc = new Document();
        parenDoc.add(new StringField("type", "parent", Field.Store.NO));
        parenDoc.add(new NumericDocValuesField("num_child_docs", numChildDocs));
        docs.add(parenDoc);
        writer.addDocuments(docs);
    }
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
    Query childQuery = new BooleanQuery.Builder().add(new TermQuery(new Term("type", "child")), BooleanClause.Occur.FILTER).add(TestJoinUtil.numericDocValuesScoreQuery("score"), BooleanClause.Occur.SHOULD).build();
    TopDocs parentDocs = searcher.search(new TermQuery(new Term("type", "parent")), numParentDocs);
    assertEquals(parentDocs.scoreDocs.length, numParentDocs);
    for (ScoreDoc parentScoreDoc : parentDocs.scoreDocs) {
        LeafReaderContext leafReader = reader.leaves().get(ReaderUtil.subIndex(parentScoreDoc.doc, reader.leaves()));
        NumericDocValues numericDocValuesField = leafReader.reader().getNumericDocValues("num_child_docs");
        numericDocValuesField.advance(parentScoreDoc.doc - leafReader.docBase);
        long expectedChildDocs = numericDocValuesField.longValue();
        ParentChildrenBlockJoinQuery parentChildrenBlockJoinQuery = new ParentChildrenBlockJoinQuery(parentFilter, childQuery, parentScoreDoc.doc);
        TopDocs topDocs = searcher.search(parentChildrenBlockJoinQuery, maxChildDocsPerParent);
        assertEquals(expectedChildDocs, topDocs.totalHits);
        if (expectedChildDocs > 0) {
            assertEquals(expectedChildDocs, topDocs.getMaxScore(), 0);
            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                ScoreDoc childScoreDoc = topDocs.scoreDocs[i];
                assertEquals(expectedChildDocs - i, childScoreDoc.score, 0);
            }
        }
    }
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 74 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestBlockJoinSelector method testNumericSelector.

public void testNumericSelector() throws Exception {
    final BitSet parents = new FixedBitSet(20);
    parents.set(0);
    parents.set(5);
    parents.set(6);
    parents.set(10);
    parents.set(15);
    parents.set(19);
    final BitSet children = new FixedBitSet(20);
    children.set(2);
    children.set(3);
    children.set(4);
    children.set(12);
    children.set(17);
    final long[] longs = new long[20];
    final BitSet docsWithValue = new FixedBitSet(20);
    docsWithValue.set(2);
    longs[2] = 5;
    docsWithValue.set(3);
    longs[3] = 7;
    docsWithValue.set(4);
    longs[4] = 3;
    docsWithValue.set(12);
    longs[12] = 10;
    docsWithValue.set(18);
    longs[18] = 10;
    final NumericDocValues mins = BlockJoinSelector.wrap(DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)), BlockJoinSelector.Type.MIN, parents, children);
    assertEquals(5, mins.nextDoc());
    assertEquals(3, mins.longValue());
    assertEquals(15, mins.nextDoc());
    assertEquals(10, mins.longValue());
    assertEquals(NO_MORE_DOCS, mins.nextDoc());
    final NumericDocValues maxs = BlockJoinSelector.wrap(DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)), BlockJoinSelector.Type.MAX, parents, children);
    assertEquals(5, maxs.nextDoc());
    assertEquals(7, maxs.longValue());
    assertEquals(15, maxs.nextDoc());
    assertEquals(10, maxs.longValue());
    assertEquals(NO_MORE_DOCS, maxs.nextDoc());
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) FixedBitSet(org.apache.lucene.util.FixedBitSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) BitSet(org.apache.lucene.util.BitSet)

Example 75 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class BBoxValueSource method getValues.

@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    LeafReader reader = readerContext.reader();
    final NumericDocValues minX = DocValues.getNumeric(reader, strategy.field_minX);
    final NumericDocValues minY = DocValues.getNumeric(reader, strategy.field_minY);
    final NumericDocValues maxX = DocValues.getNumeric(reader, strategy.field_maxX);
    final NumericDocValues maxY = DocValues.getNumeric(reader, strategy.field_maxY);
    //reused
    final Rectangle rect = strategy.getSpatialContext().makeRectangle(0, 0, 0, 0);
    return new FunctionValues() {

        private int lastDocID = -1;

        private double getDocValue(NumericDocValues values, int doc) throws IOException {
            int curDocID = values.docID();
            if (doc > curDocID) {
                curDocID = values.advance(doc);
            }
            if (doc == curDocID) {
                return Double.longBitsToDouble(values.longValue());
            } else {
                return 0.0;
            }
        }

        @Override
        public Object objectVal(int doc) throws IOException {
            if (doc < lastDocID) {
                throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + doc);
            }
            lastDocID = doc;
            double minXValue = getDocValue(minX, doc);
            if (minX.docID() != doc) {
                return null;
            } else {
                double minYValue = getDocValue(minY, doc);
                double maxXValue = getDocValue(maxX, doc);
                double maxYValue = getDocValue(maxY, doc);
                rect.reset(minXValue, maxXValue, minYValue, maxYValue);
                return rect;
            }
        }

        @Override
        public String strVal(int doc) throws IOException {
            //TODO support WKT output once Spatial4j does
            Object v = objectVal(doc);
            return v == null ? null : v.toString();
        }

        @Override
        public boolean exists(int doc) throws IOException {
            getDocValue(minX, doc);
            return minX.docID() == doc;
        }

        @Override
        public Explanation explain(int doc) throws IOException {
            return Explanation.match(Float.NaN, toString(doc));
        }

        @Override
        public String toString(int doc) throws IOException {
            return description() + '=' + strVal(doc);
        }
    };
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) Rectangle(org.locationtech.spatial4j.shape.Rectangle) FunctionValues(org.apache.lucene.queries.function.FunctionValues)

Aggregations

NumericDocValues (org.apache.lucene.index.NumericDocValues)81 Document (org.apache.lucene.document.Document)30 Directory (org.apache.lucene.store.Directory)29 LeafReader (org.apache.lucene.index.LeafReader)25 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)25 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)23 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)22 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)22 IOException (java.io.IOException)20 BytesRef (org.apache.lucene.util.BytesRef)19 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)17 HashSet (java.util.HashSet)16 Bits (org.apache.lucene.util.Bits)16 DirectoryReader (org.apache.lucene.index.DirectoryReader)15 SortedDocValues (org.apache.lucene.index.SortedDocValues)15 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)14 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)13 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)12 IndexReader (org.apache.lucene.index.IndexReader)12 Term (org.apache.lucene.index.Term)12