Search in sources:

Example 1 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project by Apache.

Class RangeFieldQuery, method createWeight.

/**
 * Creates a constant-score weight for this range query. The per-segment scorer either
 * matches every document of the segment or walks the field's point values and keeps the
 * documents whose encoded value is accepted by a {@code RangeFieldComparator}.
 *
 * @param searcher    the searcher this weight is created for (not read by this method)
 * @param needsScores whether scores are required (not read by this method)
 * @param boost       boost handed to the {@code ConstantScoreWeight}
 * @return the weight producing constant-score scorers
 * @throws IOException declared by the overridden method
 */
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        /** Comparator used for every relation test against an encoded range. */
        final RangeFieldComparator target = new RangeFieldComparator();

        /**
         * Encodes an internal node as a single array: the leading
         * {@code numDims * bytesPerDim} bytes are copied from {@code min} and the
         * following {@code numDims * bytesPerDim} bytes from {@code max}.
         */
        private byte[] getInternalRange(byte[] min, byte[] max) {
            final int half = numDims * bytesPerDim;
            final byte[] encoded = new byte[min.length];
            System.arraycopy(min, 0, encoded, 0, half);
            System.arraycopy(max, half, encoded, half, half);
            return encoded;
        }

        /** Computes the relation between a cell and the query, used for BKD traversal. */
        private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
            final byte[] cell = getInternalRange(minPackedValue, maxPackedValue);
            if (!target.intersects(cell)) {
                return Relation.CELL_OUTSIDE_QUERY;
            }
            if (target.within(cell)) {
                // target within cell; continue traversing:
                return Relation.CELL_CROSSES_QUERY;
            }
            if (!target.contains(cell)) {
                // target intersects cell; continue traversing:
                return Relation.CELL_CROSSES_QUERY;
            }
            // target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
            final boolean rejectContainedCell = queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES;
            return rejectContainedCell ? Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
        }

        /** Intersects the point values with the query and collects the matching doc ids. */
        private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
            final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc(), values, field);
            final IntersectVisitor collector = new IntersectVisitor() {

                // assigned in grow(int); both visit methods add through it
                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = builder.grow(count);
                }

                @Override
                public void visit(int docID) throws IOException {
                    // no value supplied: the document is added without further checks
                    adder.add(docID);
                }

                @Override
                public void visit(int docID, byte[] leaf) throws IOException {
                    // value supplied: add only when the comparator accepts it
                    if (target.matches(leaf)) {
                        adder.add(docID);
                    }
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return compareRange(minPackedValue, maxPackedValue);
                }
            };
            values.intersect(collector);
            return builder.build();
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final LeafReader leafReader = context.reader();
            final PointValues values = leafReader.getPointValues(field);
            if (values == null) {
                // no docs in this segment indexed any ranges
                return null;
            }
            final FieldInfo info = leafReader.getFieldInfos().fieldInfo(field);
            if (info == null) {
                // no docs in this segment indexed this field
                return null;
            }
            checkFieldInfo(info);

            // Every document matches when each doc has a value and the segment-wide
            // min/max cell is reported as inside the query.
            final boolean allDocsMatch = values.getDocCount() == leafReader.maxDoc()
                    && compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY;

            final DocIdSetIterator iterator;
            if (allDocsMatch) {
                iterator = DocIdSetIterator.all(leafReader.maxDoc());
            } else {
                iterator = buildMatchingDocIdSet(leafReader, values).iterator();
            }
            return new ConstantScoreScorer(this, score(), iterator);
        }
    };
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) IOException(java.io.IOException) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 2 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project by Apache.

Class TestLucene60PointsFormat, method testEstimatePointCount.

/**
 * Indexes at least 10000 single-valued 3-byte points, exactly one of which holds a
 * value no other document has, force-merges to one segment and checks
 * {@code estimatePointCount} for three visitors: one reporting every cell inside the
 * query, one reporting every cell outside, and one that only crosses cells whose
 * bounds include the unique value.
 */
public void testEstimatePointCount() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig();
    // number of points per leaf hard to predict
    while (config.getMergePolicy() instanceof MockRandomMergePolicy) {
        config.setMergePolicy(newMergePolicy());
    }
    IndexWriter writer = new IndexWriter(dir, config);
    final byte[] pointValue = new byte[3];
    final byte[] uniquePointValue = new byte[3];
    random().nextBytes(uniquePointValue);
    // make sure we have several leaves
    final int numDocs = atLeast(10000);
    final int uniqueDocIndex = numDocs / 2;
    for (int docIndex = 0; docIndex < numDocs; ++docIndex) {
        Document doc = new Document();
        if (docIndex != uniqueDocIndex) {
            // draw random values until one differs from the unique value
            random().nextBytes(pointValue);
            while (Arrays.equals(pointValue, uniquePointValue)) {
                random().nextBytes(pointValue);
            }
            doc.add(new BinaryPoint("f", pointValue));
        } else {
            doc.add(new BinaryPoint("f", uniquePointValue));
        }
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    final IndexReader indexReader = DirectoryReader.open(writer);
    writer.close();
    final LeafReader leaf = getOnlyLeafReader(indexReader);
    PointValues points = leaf.getPointValues("f");

    // If all points match, then the point count is numLeaves * maxPointsInLeafNode
    final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
    final long allMatchEstimate = points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_INSIDE_QUERY;
        }
    });
    assertEquals(numLeaves * maxPointsInLeafNode, allMatchEstimate);

    // Return 0 if no points match
    final long noMatchEstimate = points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_OUTSIDE_QUERY;
        }
    });
    assertEquals(0, noMatchEstimate);

    // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
    // in general, or maybe 2x that if the point is a split value
    final long pointCount = points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // the cell is outside when the unique value lies beyond either bound
            final boolean outsideCell = StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0
                    || StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0;
            return outsideCell ? Relation.CELL_OUTSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
        }
    });
    // common case
    final boolean commonCase = pointCount == (maxPointsInLeafNode + 1) / 2;
    // if the point is a split value
    final boolean splitValueCase = pointCount == 2 * ((maxPointsInLeafNode + 1) / 2);
    assertTrue("" + pointCount, commonCase || splitValueCase);

    indexReader.close();
    dir.close();
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) MockRandomMergePolicy(org.apache.lucene.index.MockRandomMergePolicy) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 3 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project by Apache.

Class TestPointValues, method doTestMergedStats.

/**
 * Indexes 10 to 20 documents carrying zero to two random binary points each, opening
 * and closing a reader at random between documents, then checks that the static
 * {@code PointValues} statistics computed on a reader opened before {@code forceMerge(1)}
 * equal the statistics of the single segment seen by a reader opened after it.
 */
private void doTestMergedStats() throws IOException {
    final int numDims = TestUtil.nextInt(random(), 1, 8);
    final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));
    final int numDocs = TestUtil.nextInt(random(), 10, 20);
    for (int docIndex = 0; docIndex < numDocs; ++docIndex) {
        Document doc = new Document();
        // between 0 and 2 points per document
        for (int remaining = random().nextInt(3); remaining > 0; --remaining) {
            doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
        }
        writer.addDocument(doc);
        if (random().nextBoolean()) {
            DirectoryReader.open(writer).close();
        }
    }
    final IndexReader beforeMerge = DirectoryReader.open(writer);
    writer.forceMerge(1);
    final IndexReader afterMerge = DirectoryReader.open(writer);
    final PointValues expected = getOnlyLeafReader(afterMerge).getPointValues("field");
    if (expected != null) {
        assertArrayEquals(expected.getMinPackedValue(), PointValues.getMinPackedValue(beforeMerge, "field"));
        assertArrayEquals(expected.getMaxPackedValue(), PointValues.getMaxPackedValue(beforeMerge, "field"));
        assertEquals(expected.getDocCount(), PointValues.getDocCount(beforeMerge, "field"));
        assertEquals(expected.size(), PointValues.size(beforeMerge, "field"));
    } else {
        // the merged segment has no point values for the field
        assertNull(PointValues.getMinPackedValue(beforeMerge, "field"));
        assertNull(PointValues.getMaxPackedValue(beforeMerge, "field"));
        assertEquals(0, PointValues.getDocCount(beforeMerge, "field"));
        assertEquals(0, PointValues.size(beforeMerge, "field"));
    }
    IOUtils.close(writer, beforeMerge, afterMerge, directory);
}
Also used : PointValues(org.apache.lucene.index.PointValues) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) FloatPoint(org.apache.lucene.document.FloatPoint) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 4 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project by Apache.

Class Lucene60PointsWriter, method writeField.

/**
 * Writes the point values of one field through a {@link BKDWriter} and, when an index
 * was produced, records its file pointer in {@code indexFPs} under the field name.
 *
 * @param fieldInfo describes the field; supplies its name, dimension count and bytes per dimension
 * @param reader    source of the field's {@link PointValues}
 * @throws IOException if reading the values or writing the tree fails
 */
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
    PointValues values = reader.getValues(fieldInfo.name);
    // as many points as documents with points means no document holds more than one
    boolean singleValuePerDoc = values.size() == values.getDocCount();
    try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, values.size(), singleValuePerDoc)) {
        if (values instanceof MutablePointValues) {
            // mutable values are handed to the writer in one call; -1 means nothing to record
            final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointValues) values);
            if (fp != -1) {
                indexFPs.put(fieldInfo.name, fp);
            }
            return;
        }
        // Otherwise feed every point to the writer one at a time.
        values.intersect(new IntersectVisitor() {

            @Override
            public void visit(int docID) {
                // NOTE(review): presumably never invoked because compare() below always
                // reports CELL_CROSSES_QUERY; reaching this is treated as a bug
                throw new IllegalStateException();
            }

            @Override
            public void visit(int docID, byte[] packedValue) throws IOException {
                writer.add(packedValue, docID);
            }

            @Override
            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                return Relation.CELL_CROSSES_QUERY;
            }
        });
        // We could have 0 points on merge since all docs with dimensional fields may be deleted:
        if (writer.getPointCount() > 0) {
            indexFPs.put(fieldInfo.name, writer.finish(dataOut));
        }
    }
}
Also used : MutablePointValues(org.apache.lucene.codecs.MutablePointValues) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IOException(java.io.IOException) BKDWriter(org.apache.lucene.util.bkd.BKDWriter) MutablePointValues(org.apache.lucene.codecs.MutablePointValues)

Example 5 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project by Apache.

Class LatLonPoint, method nearest.

/**
 * Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance.
 * <p>
 * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort},
 * but is far more efficient since it takes advantage of properties of the indexed BKD tree.  Currently this
 * only works with {@link Lucene60PointsFormat} (used by the default codec).  Multi-valued fields are
 * currently not de-duplicated, so if a document had multiple instances of the specified field that
 * make it into the top n, that document will appear more than once.
 * <p>
 * Documents are ordered by ascending distance from the location. The value returned in {@link FieldDoc} for
 * the hits contains a Double instance with the distance in meters.
 *
 * @param searcher IndexSearcher to find nearest points from.
 * @param field field name. must not be null.
 * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
 * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
 * @param n the number of nearest neighbors to retrieve.
 * @return TopFieldDocs containing documents ordered by distance, where the field value for each {@link FieldDoc} is the distance in meters
 * @throws IllegalArgumentException if the underlying PointValues is not a {@code Lucene60PointsReader} (this is a current limitation), or
 *         if {@code field} or {@code searcher} is null, or if {@code latitude}, {@code longitude} or {@code n} are out-of-bounds
 * @throws IOException if an IOException occurs while finding the points.
 */
// TODO: what about multi-valued documents? what happens?
public static TopFieldDocs nearest(IndexSearcher searcher, String field, double latitude, double longitude, int n) throws IOException {
    GeoUtils.checkLatitude(latitude);
    GeoUtils.checkLongitude(longitude);
    if (n < 1) {
        throw new IllegalArgumentException("n must be at least 1; got " + n);
    }
    if (field == null) {
        throw new IllegalArgumentException("field must not be null");
    }
    if (searcher == null) {
        throw new IllegalArgumentException("searcher must not be null");
    }
    // Parallel lists: entry i of each list belongs to the same segment.
    List<BKDReader> readers = new ArrayList<>();
    List<Integer> docBases = new ArrayList<>();
    List<Bits> liveDocs = new ArrayList<>();
    int totalHits = 0;
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
        PointValues points = leaf.reader().getPointValues(field);
        if (points == null) {
            // no point values for this field in this segment
            continue;
        }
        if (!(points instanceof BKDReader)) {
            throw new IllegalArgumentException("can only run on Lucene60PointsReader points implementation, but got " + points);
        }
        totalHits += points.getDocCount();
        // points is non-null and a BKDReader at this point, so no further null check is needed
        readers.add((BKDReader) points);
        docBases.add(leaf.docBase);
        liveDocs.add(leaf.reader().getLiveDocs());
    }
    NearestNeighbor.NearestHit[] hits = NearestNeighbor.nearest(latitude, longitude, readers, liveDocs, docBases, n);
    // Convert to TopFieldDocs:
    ScoreDoc[] scoreDocs = new ScoreDoc[hits.length];
    for (int i = 0; i < hits.length; i++) {
        NearestNeighbor.NearestHit hit = hits[i];
        // the score is fixed at 0; the distance travels as the single sort-field value
        scoreDocs[i] = new FieldDoc(hit.docID, 0.0f, new Object[] { Double.valueOf(hit.distanceMeters) });
    }
    return new TopFieldDocs(totalHits, scoreDocs, null, 0.0f);
}
Also used : FieldDoc(org.apache.lucene.search.FieldDoc) ArrayList(java.util.ArrayList) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) ScoreDoc(org.apache.lucene.search.ScoreDoc) BKDReader(org.apache.lucene.util.bkd.BKDReader) PointValues(org.apache.lucene.index.PointValues) Bits(org.apache.lucene.util.Bits) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Aggregations

PointValues (org.apache.lucene.index.PointValues)19 LeafReader (org.apache.lucene.index.LeafReader)9 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)9 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)8 Relation (org.apache.lucene.index.PointValues.Relation)7 IOException (java.io.IOException)6 FieldInfo (org.apache.lucene.index.FieldInfo)6 DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder)6 BinaryPoint (org.apache.lucene.document.BinaryPoint)5 Document (org.apache.lucene.document.Document)5 IntPoint (org.apache.lucene.document.IntPoint)5 Directory (org.apache.lucene.store.Directory)5 DoublePoint (org.apache.lucene.document.DoublePoint)4 FloatPoint (org.apache.lucene.document.FloatPoint)4 LongPoint (org.apache.lucene.document.LongPoint)4 ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer)4 ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight)4 FSDirectory (org.apache.lucene.store.FSDirectory)3 RAMDirectory (org.apache.lucene.store.RAMDirectory)3 ArrayList (java.util.ArrayList)2