Search in sources:

Example 16 with PointValues

use of org.apache.lucene.index.PointValues in project lucene-solr by apache.

the class TestPointValues method testSparsePoints.

/**
 * Indexes at least 1000 documents in which each of up to ten int point fields is present only
 * rarely (and occasionally twice in the same document), then checks that the doc count and the
 * point count reported per field, summed over all leaves of the reader, equal what was indexed.
 *
 * @throws Exception if indexing, reading or closing any of the resources fails
 */
public void testSparsePoints() throws Exception {
    // try-with-resources releases the reader, writer and directory (in that order, same as the
    // explicit close() calls used previously) even when an assertion fails part-way through.
    try (Directory dir = newDirectory()) {
        int numDocs = atLeast(1000);
        int numFields = TestUtil.nextInt(random(), 1, 10);
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
            // expected number of documents containing each field
            int[] fieldDocCounts = new int[numFields];
            // expected number of point values indexed for each field
            int[] fieldSizes = new int[numFields];
            for (int i = 0; i < numDocs; i++) {
                Document doc = new Document();
                for (int field = 0; field < numFields; field++) {
                    String fieldName = "int" + field;
                    // roughly 1 document in 100 gets this field
                    if (random().nextInt(100) == 17) {
                        doc.add(new IntPoint(fieldName, random().nextInt()));
                        fieldDocCounts[field]++;
                        fieldSizes[field]++;
                        if (random().nextInt(10) == 5) {
                            // add same field again: one more value, but still the same document
                            doc.add(new IntPoint(fieldName, random().nextInt()));
                            fieldSizes[field]++;
                        }
                    }
                }
                w.addDocument(doc);
            }
            try (IndexReader r = w.getReader()) {
                for (int field = 0; field < numFields; field++) {
                    int docCount = 0;
                    // accumulate as long so that adding points.size() never narrows implicitly
                    long size = 0;
                    String fieldName = "int" + field;
                    for (LeafReaderContext ctx : r.leaves()) {
                        PointValues points = ctx.reader().getPointValues(fieldName);
                        // a leaf reports null when it has no points for this field
                        if (points != null) {
                            docCount += points.getDocCount();
                            size += points.size();
                        }
                    }
                    assertEquals(fieldDocCounts[field], docCount);
                    assertEquals(fieldSizes[field], size);
                }
            }
        }
    }
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) PointValues(org.apache.lucene.index.PointValues) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) FloatPoint(org.apache.lucene.document.FloatPoint) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 17 with PointValues

use of org.apache.lucene.index.PointValues in project lucene-solr by apache.

the class LatLonPointDistanceQuery method createWeight.

/**
 * Creates a constant-score weight for this distance query.
 * <p>
 * Per segment, the weight walks the point values of {@code field} with an
 * {@link IntersectVisitor}. A value is collected when it falls inside the pre-encoded bounding
 * box (or inside the second longitude range used when the box crosses the dateline) and also
 * passes the distance predicate built from {@code latitude}, {@code longitude} and
 * {@code radiusMeters}.
 *
 * @param searcher    not read by this implementation
 * @param needsScores not read by this implementation
 * @param boost       passed through to the {@link ConstantScoreWeight}
 * @return the weight; its scorer and scorer supplier are {@code null} for segments that have no
 *         point values or no field info for {@code field}
 * @throws IOException declared by the overridden method
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters);
    // create bounding box(es) for the distance range
    // these are pre-encoded with LatLonPoint's encoding
    final byte[] minLat = new byte[Integer.BYTES];
    final byte[] maxLat = new byte[Integer.BYTES];
    final byte[] minLon = new byte[Integer.BYTES];
    final byte[] maxLon = new byte[Integer.BYTES];
    // second set of longitude ranges to check (for cross-dateline case)
    final byte[] minLon2 = new byte[Integer.BYTES];
    NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
    NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
    // crosses dateline: split
    if (box.crossesDateline()) {
        // box1: from the smallest encodable longitude up to box.maxLon
        NumericUtils.intToSortableBytes(Integer.MIN_VALUE, minLon, 0);
        NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
        // box2: from box.minLon upwards (only a lower bound is stored for it)
        NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon2, 0);
    } else {
        NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0);
        NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
        // disable box2: with the largest int as its lower bound, "value < minLon2" holds for
        // every smaller encoded longitude, so box2 does not rescue values rejected by box1
        NumericUtils.intToSortableBytes(Integer.MAX_VALUE, minLon2, 0);
    }
    // compute exact sort key: avoid any asin() computations
    final double sortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
    final double axisLat = Rectangle.axisLat(latitude, radiusMeters);
    return new ConstantScoreWeight(this, boost) {

        // built once per weight and reused by the visitor of every segment
        final GeoEncodingUtils.DistancePredicate distancePredicate = GeoEncodingUtils.createDistancePredicate(latitude, longitude, radiusMeters);

        /**
         * Delegates to {@link #scorerSupplier(LeafReaderContext)} and returns {@code null} when
         * that yields no supplier for the segment.
         */
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            ScorerSupplier scorerSupplier = scorerSupplier(context);
            if (scorerSupplier == null) {
                return null;
            }
            return scorerSupplier.get(false);
        }

        /**
         * Prepares (but does not yet run) the point-tree intersection for one segment. The
         * intersection itself happens in {@code ScorerSupplier.get}.
         */
        @Override
        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
            LeafReader reader = context.reader();
            PointValues values = reader.getPointValues(field);
            if (values == null) {
                // No docs in this segment had any points fields
                return null;
            }
            FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
            if (fieldInfo == null) {
                // No docs in this segment indexed this field at all
                return null;
            }
            LatLonPoint.checkCompatible(fieldInfo);
            // matching docids
            DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
            final IntersectVisitor visitor = new IntersectVisitor() {

                // replaced on every grow() call; the visit() methods add through the latest one
                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = result.grow(count);
                }

                // accepts the doc without inspecting any value
                // NOTE(review): presumably invoked for cells that compare() reported as fully
                // inside the query - confirm against the IntersectVisitor contract
                @Override
                public void visit(int docID) {
                    adder.add(docID);
                }

                // packedValue holds the encoded latitude at offset 0 and the encoded longitude
                // at offset Integer.BYTES
                @Override
                public void visit(int docID, byte[] packedValue) {
                    // bounding box check
                    if (StringHelper.compare(Integer.BYTES, packedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, packedValue, 0, minLat, 0) < 0) {
                        // latitude out of bounding box range
                        return;
                    }
                    // rejected only if outside [minLon, maxLon] AND below minLon2 (i.e. outside box2 as well)
                    if ((StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon, 0) < 0) && StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon2, 0) < 0) {
                        // longitude out of bounding box range
                        return;
                    }
                    // the value survived the box pre-check: decode it and run the distance test
                    int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
                    int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
                    if (distancePredicate.test(docLatitude, docLongitude)) {
                        adder.add(docID);
                    }
                }

                // algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
                // 1. check our bounding box(es) first. if the subtree is entirely outside of those, bail.
                // 2. check if the subtree is disjoint. it may cross the bounding box but not intersect with circle
                // 3. see if the subtree is fully contained. if the subtree is enormous along the x axis, wrapping half way around the world, etc: then this can't work, just go to step 4.
                // 4. recurse naively (subtrees crossing over circle edge)
                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    if (StringHelper.compare(Integer.BYTES, minPackedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, 0, minLat, 0) < 0) {
                        // latitude out of bounding box range
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    // same two-box longitude test as in visit(), applied to the cell's min/max corners
                    if ((StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon, 0) < 0) && StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon2, 0) < 0) {
                        // longitude out of bounding box range
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    // steps 2-4 of the algorithm above are delegated to GeoUtils.relate on the decoded cell
                    double latMin = decodeLatitude(minPackedValue, 0);
                    double lonMin = decodeLongitude(minPackedValue, Integer.BYTES);
                    double latMax = decodeLatitude(maxPackedValue, 0);
                    double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES);
                    return GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);
                }
            };
            // inside the anonymous ScorerSupplier below, "this" would be the supplier, so keep a
            // reference to the enclosing weight
            final Weight weight = this;
            return new ScorerSupplier() {

                // -1 means "not estimated yet"
                long cost = -1;

                // runs the intersection and wraps the collected doc ids; randomAccess is not read
                @Override
                public Scorer get(boolean randomAccess) throws IOException {
                    values.intersect(visitor);
                    return new ConstantScoreScorer(weight, score(), result.build().iterator());
                }

                // estimated on first call and cached in the cost field afterwards
                @Override
                public long cost() {
                    if (cost == -1) {
                        cost = values.estimatePointCount(visitor);
                    }
                    assert cost >= 0;
                    return cost;
                }
            };
        }
    };
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) Rectangle(org.apache.lucene.geo.Rectangle) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) Weight(org.apache.lucene.search.Weight) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) PointValues(org.apache.lucene.index.PointValues) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) ScorerSupplier(org.apache.lucene.search.ScorerSupplier) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 18 with PointValues

use of org.apache.lucene.index.PointValues in project lucene-solr by apache.

the class LatLonPointInPolygonQuery method createWeight.

/**
 * Creates a constant-score weight for this polygon query.
 * <p>
 * Per segment, the scorer intersects the point values of {@code field} with the polygons: whole
 * cells are first rejected against the bounding box of all polygons, the remaining cells are
 * classified by the {@link Polygon2D} tree, and individual values are tested with the polygon
 * predicate.
 *
 * @param searcher    not read by this implementation
 * @param needsScores not read by this implementation
 * @param boost       passed through to the {@link ConstantScoreWeight}
 * @return the weight; its scorer is {@code null} for segments that have no point values or no
 *         field info for {@code field}
 * @throws IOException declared by the overridden method
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // RandomAccessWeight is intentionally not used: approximating with "match all docs" is no
    // good, since this is an inverted structure that should be consulted in the first pass.

    // Bounding box over all polygons: cheap way to speed up tree intersection and to improve the
    // approximation for complex multi-polygons. Bounds are pre-encoded with LatLonPoint's encoding.
    final Rectangle bounds = Rectangle.fromPolygon(polygons);

    final byte[] minLat = new byte[Integer.BYTES];
    final byte[] maxLat = new byte[Integer.BYTES];
    final byte[] minLon = new byte[Integer.BYTES];
    final byte[] maxLon = new byte[Integer.BYTES];
    NumericUtils.intToSortableBytes(encodeLatitude(bounds.minLat), minLat, 0);
    NumericUtils.intToSortableBytes(encodeLatitude(bounds.maxLat), maxLat, 0);
    NumericUtils.intToSortableBytes(encodeLongitude(bounds.minLon), minLon, 0);
    NumericUtils.intToSortableBytes(encodeLongitude(bounds.maxLon), maxLon, 0);

    final Polygon2D polygonTree = Polygon2D.create(polygons);
    final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, polygonTree);

    return new ConstantScoreWeight(this, boost) {

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final LeafReader leaf = context.reader();

            final PointValues pointValues = leaf.getPointValues(field);
            if (pointValues == null) {
                // this segment has no point values for the field
                return null;
            }

            final FieldInfo info = leaf.getFieldInfos().fieldInfo(field);
            if (info == null) {
                // this segment never indexed the field
                return null;
            }
            LatLonPoint.checkCompatible(info);

            // collects the matching doc ids
            final DocIdSetBuilder matches = new DocIdSetBuilder(leaf.maxDoc(), pointValues, field);

            final IntersectVisitor visitor = new IntersectVisitor() {

                // replaced on every grow() call; the visit() methods add through the latest one
                DocIdSetBuilder.BulkAdder bulkAdder;

                @Override
                public void grow(int count) {
                    bulkAdder = matches.grow(count);
                }

                @Override
                public void visit(int docID) {
                    bulkAdder.add(docID);
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    // encoded latitude sits at offset 0, encoded longitude right after it
                    final int encodedLat = NumericUtils.sortableBytesToInt(packedValue, 0);
                    final int encodedLon = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
                    if (polygonPredicate.test(encodedLat, encodedLon)) {
                        bulkAdder.add(docID);
                    }
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    // outside of global bounding box range: latitude first ...
                    final boolean latDisjoint = StringHelper.compare(Integer.BYTES, minPackedValue, 0, maxLat, 0) > 0
                        || StringHelper.compare(Integer.BYTES, maxPackedValue, 0, minLat, 0) < 0;
                    if (latDisjoint) {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    // ... then longitude
                    final boolean lonDisjoint = StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, maxLon, 0) > 0
                        || StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon, 0) < 0;
                    if (lonDisjoint) {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }

                    // the cell overlaps the bounding box: let the polygon tree classify it
                    final double cellMinLat = decodeLatitude(minPackedValue, 0);
                    final double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES);
                    final double cellMaxLat = decodeLatitude(maxPackedValue, 0);
                    final double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
                    return polygonTree.relate(cellMinLat, cellMaxLat, cellMinLon, cellMaxLon);
                }
            };
            pointValues.intersect(visitor);

            return new ConstantScoreScorer(this, score(), matches.build().iterator());
        }
    };
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) GeoEncodingUtils(org.apache.lucene.geo.GeoEncodingUtils) Rectangle(org.apache.lucene.geo.Rectangle) Polygon2D(org.apache.lucene.geo.Polygon2D) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 19 with PointValues

use of org.apache.lucene.index.PointValues in project lucene-solr by apache.

the class TestDocIdSetBuilder method testLeverageStats.

/**
 * Checks the {@code numValuesPerDoc} and {@code multivalued} settings that
 * {@link DocIdSetBuilder} derives from the statistics of the points or terms it is given, and the
 * cost of the resulting set after adding two doc ids.
 * <p>
 * The same four scenarios are run for points and then for terms: equal stats (42, 42), differing
 * stats (42, 63), and two variants in which one of the stats is -1.
 */
public void testLeverageStats() throws IOException {
    // ---- points: stats (42, 42) -> treated as single-valued ----
    PointValues singleValuedPoints = new DummyPointValues(42, 42);
    DocIdSetBuilder singlePointsBuilder = new DocIdSetBuilder(100, singleValuedPoints, "foo");
    assertEquals(1d, singlePointsBuilder.numValuesPerDoc, 0d);
    assertFalse(singlePointsBuilder.multivalued);
    DocIdSetBuilder.BulkAdder singlePointsAdder = singlePointsBuilder.grow(2);
    singlePointsAdder.add(5);
    singlePointsAdder.add(7);
    DocIdSet singlePointsSet = singlePointsBuilder.build();
    assertTrue(singlePointsSet instanceof BitDocIdSet);
    assertEquals(2, singlePointsSet.iterator().cost());

    // ---- points: stats (42, 63) -> treated as multi-valued ----
    PointValues multiValuedPoints = new DummyPointValues(42, 63);
    DocIdSetBuilder multiPointsBuilder = new DocIdSetBuilder(100, multiValuedPoints, "foo");
    assertEquals(1.5, multiPointsBuilder.numValuesPerDoc, 0d);
    assertTrue(multiPointsBuilder.multivalued);
    DocIdSetBuilder.BulkAdder multiPointsAdder = multiPointsBuilder.grow(2);
    multiPointsAdder.add(5);
    multiPointsAdder.add(7);
    DocIdSet multiPointsSet = multiPointsBuilder.build();
    assertTrue(multiPointsSet instanceof BitDocIdSet);
    // the builder thinks the same doc was added twice
    assertEquals(1, multiPointsSet.iterator().cost());

    // ---- points: incomplete stats (second stat missing) ----
    PointValues pointsMissingSecondStat = new DummyPointValues(42, -1);
    DocIdSetBuilder pointsMissingSecondBuilder = new DocIdSetBuilder(100, pointsMissingSecondStat, "foo");
    assertEquals(1d, pointsMissingSecondBuilder.numValuesPerDoc, 0d);
    assertTrue(pointsMissingSecondBuilder.multivalued);

    // ---- points: incomplete stats (first stat missing) ----
    PointValues pointsMissingFirstStat = new DummyPointValues(-1, 84);
    DocIdSetBuilder pointsMissingFirstBuilder = new DocIdSetBuilder(100, pointsMissingFirstStat, "foo");
    assertEquals(1d, pointsMissingFirstBuilder.numValuesPerDoc, 0d);
    assertTrue(pointsMissingFirstBuilder.multivalued);

    // ---- terms: stats (42, 42) -> treated as single-valued ----
    Terms singleValuedTerms = new DummyTerms(42, 42);
    DocIdSetBuilder singleTermsBuilder = new DocIdSetBuilder(100, singleValuedTerms);
    assertEquals(1d, singleTermsBuilder.numValuesPerDoc, 0d);
    assertFalse(singleTermsBuilder.multivalued);
    DocIdSetBuilder.BulkAdder singleTermsAdder = singleTermsBuilder.grow(2);
    singleTermsAdder.add(5);
    singleTermsAdder.add(7);
    DocIdSet singleTermsSet = singleTermsBuilder.build();
    assertTrue(singleTermsSet instanceof BitDocIdSet);
    assertEquals(2, singleTermsSet.iterator().cost());

    // ---- terms: stats (42, 63) -> treated as multi-valued ----
    Terms multiValuedTerms = new DummyTerms(42, 63);
    DocIdSetBuilder multiTermsBuilder = new DocIdSetBuilder(100, multiValuedTerms);
    assertEquals(1.5, multiTermsBuilder.numValuesPerDoc, 0d);
    assertTrue(multiTermsBuilder.multivalued);
    DocIdSetBuilder.BulkAdder multiTermsAdder = multiTermsBuilder.grow(2);
    multiTermsAdder.add(5);
    multiTermsAdder.add(7);
    DocIdSet multiTermsSet = multiTermsBuilder.build();
    assertTrue(multiTermsSet instanceof BitDocIdSet);
    // the builder thinks the same doc was added twice
    assertEquals(1, multiTermsSet.iterator().cost());

    // ---- terms: incomplete stats (second stat missing) ----
    Terms termsMissingSecondStat = new DummyTerms(42, -1);
    DocIdSetBuilder termsMissingSecondBuilder = new DocIdSetBuilder(100, termsMissingSecondStat);
    assertEquals(1d, termsMissingSecondBuilder.numValuesPerDoc, 0d);
    assertTrue(termsMissingSecondBuilder.multivalued);

    // ---- terms: incomplete stats (first stat missing) ----
    Terms termsMissingFirstStat = new DummyTerms(-1, 84);
    DocIdSetBuilder termsMissingFirstBuilder = new DocIdSetBuilder(100, termsMissingFirstStat);
    assertEquals(1d, termsMissingFirstBuilder.numValuesPerDoc, 0d);
    assertTrue(termsMissingFirstBuilder.multivalued);
}
Also used : PointValues(org.apache.lucene.index.PointValues) Terms(org.apache.lucene.index.Terms) DocIdSet(org.apache.lucene.search.DocIdSet)

Aggregations

PointValues (org.apache.lucene.index.PointValues): 19; LeafReader (org.apache.lucene.index.LeafReader): 9; IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor): 9; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 8; Relation (org.apache.lucene.index.PointValues.Relation): 7; IOException (java.io.IOException): 6; FieldInfo (org.apache.lucene.index.FieldInfo): 6; DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder): 6; BinaryPoint (org.apache.lucene.document.BinaryPoint): 5; Document (org.apache.lucene.document.Document): 5; IntPoint (org.apache.lucene.document.IntPoint): 5; Directory (org.apache.lucene.store.Directory): 5; DoublePoint (org.apache.lucene.document.DoublePoint): 4; FloatPoint (org.apache.lucene.document.FloatPoint): 4; LongPoint (org.apache.lucene.document.LongPoint): 4; ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer): 4; ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight): 4; FSDirectory (org.apache.lucene.store.FSDirectory): 3; RAMDirectory (org.apache.lucene.store.RAMDirectory): 3; ArrayList (java.util.ArrayList): 2