Search in sources :

Example 6 with DocIdSetBuilder

use of org.apache.lucene.util.DocIdSetBuilder in project lucene-solr by apache.

From the class LatLonPointInPolygonQuery, method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
    // used in the first pass:
    // bounding box over all polygons, this can speed up tree intersection/cheaply improve approximation for complex multi-polygons
    // these are pre-encoded with LatLonPoint's encoding
    final Rectangle box = Rectangle.fromPolygon(polygons);
    final byte[] minLat = new byte[Integer.BYTES];
    final byte[] maxLat = new byte[Integer.BYTES];
    final byte[] minLon = new byte[Integer.BYTES];
    final byte[] maxLon = new byte[Integer.BYTES];
    // encode the box bounds as sortable bytes so they can be compared directly
    // against the index's packed values with StringHelper.compare below
    NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
    NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
    NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0);
    NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
    final Polygon2D tree = Polygon2D.create(polygons);
    // predicate used for the exact per-point test in visit(docID, packedValue)
    final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
    return new ConstantScoreWeight(this, boost) {

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader reader = context.reader();
            PointValues values = reader.getPointValues(field);
            if (values == null) {
                // No docs in this segment had any points fields
                return null;
            }
            FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
            if (fieldInfo == null) {
                // No docs in this segment indexed this field at all
                return null;
            }
            LatLonPoint.checkCompatible(fieldInfo);
            // matching docids
            DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
            values.intersect(new IntersectVisitor() {

                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = result.grow(count);
                }

                // Cell is entirely inside the polygon: every doc in it matches,
                // no per-value test needed.
                @Override
                public void visit(int docID) {
                    adder.add(docID);
                }

                // Cell crosses the polygon boundary: run the exact
                // point-in-polygon test on the encoded lat/lon ints.
                @Override
                public void visit(int docID, byte[] packedValue) {
                    if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0), NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
                        adder.add(docID);
                    }
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    // cheap pre-check against the global bounding box first;
                    // latitude lives at offset 0, longitude at offset Integer.BYTES
                    if (StringHelper.compare(Integer.BYTES, minPackedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, 0, minLat, 0) < 0 || StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon, 0) < 0) {
                        // outside of global bounding box range
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    // otherwise, let the polygon tree compute the precise relation
                    double cellMinLat = decodeLatitude(minPackedValue, 0);
                    double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES);
                    double cellMaxLat = decodeLatitude(maxPackedValue, 0);
                    double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
                    return tree.relate(cellMinLat, cellMaxLat, cellMinLon, cellMaxLon);
                }
            });
            return new ConstantScoreScorer(this, score(), result.build().iterator());
        }
    };
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) GeoEncodingUtils(org.apache.lucene.geo.GeoEncodingUtils) Rectangle(org.apache.lucene.geo.Rectangle) Polygon2D(org.apache.lucene.geo.Polygon2D) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 7 with DocIdSetBuilder

use of org.apache.lucene.util.DocIdSetBuilder in project lucene-solr by apache.

From the class TestGeo3DPoint, method explain.

public static String explain(String fieldName, GeoShape shape, GeoPoint targetDocPoint, GeoPoint scaledDocPoint, IndexReader reader, int docID) throws Exception {
    final XYZBounds bounds = new XYZBounds();
    shape.getBounds(bounds);
    // Locate the segment (leaf reader) that owns the target docID:
    final int leafIndex = ReaderUtil.subIndex(docID, reader.leaves());
    final LeafReader leaf = reader.leaves().get(leafIndex).reader();
    final StringBuilder description = new StringBuilder();
    description.append("target is in leaf " + leaf + " of full reader " + reader + "\n");
    final DocIdSetBuilder hits = new DocIdSetBuilder(leaf.maxDoc());
    // The visitor needs the segment-relative doc id, not the global one:
    final int segmentDocID = docID - reader.leaves().get(leafIndex).docBase;
    final ExplainingVisitor visitor = new ExplainingVisitor(shape, targetDocPoint, scaledDocPoint, new PointInShapeIntersectVisitor(hits, shape, bounds), segmentDocID, 3, Integer.BYTES, description);
    // First pass: record the BKD traversal "path" that reaches the target doc.
    leaf.getPointValues(fieldName).intersect(visitor);
    // Second pass: replay that path and capture how the wrapped visitor responded.
    visitor.startSecondPhase();
    leaf.getPointValues(fieldName).intersect(visitor);
    return description.toString();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) XYZBounds(org.apache.lucene.spatial3d.geom.XYZBounds) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) GeoPoint(org.apache.lucene.spatial3d.geom.GeoPoint)

Example 8 with DocIdSetBuilder

use of org.apache.lucene.util.DocIdSetBuilder in project lucene-solr by apache.

From the class MultiTermQueryConstantScoreWrapper, method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        /** Try to collect terms from the given terms enum and return true iff all
       *  terms could be collected. If {@code false} is returned, the enum is
       *  left positioned on the next term. */
        private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
            // never collect more terms than a BooleanQuery may legally hold
            final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
            for (int i = 0; i < threshold; ++i) {
                final BytesRef term = termsEnum.next();
                if (term == null) {
                    // enum exhausted: every term was collected
                    return true;
                }
                TermState state = termsEnum.termState();
                terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
            }
            // one extra next(): true only if the enum held exactly `threshold` terms
            return termsEnum.next() == null;
        }

        /**
       * On the given leaf context, try to either rewrite to a disjunction if
       * there are few terms, or build a bitset containing matching docs.
       */
        private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(query.field);
            if (terms == null) {
                // field does not exist
                return new WeightOrDocIdSet((DocIdSet) null);
            }
            final TermsEnum termsEnum = query.getTermsEnum(terms);
            assert termsEnum != null;
            PostingsEnum docs = null;
            final List<TermAndState> collectedTerms = new ArrayList<>();
            if (collectTerms(context, termsEnum, collectedTerms)) {
                // build a boolean query
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                for (TermAndState t : collectedTerms) {
                    final TermContext termContext = new TermContext(searcher.getTopReaderContext());
                    termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
                    bq.add(new TermQuery(new Term(query.field, t.term), termContext), Occur.SHOULD);
                }
                Query q = new ConstantScoreQuery(bq.build());
                final Weight weight = searcher.rewrite(q).createWeight(searcher, needsScores, score());
                return new WeightOrDocIdSet(weight);
            }
            // Too many terms: go back to the terms we already collected and start building the bit set
            DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
            if (collectedTerms.isEmpty() == false) {
                TermsEnum termsEnum2 = terms.iterator();
                for (TermAndState t : collectedTerms) {
                    // seekExact with the cached TermState avoids re-running the term lookup
                    termsEnum2.seekExact(t.term, t.state);
                    docs = termsEnum2.postings(docs, PostingsEnum.NONE);
                    builder.add(docs);
                }
            }
            // Then keep filling the bit set with remaining terms
            // (per collectTerms' contract, termsEnum is still positioned on the
            // first uncollected term, so consume it before calling next())
            do {
                docs = termsEnum.postings(docs, PostingsEnum.NONE);
                builder.add(docs);
            } while (termsEnum.next() != null);
            return new WeightOrDocIdSet(builder.build());
        }

        /** Wrap the doc id set in a constant-score scorer, or null if the set is absent/empty. */
        private Scorer scorer(DocIdSet set) throws IOException {
            if (set == null) {
                return null;
            }
            final DocIdSetIterator disi = set.iterator();
            if (disi == null) {
                return null;
            }
            return new ConstantScoreScorer(this, score(), disi);
        }

        @Override
        public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet.weight != null) {
                // rewritten to a boolean query: delegate bulk scoring to it
                return weightOrBitSet.weight.bulkScorer(context);
            } else {
                final Scorer scorer = scorer(weightOrBitSet.set);
                if (scorer == null) {
                    return null;
                }
                return new DefaultBulkScorer(scorer);
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet.weight != null) {
                return weightOrBitSet.weight.scorer(context);
            } else {
                return scorer(weightOrBitSet.set);
            }
        }
    };
}
Also used : DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) ArrayList(java.util.ArrayList) TermContext(org.apache.lucene.index.TermContext) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ArrayList(java.util.ArrayList) List(java.util.List) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) TermState(org.apache.lucene.index.TermState) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder)

Example 9 with DocIdSetBuilder

use of org.apache.lucene.util.DocIdSetBuilder in project lucene-solr by apache.

From the class PointInSetQuery, method createWeight.

@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final LeafReader reader = context.reader();
            final PointValues values = reader.getPointValues(field);
            if (values == null) {
                // This segment indexed no points for the field.
                return null;
            }
            if (values.getNumDimensions() != numDims) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + values.getNumDimensions() + " but this query has numDims=" + numDims);
            }
            if (values.getBytesPerDimension() != bytesPerDim) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
            }
            final DocIdSetBuilder matchingDocs = new DocIdSetBuilder(reader.maxDoc(), values, field);
            if (numDims != 1) {
                // NOTE: naive approach — re-walk the KD tree once per queried point.
                // A merge-style optimization like the 1D case would need a
                // query-time KD tree to intersect against the index, which is
                // probably tricky!
                final SinglePointVisitor visitor = new SinglePointVisitor(matchingDocs);
                final TermIterator iterator = sortedPackedPoints.iterator();
                BytesRef point = iterator.next();
                while (point != null) {
                    visitor.setPoint(point);
                    values.intersect(visitor);
                    point = iterator.next();
                }
            } else {
                // Common 1D case: effectively a merge sort of the indexed values
                // against the queried set, in a single tree pass.
                values.intersect(new MergePointVisitor(sortedPackedPoints, matchingDocs));
            }
            return new ConstantScoreScorer(this, score(), matchingDocs.build().iterator());
        }
    };
}
Also used : PointValues(org.apache.lucene.index.PointValues) LeafReader(org.apache.lucene.index.LeafReader) TermIterator(org.apache.lucene.index.PrefixCodedTerms.TermIterator) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 10 with DocIdSetBuilder

use of org.apache.lucene.util.DocIdSetBuilder in project lucene-solr by apache.

From the class PointRangeQuery, method createWeight.

@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        /** Visitor that adds every document matching the range to {@code result}. */
        private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
            return new IntersectVisitor() {

                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = result.grow(count);
                }

                // Cell entirely inside the range: add without a per-value check.
                @Override
                public void visit(int docID) {
                    adder.add(docID);
                }

                // Cell crosses the range: test the value in every dimension.
                @Override
                public void visit(int docID, byte[] packedValue) {
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
                            // Doc's value is too low, in this dimension
                            return;
                        }
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
                            // Doc's value is too high, in this dimension
                            return;
                        }
                    }
                    // Doc is in-bounds
                    adder.add(docID);
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    boolean crosses = false;
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
                            // disjoint in one dimension => disjoint overall
                            return Relation.CELL_OUTSIDE_QUERY;
                        }
                        crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
                    }
                    if (crosses) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_INSIDE_QUERY;
                    }
                }
            };
        }

        /**
       * Create a visitor that clears documents that do NOT match the range.
       * NOTE: compare() deliberately swaps INSIDE/OUTSIDE relative to
       * getIntersectVisitor, so the tree walk visits the complement of the range.
       */
        private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
            return new IntersectVisitor() {

                // Cell entirely outside the range (relations are inverted):
                // this doc cannot match, remove it and decrement the match count.
                @Override
                public void visit(int docID) {
                    result.clear(docID);
                    cost[0]--;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
                            // Doc's value is too low, in this dimension
                            result.clear(docID);
                            cost[0]--;
                            return;
                        }
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
                            // Doc's value is too high, in this dimension
                            result.clear(docID);
                            cost[0]--;
                            return;
                        }
                    }
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    boolean crosses = false;
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
                            // This dim is not in the range
                            return Relation.CELL_INSIDE_QUERY;
                        }
                        crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
                    }
                    if (crosses) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                }
            };
        }

        @Override
        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
            LeafReader reader = context.reader();
            PointValues values = reader.getPointValues(field);
            if (values == null) {
                // No docs in this segment/field indexed any points
                return null;
            }
            if (values.getNumDimensions() != numDims) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + values.getNumDimensions() + " but this query has numDims=" + numDims);
            }
            if (bytesPerDim != values.getBytesPerDimension()) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
            }
            // Cheap segment-level check: if every doc has a point and the field's
            // global min/max lie within the query range, everything matches.
            boolean allDocsMatch;
            if (values.getDocCount() == reader.maxDoc()) {
                final byte[] fieldPackedLower = values.getMinPackedValue();
                final byte[] fieldPackedUpper = values.getMaxPackedValue();
                allDocsMatch = true;
                for (int i = 0; i < numDims; ++i) {
                    int offset = i * bytesPerDim;
                    if (StringHelper.compare(bytesPerDim, lowerPoint, offset, fieldPackedLower, offset) > 0 || StringHelper.compare(bytesPerDim, upperPoint, offset, fieldPackedUpper, offset) < 0) {
                        allDocsMatch = false;
                        break;
                    }
                }
            } else {
                allDocsMatch = false;
            }
            final Weight weight = this;
            if (allDocsMatch) {
                // all docs have a value and all points are within bounds, so everything matches
                return new ScorerSupplier() {

                    @Override
                    public Scorer get(boolean randomAccess) {
                        return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc()));
                    }

                    @Override
                    public long cost() {
                        return reader.maxDoc();
                    }
                };
            } else {
                return new ScorerSupplier() {

                    final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);

                    final IntersectVisitor visitor = getIntersectVisitor(result);

                    // lazily computed by cost(); -1 means "not computed yet"
                    long cost = -1;

                    @Override
                    public Scorer get(boolean randomAccess) throws IOException {
                        if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size() && cost() > reader.maxDoc() / 2) {
                            // If all docs have exactly one value and the cost is greater
                            // than half the leaf size then maybe we can make things faster
                            // by computing the set of documents that do NOT match the range
                            final FixedBitSet result = new FixedBitSet(reader.maxDoc());
                            result.set(0, reader.maxDoc());
                            int[] cost = new int[] { reader.maxDoc() };
                            values.intersect(getInverseIntersectVisitor(result, cost));
                            final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
                            return new ConstantScoreScorer(weight, score(), iterator);
                        }
                        values.intersect(visitor);
                        DocIdSetIterator iterator = result.build().iterator();
                        return new ConstantScoreScorer(weight, score(), iterator);
                    }

                    @Override
                    public long cost() {
                        if (cost == -1) {
                            // Computing the cost may be expensive, so only do it if necessary
                            cost = values.estimatePointCount(visitor);
                            assert cost >= 0;
                        }
                        return cost;
                    }
                };
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            ScorerSupplier scorerSupplier = scorerSupplier(context);
            if (scorerSupplier == null) {
                return null;
            }
            // default, non-random-access path
            return scorerSupplier.get(false);
        }
    };
}
Also used : BitSetIterator(org.apache.lucene.util.BitSetIterator) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) IntPoint(org.apache.lucene.document.IntPoint) PointValues(org.apache.lucene.index.PointValues) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder)

Aggregations

DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder)12 LeafReader (org.apache.lucene.index.LeafReader)8 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)8 PointValues (org.apache.lucene.index.PointValues)6 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)4 ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer)4 ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight)4 BytesRef (org.apache.lucene.util.BytesRef)4 FieldInfo (org.apache.lucene.index.FieldInfo)3 PostingsEnum (org.apache.lucene.index.PostingsEnum)3 Terms (org.apache.lucene.index.Terms)3 TermsEnum (org.apache.lucene.index.TermsEnum)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Rectangle (org.apache.lucene.geo.Rectangle)2 Relation (org.apache.lucene.index.PointValues.Relation)2 TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator)2 Term (org.apache.lucene.index.Term)2 TermContext (org.apache.lucene.index.TermContext)2 FixedBitSet (org.apache.lucene.util.FixedBitSet)2