Search in sources:

Example 1 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

Class RangeFieldQuery, method createWeight.

/**
 * Creates a constant-score weight for this range query. For each segment the
 * weight either matches every document outright or collects the matching
 * documents by intersecting the field's point values.
 */
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        // consulted for every intersects / within / contains / matches decision below
        final RangeFieldComparator target = new RangeFieldComparator();

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final LeafReader leaf = context.reader();
            final PointValues points = leaf.getPointValues(field);
            if (points == null) {
                // no docs in this segment indexed any ranges
                return null;
            }
            final FieldInfo info = leaf.getFieldInfos().fieldInfo(field);
            if (info == null) {
                // no docs in this segment indexed this field
                return null;
            }
            checkFieldInfo(info);

            // Shortcut: when every doc carries a value and the segment-wide bounds are
            // fully inside the query, no tree traversal is needed.
            final boolean everyDocHasValue = points.getDocCount() == leaf.maxDoc();
            final boolean allDocsMatch = everyDocHasValue
                && compareRange(points.getMinPackedValue(), points.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY;

            final DocIdSetIterator iterator;
            if (allDocsMatch) {
                iterator = DocIdSetIterator.all(leaf.maxDoc());
            } else {
                iterator = buildMatchingDocIdSet(leaf, points).iterator();
            }
            return new ConstantScoreScorer(this, score(), iterator);
        }

        /** Computes the relation between a BKD cell (given by its min/max packed values) and the query. */
        private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
            final byte[] cell = getInternalRange(minPackedValue, maxPackedValue);
            if (!target.intersects(cell)) {
                return Relation.CELL_OUTSIDE_QUERY;
            }
            if (target.within(cell)) {
                // target within cell; continue traversing:
                return Relation.CELL_CROSSES_QUERY;
            }
            if (!target.contains(cell)) {
                // target merely intersects cell; continue traversing:
                return Relation.CELL_CROSSES_QUERY;
            }
            // target contains cell; accept it iff queryType is neither CONTAINS nor CROSSES:
            final boolean rejectContainedCell = queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES;
            if (rejectContainedCell) {
                return Relation.CELL_OUTSIDE_QUERY;
            }
            return Relation.CELL_INSIDE_QUERY;
        }

        /**
         * Builds the encoded byte representation of an internal node: the first
         * {@code numDims * bytesPerDim} bytes come from {@code min}, the following
         * {@code numDims * bytesPerDim} bytes come from {@code max}.
         */
        private byte[] getInternalRange(byte[] min, byte[] max) {
            final int half = numDims * bytesPerDim;
            final byte[] encoded = new byte[min.length];
            System.arraycopy(min, 0, encoded, 0, half);
            System.arraycopy(max, half, encoded, half, half);
            return encoded;
        }

        /** Walks the segment's point values and gathers every doc accepted by the visitor. */
        private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
            final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc(), values, field);
            final IntersectVisitor collector = new IntersectVisitor() {

                // refreshed by every grow() call; both visit() overloads add through it
                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = builder.grow(count);
                }

                @Override
                public void visit(int docID) throws IOException {
                    // visited without a value: accepted unconditionally
                    adder.add(docID);
                }

                @Override
                public void visit(int docID, byte[] leaf) throws IOException {
                    if (!target.matches(leaf)) {
                        return;
                    }
                    adder.add(docID);
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return compareRange(minPackedValue, maxPackedValue);
                }
            };
            values.intersect(collector);
            return builder.build();
        }
    };
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) IOException(java.io.IOException) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 2 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

Class TestLucene60PointsFormat, method testEstimatePointCount.

/**
 * Indexes at least 10000 three-byte points (exactly one of them unique), merges
 * to a single segment and checks {@code estimatePointCount} for a match-all,
 * a match-none and a single-point visitor.
 */
public void testEstimatePointCount() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig();
    // number of points per leaf hard to predict, so keep drawing a new
    // merge policy until it is not a MockRandomMergePolicy
    while (config.getMergePolicy() instanceof MockRandomMergePolicy) {
        config.setMergePolicy(newMergePolicy());
    }
    IndexWriter writer = new IndexWriter(dir, config);

    byte[] scratch = new byte[3];
    byte[] uniquePointValue = new byte[3];
    random().nextBytes(uniquePointValue);

    // make sure we have several leaves
    final int numDocs = atLeast(10000);
    final int uniqueDocIndex = numDocs / 2;
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        if (i != uniqueDocIndex) {
            // redraw until the random value differs from the unique one
            do {
                random().nextBytes(scratch);
            } while (Arrays.equals(scratch, uniquePointValue));
            doc.add(new BinaryPoint("f", scratch));
        } else {
            doc.add(new BinaryPoint("f", uniquePointValue));
        }
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();

    final LeafReader onlyLeaf = getOnlyLeafReader(reader);
    PointValues points = onlyLeaf.getPointValues("f");

    // If all points match, then the point count is numLeaves * maxPointsInLeafNode
    final IntersectVisitor everythingInside = new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_INSIDE_QUERY;
        }
    };
    final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
    assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(everythingInside));

    // Return 0 if no points match
    final IntersectVisitor everythingOutside = new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_OUTSIDE_QUERY;
        }
    };
    assertEquals(0, points.estimatePointCount(everythingOutside));

    // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
    // in general, or maybe 2x that if the point is a split value
    final IntersectVisitor onlyUniquePoint = new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // cells whose [min, max] bounds exclude the unique value are outside
            if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0) {
                return Relation.CELL_OUTSIDE_QUERY;
            }
            if (StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
                return Relation.CELL_OUTSIDE_QUERY;
            }
            return Relation.CELL_CROSSES_QUERY;
        }
    };
    final long pointCount = points.estimatePointCount(onlyUniquePoint);
    // common case
    final boolean singleLeafEstimate = pointCount == (maxPointsInLeafNode + 1) / 2;
    // if the point is a split value
    final boolean splitValueEstimate = pointCount == 2 * ((maxPointsInLeafNode + 1) / 2);
    assertTrue("" + pointCount, singleLeafEstimate || splitValueEstimate);

    reader.close();
    dir.close();
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) MockRandomMergePolicy(org.apache.lucene.index.MockRandomMergePolicy) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 3 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

Class SimpleTextBKDWriter, method writeField1Dim.

/* In the 1D case the points are first sorted in ascending order and then pushed
 * through the same writing logic that is used at merge time. Every (value, doc)
 * pair reported by the reader is forwarded to a OneDimensionBKDWriter, whose
 * finish() result is returned. */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
    final int pointCount = Math.toIntExact(reader.size());
    MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, pointCount);

    final OneDimensionBKDWriter sink = new OneDimensionBKDWriter(out);
    final IntersectVisitor forwarder = new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // always "crosses", so the value-carrying visit below is the one used
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) throws IOException {
            // the value-less callback is not expected here
            throw new IllegalStateException();
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
            sink.add(packedValue, docID);
        }
    };
    reader.intersect(forwarder);
    return sink.finish();
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IOException(java.io.IOException)

Example 4 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

Class Lucene60PointsWriter, method writeField.

/**
 * Writes the point values of one field through a {@link BKDWriter} and records
 * the resulting file pointer in {@code indexFPs} under the field's name.
 *
 * <p>When the values are {@link MutablePointValues} they are handed to the
 * writer in one call; otherwise every (value, doc) pair is visited and added
 * individually, and the index is only finished if at least one point was added.
 *
 * @param fieldInfo the field whose points are written
 * @param reader source of the field's point values
 * @throws IOException if reading or writing the points fails
 */
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
    PointValues values = reader.getValues(fieldInfo.name);
    // true when the number of points equals the number of docs that have points
    boolean singleValuePerDoc = values.size() == values.getDocCount();
    try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, values.size(), singleValuePerDoc)) {
        if (values instanceof MutablePointValues) {
            final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointValues) values);
            // -1 is not recorded as a file pointer for this field
            if (fp != -1) {
                indexFPs.put(fieldInfo.name, fp);
            }
            return;
        }
        values.intersect(new IntersectVisitor() {

            @Override
            public void visit(int docID) {
                // compare() always answers CELL_CROSSES_QUERY, so the value-less
                // callback is not expected here
                throw new IllegalStateException();
            }

            @Override
            public void visit(int docID, byte[] packedValue) throws IOException {
                writer.add(packedValue, docID);
            }

            @Override
            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                return Relation.CELL_CROSSES_QUERY;
            }
        });
        // We could have 0 points on merge since all docs with dimensional fields may be deleted:
        if (writer.getPointCount() > 0) {
            indexFPs.put(fieldInfo.name, writer.finish(dataOut));
        }
    }
}
Also used : MutablePointValues(org.apache.lucene.codecs.MutablePointValues) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IOException(java.io.IOException) BKDWriter(org.apache.lucene.util.bkd.BKDWriter) MutablePointValues(org.apache.lucene.codecs.MutablePointValues)

Example 5 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

Class BasePointsFormatTestCase, method testBasic.

/**
 * Indexes 20 documents whose single point value is the sortable-bytes encoding
 * of the loop index, merges to one segment, and verifies that {@code intersect}
 * visits all 20 docs and that each visited value decodes to its doc id.
 */
public void testBasic() throws Exception {
    Directory dir = getDirectory(20);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter w = new IndexWriter(dir, iwc);
    // one 4-byte buffer, re-encoded for every document
    byte[] point = new byte[4];
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        NumericUtils.intToSortableBytes(i, point, 0);
        doc.add(new BinaryPoint("dim", point));
        w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();
    DirectoryReader r = DirectoryReader.open(dir);
    LeafReader sub = getOnlyLeafReader(r);
    PointValues values = sub.getPointValues("dim");
    // Simple test: make sure intersect can visit every doc:
    BitSet seen = new BitSet();
    values.intersect(new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPacked, byte[] maxPacked) {
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) {
            // compare() always answers CELL_CROSSES_QUERY, so the value-less
            // callback is not expected here
            throw new IllegalStateException();
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
            seen.set(docID);
            // the test expects doc i to hold the encoding of i
            assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
        }
    });
    assertEquals(20, seen.cardinality());
    IOUtils.close(r, dir);
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) BitSet(java.util.BitSet) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) Relation(org.apache.lucene.index.PointValues.Relation) Directory(org.apache.lucene.store.Directory)

Aggregations

IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor): 24, Relation (org.apache.lucene.index.PointValues.Relation): 22, Directory (org.apache.lucene.store.Directory): 14, IOException (java.io.IOException): 9, PointValues (org.apache.lucene.index.PointValues): 9, IndexInput (org.apache.lucene.store.IndexInput): 9, IndexOutput (org.apache.lucene.store.IndexOutput): 9, BitSet (java.util.BitSet): 8, CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput): 8, BinaryPoint (org.apache.lucene.document.BinaryPoint): 7, Document (org.apache.lucene.document.Document): 7, FilterDirectory (org.apache.lucene.store.FilterDirectory): 7, IntPoint (org.apache.lucene.document.IntPoint): 6, LeafReader (org.apache.lucene.index.LeafReader): 6, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 4, DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder): 4, FieldInfo (org.apache.lucene.index.FieldInfo): 3, ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer): 3, ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight): 3, BigInteger (java.math.BigInteger): 2