Search in sources:

Example 11 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project (by Apache).

Source: class PointRangeQuery, method createWeight.

@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        /**
         * Creates a visitor that collects into {@code result} every doc whose point
         * value lies inside the query box (lowerPoint..upperPoint in every dimension).
         */
        private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
            return new IntersectVisitor() {

                // Bulk adder handed out by the builder; replaced on every grow() call.
                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = result.grow(count);
                }

                @Override
                public void visit(int docID) {
                    // Cell is fully inside the query: accept without comparing values.
                    adder.add(docID);
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    // Cell crosses the query: check the packed value in each dimension.
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
                            // Doc's value is too low, in this dimension
                            return;
                        }
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
                            // Doc's value is too high, in this dimension
                            return;
                        }
                    }
                    // Doc is in-bounds
                    adder.add(docID);
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    boolean crosses = false;
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        // Cell lies entirely below or entirely above the range in this
                        // dimension, so it cannot overlap the query at all.
                        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
                            return Relation.CELL_OUTSIDE_QUERY;
                        }
                        // Cell sticks out past either edge of the range in this dimension.
                        crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
                    }
                    if (crosses) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_INSIDE_QUERY;
                    }
                }
            };
        }

        /**
         * Create a visitor that clears documents that do NOT match the range.
         * Used by the inverted strategy below: the caller starts from an all-set
         * bitset and this visitor clears non-matching docs, decrementing
         * {@code cost[0]} once per cleared doc so the final cardinality is known.
         */
        private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
            return new IntersectVisitor() {

                @Override
                public void visit(int docID) {
                    // With the inverted compare() below, a cell reported as "inside"
                    // lies entirely outside the real range: drop every doc in it.
                    result.clear(docID);
                    cost[0]--;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    // Crossing cell: clear docs whose value misses the range in any dim.
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
                            // Doc's value is too low, in this dimension
                            result.clear(docID);
                            cost[0]--;
                            return;
                        }
                        if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
                            // Doc's value is too high, in this dimension
                            result.clear(docID);
                            cost[0]--;
                            return;
                        }
                    }
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    // Relations are deliberately inverted relative to the forward
                    // visitor, because this visitor removes NON-matches.
                    boolean crosses = false;
                    for (int dim = 0; dim < numDims; dim++) {
                        int offset = dim * bytesPerDim;
                        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
                            // This dim is not in the range: every doc in the cell is a
                            // non-match, so report INSIDE to have them all visited/cleared.
                            return Relation.CELL_INSIDE_QUERY;
                        }
                        crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
                    }
                    if (crosses) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        // Cell fully inside the real range: nothing to clear here.
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                }
            };
        }

        @Override
        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
            LeafReader reader = context.reader();
            PointValues values = reader.getPointValues(field);
            if (values == null) {
                // No docs in this segment/field indexed any points
                return null;
            }
            // Sanity-check that the index geometry matches what the query was built for.
            if (values.getNumDimensions() != numDims) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + values.getNumDimensions() + " but this query has numDims=" + numDims);
            }
            if (bytesPerDim != values.getBytesPerDimension()) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
            }
            boolean allDocsMatch;
            if (values.getDocCount() == reader.maxDoc()) {
                // Every doc has a point: if the field's global min/max box is fully
                // contained in the query box, then every doc must match.
                final byte[] fieldPackedLower = values.getMinPackedValue();
                final byte[] fieldPackedUpper = values.getMaxPackedValue();
                allDocsMatch = true;
                for (int i = 0; i < numDims; ++i) {
                    int offset = i * bytesPerDim;
                    if (StringHelper.compare(bytesPerDim, lowerPoint, offset, fieldPackedLower, offset) > 0 || StringHelper.compare(bytesPerDim, upperPoint, offset, fieldPackedUpper, offset) < 0) {
                        allDocsMatch = false;
                        break;
                    }
                }
            } else {
                allDocsMatch = false;
            }
            final Weight weight = this;
            if (allDocsMatch) {
                // all docs have a value and all points are within bounds, so everything matches
                return new ScorerSupplier() {

                    @Override
                    public Scorer get(boolean randomAccess) {
                        return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc()));
                    }

                    @Override
                    public long cost() {
                        return reader.maxDoc();
                    }
                };
            } else {
                return new ScorerSupplier() {

                    final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);

                    final IntersectVisitor visitor = getIntersectVisitor(result);

                    // Lazily computed estimate of matching points; -1 = not yet computed.
                    long cost = -1;

                    @Override
                    public Scorer get(boolean randomAccess) throws IOException {
                        if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size() && cost() > reader.maxDoc() / 2) {
                            // If all docs have exactly one value and the cost is greater
                            // than half the leaf size then maybe we can make things faster
                            // by computing the set of documents that do NOT match the range
                            final FixedBitSet result = new FixedBitSet(reader.maxDoc());
                            result.set(0, reader.maxDoc());
                            int[] cost = new int[] { reader.maxDoc() };
                            values.intersect(getInverseIntersectVisitor(result, cost));
                            final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
                            return new ConstantScoreScorer(weight, score(), iterator);
                        }
                        // Default strategy: visit matching docs and build the set directly.
                        values.intersect(visitor);
                        DocIdSetIterator iterator = result.build().iterator();
                        return new ConstantScoreScorer(weight, score(), iterator);
                    }

                    @Override
                    public long cost() {
                        if (cost == -1) {
                            // Computing the cost may be expensive, so only do it if necessary
                            cost = values.estimatePointCount(visitor);
                            assert cost >= 0;
                        }
                        return cost;
                    }
                };
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            // Convenience path: delegate to the supplier with randomAccess=false.
            ScorerSupplier scorerSupplier = scorerSupplier(context);
            if (scorerSupplier == null) {
                return null;
            }
            return scorerSupplier.get(false);
        }
    };
}
Also used : BitSetIterator(org.apache.lucene.util.BitSetIterator) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) IntPoint(org.apache.lucene.document.IntPoint) PointValues(org.apache.lucene.index.PointValues) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder)

Example 12 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project (by Apache).

Source: class TestLucene60PointsFormat, method testEstimatePointCount2Dims.

// The tree is always balanced in the N dims case, and leaves are
// not all full so things are a bit different
public void testEstimatePointCount2Dims() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    // Two-dimensional points, 3 bytes per dimension.
    byte[][] pointValue = new byte[2][];
    pointValue[0] = new byte[3];
    pointValue[1] = new byte[3];
    // One distinguished value that will be indexed exactly once (by doc numDocs/2).
    byte[][] uniquePointValue = new byte[2][];
    uniquePointValue[0] = new byte[3];
    uniquePointValue[1] = new byte[3];
    random().nextBytes(uniquePointValue[0]);
    random().nextBytes(uniquePointValue[1]);
    // make sure we have several leaves
    final int numDocs = atLeast(10000);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        if (i == numDocs / 2) {
            doc.add(new BinaryPoint("f", uniquePointValue));
        } else {
            // Redraw until neither dimension collides with the unique value, so
            // that value stays unique per-dimension across the whole index.
            do {
                random().nextBytes(pointValue[0]);
                random().nextBytes(pointValue[1]);
            } while (Arrays.equals(pointValue[0], uniquePointValue[0]) || Arrays.equals(pointValue[1], uniquePointValue[1]));
            doc.add(new BinaryPoint("f", pointValue));
        }
        w.addDocument(doc);
    }
    // Collapse to a single segment so all points live in one BKD tree.
    w.forceMerge(1);
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    final LeafReader lr = getOnlyLeafReader(r);
    PointValues points = lr.getPointValues("f");
    // With >1 dims, the tree is balanced
    // Halve (rounding up) until the per-leaf count fits under the configured
    // limit (maxPointsInLeafNode is a field of the enclosing test class).
    int actualMaxPointsInLeafNode = numDocs;
    while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
        actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
    }
    // If all points match, then the point count is numLeaves * maxPointsInLeafNode
    // (numLeaves is rounded up to a power of two because the tree is balanced).
    final int numLeaves = Integer.highestOneBit((numDocs - 1) / actualMaxPointsInLeafNode) << 1;
    assertEquals(numLeaves * actualMaxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // Every cell matches: the estimate should cover all leaves.
            return Relation.CELL_INSIDE_QUERY;
        }
    }));
    // Return 0 if no points match
    assertEquals(0, points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // No cell matches: the estimate should be exactly zero.
            return Relation.CELL_OUTSIDE_QUERY;
        }
    }));
    // If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
    // in general, or maybe 2x that if the point is a split value
    final long pointCount = points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // Only cells whose bounds contain the unique value (in both dims) match.
            for (int dim = 0; dim < 2; ++dim) {
                if (StringHelper.compare(3, uniquePointValue[dim], 0, maxPackedValue, dim * 3) > 0 || StringHelper.compare(3, uniquePointValue[dim], 0, minPackedValue, dim * 3) < 0) {
                    return Relation.CELL_OUTSIDE_QUERY;
                }
            }
            return Relation.CELL_CROSSES_QUERY;
        }
    });
    assertTrue("" + pointCount, // common case
    pointCount == (actualMaxPointsInLeafNode + 1) / 2 || // if the point is a split value
    pointCount == 2 * ((actualMaxPointsInLeafNode + 1) / 2));
    r.close();
    dir.close();
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 13 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project (by Apache).

Source: class PointInSetIncludingScoreQuery, method createWeight.

@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new Weight(this) {

        @Override
        public void extractTerms(Set<Term> terms) {
            // Point-based queries have no terms to contribute.
        }

        /**
         * Explains a doc by advancing this weight's scorer to it: if the iterator
         * lands exactly on {@code doc}, the doc matched and its score is reported.
         */
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            Scorer scorer = scorer(context);
            if (scorer != null) {
                int target = scorer.iterator().advance(doc);
                if (doc == target) {
                    return Explanation.match(scorer.score(), "A match");
                }
            }
            return Explanation.noMatch("Not a match");
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader reader = context.reader();
            FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
            if (fieldInfo == null) {
                // Field is absent from this segment.
                return null;
            }
            // This query only supports single-dimension points of a fixed width.
            if (fieldInfo.getPointDimensionCount() != 1) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getPointDimensionCount() + " but this query has numDims=1");
            }
            if (fieldInfo.getPointNumBytes() != bytesPerDim) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
            }
            PointValues values = reader.getPointValues(field);
            if (values == null) {
                // Field info exists but no points were indexed in this segment.
                return null;
            }
            // One pass over the BKD tree collects matching docs and their scores.
            FixedBitSet result = new FixedBitSet(reader.maxDoc());
            float[] scores = new float[reader.maxDoc()];
            values.intersect(new MergePointVisitor(sortedPackedPoints, result, scores));
            return new Scorer(this) {

                // Use the (approximate) number of set bits as the iterator cost
                // instead of a hard-coded 10L, so cost-based planning (e.g.
                // conjunction lead ordering) sees a realistic estimate.
                DocIdSetIterator disi = new BitSetIterator(result, result.approximateCardinality());

                @Override
                public float score() throws IOException {
                    // Score recorded for this doc during the intersect phase.
                    return scores[docID()];
                }

                @Override
                public int freq() throws IOException {
                    return 1;
                }

                @Override
                public int docID() {
                    return disi.docID();
                }

                @Override
                public DocIdSetIterator iterator() {
                    return disi;
                }
            };
        }
    };
}
Also used : BitSetIterator(org.apache.lucene.util.BitSetIterator) FixedBitSet(org.apache.lucene.util.FixedBitSet) Set(java.util.Set) LeafReader(org.apache.lucene.index.LeafReader) Scorer(org.apache.lucene.search.Scorer) Weight(org.apache.lucene.search.Weight) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) PointValues(org.apache.lucene.index.PointValues) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 14 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project (by Apache).

Source: class Lucene60PointsWriter, method merge.

@Override
public void merge(MergeState mergeState) throws IOException {
    /**
     * If indexSort is activated and some of the leaves are not sorted the next test will catch that and the non-optimized merge will run.
     * If the readers are all sorted then it's safe to perform a bulk merge of the points.
     **/
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader instanceof Lucene60PointsReader == false) {
            // We can only bulk merge when all to-be-merged segments use our format:
            super.merge(mergeState);
            return;
        }
    }
    // Verify checksums of every incoming reader before trusting its data.
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader != null) {
            reader.checkIntegrity();
        }
    }
    for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
        if (fieldInfo.getPointDimensionCount() != 0) {
            if (fieldInfo.getPointDimensionCount() == 1) {
                // 1D fast path: compute sizing hints first.
                boolean singleValuePerDoc = true;
                // Worst case total maximum size (if none of the points are deleted):
                long totMaxSize = 0;
                for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                    PointsReader reader = mergeState.pointsReaders[i];
                    if (reader != null) {
                        FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                        FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                        if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                            PointValues values = reader.getValues(fieldInfo.name);
                            if (values != null) {
                                totMaxSize += values.size();
                                // Stays true only while every contributing segment has
                                // exactly one point per doc for this field.
                                singleValuePerDoc &= values.size() == values.getDocCount();
                            }
                        }
                    }
                }
                // Optimized 1D merge: hand the already-sorted per-segment BKD trees to
                // BKDWriter.merge (a single merge sort) instead of re-sorting all points as if
                // we were simply reindexing them:
                try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, totMaxSize, singleValuePerDoc)) {
                    List<BKDReader> bkdReaders = new ArrayList<>();
                    List<MergeState.DocMap> docMaps = new ArrayList<>();
                    for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                        PointsReader reader = mergeState.pointsReaders[i];
                        if (reader != null) {
                            // we confirmed this up above
                            assert reader instanceof Lucene60PointsReader;
                            Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;
                            // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to this
                            // reader's FieldInfo as we do below) because field numbers can easily be different
                            // when addIndexes(Directory...) copies over segments from another index:
                            FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                            FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                                BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                                if (bkdReader != null) {
                                    bkdReaders.add(bkdReader);
                                    docMaps.add(mergeState.docMaps[i]);
                                }
                            }
                        }
                    }
                    // fp == -1 means the field ended up with no points after the merge.
                    long fp = writer.merge(dataOut, docMaps, bkdReaders);
                    if (fp != -1) {
                        indexFPs.put(fieldInfo.name, fp);
                    }
                }
            } else {
                // Multi-dimensional fields fall back to the generic per-field merge.
                mergeOneField(mergeState, fieldInfo);
            }
        }
    }
    finish();
}
Also used : ArrayList(java.util.ArrayList) FieldInfos(org.apache.lucene.index.FieldInfos) BKDReader(org.apache.lucene.util.bkd.BKDReader) MutablePointValues(org.apache.lucene.codecs.MutablePointValues) PointValues(org.apache.lucene.index.PointValues) PointsReader(org.apache.lucene.codecs.PointsReader) BKDWriter(org.apache.lucene.util.bkd.BKDWriter) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 15 with PointValues

Use of org.apache.lucene.index.PointValues in the lucene-solr project (by Apache).

Source: class TestPointValues, method testTieBreakByDocID.

public void testTieBreakByDocID() throws Exception {
    Directory directory = newFSDirectory(createTempDir());
    IndexWriterConfig config = newIndexWriterConfig();
    IndexWriter writer = new IndexWriter(directory, config);
    // One reusable document carrying a single int point.
    Document document = new Document();
    document.add(new IntPoint("int", 17));
    for (int added = 0; added < 300000; added++) {
        writer.addDocument(document);
        // Commit at random intervals so multiple segments get flushed.
        if (random().nextInt(1000) == 17) {
            writer.commit();
        }
    }
    IndexReader reader = DirectoryReader.open(writer);
    for (LeafReaderContext leaf : reader.leaves()) {
        PointValues points = leaf.reader().getPointValues("int");
        points.intersect(new IntersectVisitor() {

            // Highest docID seen so far; visit order must be non-decreasing.
            int previousDocID = -1;

            @Override
            public void visit(int docID) {
                if (docID < previousDocID) {
                    fail("docs out of order: docID=" + docID + " but lastDocID=" + previousDocID);
                }
                previousDocID = docID;
            }

            @Override
            public void visit(int docID, byte[] packedValue) {
                visit(docID);
            }

            @Override
            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                // Randomly exercise both the crossing and the fully-inside paths.
                return random().nextBoolean() ? Relation.CELL_CROSSES_QUERY : Relation.CELL_INSIDE_QUERY;
            }
        });
    }
    reader.close();
    writer.close();
    directory.close();
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) FloatPoint(org.apache.lucene.document.FloatPoint) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) IntPoint(org.apache.lucene.document.IntPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Aggregations

PointValues (org.apache.lucene.index.PointValues)19 LeafReader (org.apache.lucene.index.LeafReader)9 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)9 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)8 Relation (org.apache.lucene.index.PointValues.Relation)7 IOException (java.io.IOException)6 FieldInfo (org.apache.lucene.index.FieldInfo)6 DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder)6 BinaryPoint (org.apache.lucene.document.BinaryPoint)5 Document (org.apache.lucene.document.Document)5 IntPoint (org.apache.lucene.document.IntPoint)5 Directory (org.apache.lucene.store.Directory)5 DoublePoint (org.apache.lucene.document.DoublePoint)4 FloatPoint (org.apache.lucene.document.FloatPoint)4 LongPoint (org.apache.lucene.document.LongPoint)4 ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer)4 ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight)4 FSDirectory (org.apache.lucene.store.FSDirectory)3 RAMDirectory (org.apache.lucene.store.RAMDirectory)3 ArrayList (java.util.ArrayList)2