Search in sources :

Example 6 with PointValues

Use of org.apache.lucene.index.PointValues in the Apache lucene-solr project.

In class TestLucene60PointsFormat, method testEstimatePointCount:

/**
 * Checks {@code PointValues.estimatePointCount} against a single merged segment using three
 * visitors: one reporting every cell as inside the query, one reporting every cell as outside,
 * and one that only crosses cells whose bounds contain a value that was indexed exactly once.
 */
public void testEstimatePointCount() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig();
    // With MockRandomMergePolicy the number of points per leaf is hard to predict,
    // so keep drawing until some other merge policy is configured
    while (config.getMergePolicy() instanceof MockRandomMergePolicy) {
        config.setMergePolicy(newMergePolicy());
    }
    IndexWriter writer = new IndexWriter(dir, config);
    byte[] scratch = new byte[3];
    byte[] needle = new byte[3];
    random().nextBytes(needle);
    // Index enough documents to end up with several leaves
    final int numDocs = atLeast(10000);
    final int needleDoc = numDocs / 2;
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        final byte[] value;
        if (docIndex != needleDoc) {
            // Redraw until the random value differs from the needle, so the needle stays unique
            do {
                random().nextBytes(scratch);
            } while (Arrays.equals(scratch, needle));
            value = scratch;
        } else {
            value = needle;
        }
        Document doc = new Document();
        doc.add(new BinaryPoint("f", value));
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    final LeafReader leaf = getOnlyLeafReader(reader);
    PointValues points = leaf.getPointValues("f");

    // Case 1: every cell is inside the query, so the estimate is numLeaves * maxPointsInLeafNode
    final IntersectVisitor matchAll = new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_INSIDE_QUERY;
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }
    };
    final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
    assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(matchAll));

    // Case 2: no cell matches, so the estimate is 0
    final IntersectVisitor matchNone = new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_OUTSIDE_QUERY;
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }
    };
    assertEquals(0, points.estimatePointCount(matchNone));

    // Case 3: only the needle matches. The estimate is then (maxPointsInLeafNode + 1) / 2
    // in general, or maybe twice that if the needle is a split value
    final IntersectVisitor matchNeedle = new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // The needle lies above the cell's upper bound
            if (StringHelper.compare(3, needle, 0, maxPackedValue, 0) > 0) {
                return Relation.CELL_OUTSIDE_QUERY;
            }
            // The needle lies below the cell's lower bound
            final boolean belowCell = StringHelper.compare(3, needle, 0, minPackedValue, 0) < 0;
            return belowCell ? Relation.CELL_OUTSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }
    };
    final long needleEstimate = points.estimatePointCount(matchNeedle);
    final boolean commonCase = needleEstimate == (maxPointsInLeafNode + 1) / 2;
    final boolean splitValueCase = needleEstimate == 2 * ((maxPointsInLeafNode + 1) / 2);
    assertTrue("" + needleEstimate, commonCase || splitValueCase);

    reader.close();
    dir.close();
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) MockRandomMergePolicy(org.apache.lucene.index.MockRandomMergePolicy) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 7 with PointValues

Use of org.apache.lucene.index.PointValues in the Apache lucene-solr project.

In class TestPointValues, method doTestMergedStats:

/**
 * Indexes 10 to 20 documents, each carrying 0 to 2 random binary points in "field", and
 * verifies that the static {@code PointValues} helpers applied to a reader opened before
 * {@code forceMerge(1)} report the same min/max packed values, doc count and size as the single
 * leaf of a reader opened after the merge.
 */
private void doTestMergedStats() throws IOException {
    final int dims = TestUtil.nextInt(random(), 1, 8);
    final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));
    final int docTotal = TestUtil.nextInt(random(), 10, 20);
    int docIndex = 0;
    while (docIndex < docTotal) {
        Document doc = new Document();
        final int pointsInDoc = random().nextInt(3);
        for (int remaining = pointsInDoc; remaining > 0; remaining--) {
            doc.add(new BinaryPoint("field", randomBinaryValue(dims, bytesPerDim)));
        }
        writer.addDocument(doc);
        // Randomly open and immediately close a reader on the writer
        // NOTE(review): presumably this is meant to produce several segments -- confirm
        if (random().nextBoolean()) {
            DirectoryReader.open(writer).close();
        }
        docIndex++;
    }
    // View of the index before merging
    final IndexReader unmerged = DirectoryReader.open(writer);
    writer.forceMerge(1);
    // View of the index after merging down to one segment; its only leaf is the reference
    final IndexReader merged = DirectoryReader.open(writer);
    final PointValues reference = getOnlyLeafReader(merged).getPointValues("field");
    if (reference != null) {
        assertArrayEquals(reference.getMinPackedValue(), PointValues.getMinPackedValue(unmerged, "field"));
        assertArrayEquals(reference.getMaxPackedValue(), PointValues.getMaxPackedValue(unmerged, "field"));
        assertEquals(reference.getDocCount(), PointValues.getDocCount(unmerged, "field"));
        assertEquals(reference.size(), PointValues.size(unmerged, "field"));
    } else {
        // The merged leaf has no point values for the field: the helpers must report "nothing"
        assertNull(PointValues.getMinPackedValue(unmerged, "field"));
        assertNull(PointValues.getMaxPackedValue(unmerged, "field"));
        assertEquals(0, PointValues.getDocCount(unmerged, "field"));
        assertEquals(0, PointValues.size(unmerged, "field"));
    }
    IOUtils.close(writer, unmerged, merged, directory);
}
Also used : PointValues(org.apache.lucene.index.PointValues) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) FloatPoint(org.apache.lucene.document.FloatPoint) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 8 with PointValues

Use of org.apache.lucene.index.PointValues in the Apache lucene-solr project.

In class PointsWriter, method mergeOneField:

/**
 * Default, naive merge of a single field: all values of the incoming segments are simply
 * re-indexed by handing {@code writeField} a synthetic {@link PointsReader} that replays them.
 * The default codec overrides this for 1D fields and uses a faster but more complex
 * implementation.
 *
 * @param mergeState state of the merge; its per-segment points readers, field infos and doc maps are read
 * @param fieldInfo  the field being merged
 * @throws IOException if reading the incoming values or writing the merged field fails
 */
protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
    // First pass: total up the point and document counts of every segment that indexed points
    // for this field. The point total is an upper bound, because documents that turn out to be
    // deleted are skipped only later, while the values are visited.
    long pointTotal = 0;
    int docTotal = 0;
    for (int seg = 0; seg < mergeState.pointsReaders.length; seg++) {
        PointsReader segReader = mergeState.pointsReaders[seg];
        if (segReader == null) {
            continue;
        }
        FieldInfo segFieldInfo = mergeState.fieldInfos[seg].fieldInfo(fieldInfo.name);
        if (segFieldInfo == null || segFieldInfo.getPointDimensionCount() <= 0) {
            continue;
        }
        PointValues segValues = segReader.getValues(fieldInfo.name);
        if (segValues == null) {
            continue;
        }
        pointTotal += segValues.size();
        docTotal += segValues.getDocCount();
    }
    // Final copies so the anonymous classes below can capture the totals
    final long mergedPointCount = pointTotal;
    final int mergedDocCount = docTotal;

    PointsReader mergedReader = new PointsReader() {

        @Override
        public PointValues getValues(String fieldName) {
            if (!fieldName.equals(fieldInfo.name)) {
                throw new IllegalArgumentException("field name must match the field being merged");
            }
            // Only intersect(), size() and getDocCount() are supported on this view
            return new PointValues() {

                @Override
                public long size() {
                    return mergedPointCount;
                }

                @Override
                public int getDocCount() {
                    return mergedDocCount;
                }

                @Override
                public void intersect(IntersectVisitor mergedVisitor) throws IOException {
                    for (int seg = 0; seg < mergeState.pointsReaders.length; seg++) {
                        PointsReader segReader = mergeState.pointsReaders[seg];
                        if (segReader == null) {
                            // This segment has no points
                            continue;
                        }
                        FieldInfo segFieldInfo = mergeState.fieldInfos[seg].fieldInfo(fieldName);
                        // Skip if this segment never saw the field, or saw it but did not
                        // index points in it
                        if (segFieldInfo == null || segFieldInfo.getPointDimensionCount() == 0) {
                            continue;
                        }
                        PointValues segValues = segReader.getValues(fieldName);
                        if (segValues == null) {
                            continue;
                        }
                        MergeState.DocMap docMap = mergeState.docMaps[seg];
                        // Forwards every surviving (docID, value) pair to the merged visitor,
                        // with the docID remapped into the merged segment
                        IntersectVisitor remappingVisitor = new IntersectVisitor() {

                            @Override
                            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                                // Makes this segment's PointsReader visit all docs + values
                                return Relation.CELL_CROSSES_QUERY;
                            }

                            @Override
                            public void visit(int docID, byte[] packedValue) throws IOException {
                                int mappedDocID = docMap.get(docID);
                                if (mappedDocID == -1) {
                                    // Deleted: drop the value
                                    return;
                                }
                                mergedVisitor.visit(mappedDocID, packedValue);
                            }

                            @Override
                            public void visit(int docID) {
                                // Never expected, since compare never returns Relation.CELL_INSIDE_QUERY
                                throw new IllegalStateException();
                            }
                        };
                        segValues.intersect(remappingVisitor);
                    }
                }

                @Override
                public long estimatePointCount(IntersectVisitor visitor) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public byte[] getMinPackedValue() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public byte[] getMaxPackedValue() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int getNumDimensions() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int getBytesPerDimension() {
                    throw new UnsupportedOperationException();
                }
            };
        }

        @Override
        public void checkIntegrity() throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public long ramBytesUsed() {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
    writeField(fieldInfo, mergedReader);
}
Also used : IOException(java.io.IOException) PointValues(org.apache.lucene.index.PointValues) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 9 with PointValues

Use of org.apache.lucene.index.PointValues in the Apache lucene-solr project.

In class SimpleTextPointsWriter, method writeField:

/**
 * Writes all point values of one field by feeding every (docID, packedValue) pair from the
 * given reader into a {@code SimpleTextBKDWriter}. When at least one point was added, the
 * value returned by {@code writer.finish(dataOut)} is recorded in {@code indexFPs} under the
 * field's name.
 *
 * @param fieldInfo the field whose points are written
 * @param reader    source of the point values for {@code fieldInfo.name}
 * @throws IOException if visiting the values or writing them fails
 */
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
    PointValues values = reader.getValues(fieldInfo.name);
    // As many points as documents with points means each such document holds exactly one value
    boolean singleValuePerDoc = values.size() == values.getDocCount();
    // We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk:
    try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, values.size(), singleValuePerDoc)) {
        values.intersect(new IntersectVisitor() {

            @Override
            public void visit(int docID) {
                // Not expected to be called: compare below never returns Relation.CELL_INSIDE_QUERY
                throw new IllegalStateException();
            }

            @Override
            public void visit(int docID, byte[] packedValue) throws IOException {
                writer.add(packedValue, docID);
            }

            @Override
            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                // Always "crosses", so every value is handed to visit(docID, packedValue)
                return Relation.CELL_CROSSES_QUERY;
            }
        });
        // We could have 0 points on merge since all docs with points may be deleted:
        if (writer.getPointCount() > 0) {
            indexFPs.put(fieldInfo.name, writer.finish(dataOut));
        }
    }
}
Also used : PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IOException(java.io.IOException)

Example 10 with PointValues

Use of org.apache.lucene.index.PointValues in the Apache lucene-solr project.

In class PointInSetQuery, method createWeight:

/**
 * Creates a constant-score weight whose per-segment scorer matches the documents collected by
 * intersecting the segment's point values for {@code field} with this query's sorted set of
 * packed points.
 *
 * @throws IllegalArgumentException (from the scorer) if the field was indexed with a different
 *         number of dimensions or bytes per dimension than this query uses
 */
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leaf = context.reader();
            PointValues pointValues = leaf.getPointValues(field);
            if (pointValues == null) {
                // Nothing in this segment/field indexed any points
                return null;
            }
            // The index-time shape of the field must agree with the shape of this query
            if (values_numDimsMismatch(pointValues)) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + pointValues.getNumDimensions() + " but this query has numDims=" + numDims);
            }
            if (pointValues.getBytesPerDimension() != bytesPerDim) {
                throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + pointValues.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
            }
            DocIdSetBuilder matches = new DocIdSetBuilder(leaf.maxDoc(), pointValues, field);
            if (numDims != 1) {
                // NOTE: this is a naive implementation, where for each point we re-walk the KD tree to intersect.  We could instead do a
                // similar optimization as the 1D case, but I think it'd mean building a query-time KD tree so we could efficiently
                // intersect against the index, which is probably tricky!
                SinglePointVisitor pointVisitor = new SinglePointVisitor(matches);
                TermIterator queryPoints = sortedPackedPoints.iterator();
                BytesRef queryPoint;
                while ((queryPoint = queryPoints.next()) != null) {
                    pointVisitor.setPoint(queryPoint);
                    pointValues.intersect(pointVisitor);
                }
            } else {
                // The common 1D case is optimized: effectively a merge sort of the indexed values vs the queried set
                pointValues.intersect(new MergePointVisitor(sortedPackedPoints, matches));
            }
            return new ConstantScoreScorer(this, score(), matches.build().iterator());
        }

        // True when the field's indexed dimension count differs from this query's
        private boolean values_numDimsMismatch(PointValues pointValues) throws IOException {
            return pointValues.getNumDimensions() != numDims;
        }
    };
}
Also used : PointValues(org.apache.lucene.index.PointValues) LeafReader(org.apache.lucene.index.LeafReader) TermIterator(org.apache.lucene.index.PrefixCodedTerms.TermIterator) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

PointValues (org.apache.lucene.index.PointValues)19 LeafReader (org.apache.lucene.index.LeafReader)9 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)9 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)8 Relation (org.apache.lucene.index.PointValues.Relation)7 IOException (java.io.IOException)6 FieldInfo (org.apache.lucene.index.FieldInfo)6 DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder)6 BinaryPoint (org.apache.lucene.document.BinaryPoint)5 Document (org.apache.lucene.document.Document)5 IntPoint (org.apache.lucene.document.IntPoint)5 Directory (org.apache.lucene.store.Directory)5 DoublePoint (org.apache.lucene.document.DoublePoint)4 FloatPoint (org.apache.lucene.document.FloatPoint)4 LongPoint (org.apache.lucene.document.LongPoint)4 ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer)4 ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight)4 FSDirectory (org.apache.lucene.store.FSDirectory)3 RAMDirectory (org.apache.lucene.store.RAMDirectory)3 ArrayList (java.util.ArrayList)2