Search in sources:

Example 1 with PointsReader

Use of org.apache.lucene.codecs.PointsReader in the project lucene-solr by Apache.

From the class BaseGeoPointTestCase: the method doRandomDistanceTest.

/**
 * Indexes {@code numDocs} random lat/lon points and runs {@code numQueries} random
 * distance queries, comparing each query's hit set against a brute-force haversine
 * check over the stored per-document coordinates. On mismatch, dumps the query center,
 * radius, and every document's distance before rethrowing the assertion error.
 *
 * @param numDocs    number of random points to index
 * @param numQueries number of random distance queries to verify
 * @throws IOException if indexing or searching fails
 */
private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Tiny leaf size (2..5 points) forces real BKD tree depth even with few docs:
    int pointsInLeaf = 2 + random().nextInt(4);
    iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {

        @Override
        public PointsFormat pointsFormat() {
            return new PointsFormat() {

                @Override
                public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
                    return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
                }

                @Override
                public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
                    return new Lucene60PointsReader(readState);
                }
            };
        }
    });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    for (int i = 0; i < numDocs; i++) {
        double latRaw = nextLatitude();
        double lonRaw = nextLongitude();
        // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
        double lat = quantizeLat(latRaw);
        double lon = quantizeLon(lonRaw);
        Document doc = new Document();
        addPointToDoc("field", doc, lat, lon);
        // Store the quantized coordinates so the brute-force check below can read them back:
        doc.add(new StoredField("lat", lat));
        doc.add(new StoredField("lon", lon));
        writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    for (int i = 0; i < numQueries; i++) {
        double lat = nextLatitude();
        double lon = nextLongitude();
        double radius = 50000000D * random().nextDouble();
        // Brute-force expected hits from the stored coordinates of every document:
        BitSet expected = new BitSet();
        for (int doc = 0; doc < reader.maxDoc(); doc++) {
            // Fetch the stored document once per doc instead of decoding it twice (once per field):
            Document stored = reader.document(doc);
            double docLatitude = stored.getField("lat").numericValue().doubleValue();
            double docLongitude = stored.getField("lon").numericValue().doubleValue();
            double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
            if (distance <= radius) {
                expected.set(doc);
            }
        }
        TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
        BitSet actual = new BitSet();
        for (ScoreDoc doc : topDocs.scoreDocs) {
            actual.set(doc.doc);
        }
        try {
            assertEquals(expected, actual);
        } catch (AssertionError e) {
            // Dump the full picture (query center/radius plus every doc's distance) to make
            // random-seed failures diagnosable, then rethrow:
            System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
            for (int doc = 0; doc < reader.maxDoc(); doc++) {
                Document stored = reader.document(doc);
                double docLatitude = stored.getField("lat").numericValue().doubleValue();
                double docLongitude = stored.getField("lon").numericValue().doubleValue();
                double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
                System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
            }
            throw e;
        }
    }
    reader.close();
    writer.close();
    dir.close();
}
Also used : Lucene60PointsWriter(org.apache.lucene.codecs.lucene60.Lucene60PointsWriter) PointsWriter(org.apache.lucene.codecs.PointsWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) Lucene60PointsReader(org.apache.lucene.codecs.lucene60.Lucene60PointsReader) SegmentReadState(org.apache.lucene.index.SegmentReadState) FixedBitSet(org.apache.lucene.util.FixedBitSet) BitSet(java.util.BitSet) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) FilterCodec(org.apache.lucene.codecs.FilterCodec) ScoreDoc(org.apache.lucene.search.ScoreDoc) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) PointsFormat(org.apache.lucene.codecs.PointsFormat) PointsReader(org.apache.lucene.codecs.PointsReader) Lucene60PointsReader(org.apache.lucene.codecs.lucene60.Lucene60PointsReader) IndexReader(org.apache.lucene.index.IndexReader) SegmentWriteState(org.apache.lucene.index.SegmentWriteState) Lucene60PointsWriter(org.apache.lucene.codecs.lucene60.Lucene60PointsWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with PointsReader

Use of org.apache.lucene.codecs.PointsReader in the project lucene-solr by Apache.

From the class PointValuesWriter: the method flush.

public void flush(SegmentWriteState state, Sorter.DocMap sortMap, PointsWriter writer) throws IOException {
    // Expose the buffered points through a mutable view: an ord table adds a
    // permutation layer so a sorter can reorder points without moving the raw bytes.
    PointValues buffered = new MutablePointValues() {

        // ord -> insertion index; starts out as the identity permutation
        final int[] ord = new int[numPoints];

        {
            for (int p = 0; p < numPoints; ++p) {
                ord[p] = p;
            }
        }

        @Override
        public void intersect(IntersectVisitor visitor) throws IOException {
            final BytesRef ref = new BytesRef();
            final byte[] buf = new byte[packedBytesLength];
            for (int p = 0; p < numPoints; p++) {
                getValue(p, ref);
                assert ref.length == buf.length;
                // Copy out of the shared pool so the visitor sees a stable array:
                System.arraycopy(ref.bytes, ref.offset, buf, 0, packedBytesLength);
                visitor.visit(getDocID(p), buf);
            }
        }

        @Override
        public long estimatePointCount(IntersectVisitor visitor) {
            throw new UnsupportedOperationException();
        }

        @Override
        public byte[] getMinPackedValue() {
            throw new UnsupportedOperationException();
        }

        @Override
        public byte[] getMaxPackedValue() {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getNumDimensions() {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getBytesPerDimension() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long size() {
            return numPoints;
        }

        @Override
        public int getDocCount() {
            return numDocs;
        }

        @Override
        public void swap(int i, int j) {
            // Only the permutation moves; the stored bytes stay where they are.
            final int o = ord[i];
            ord[i] = ord[j];
            ord[j] = o;
        }

        @Override
        public int getDocID(int i) {
            return docIDs[ord[i]];
        }

        @Override
        public void getValue(int i, BytesRef packedValue) {
            // Point the ref straight at the pooled bytes; no copy here.
            packedValue.length = packedBytesLength;
            bytes.setRawBytesRef(packedValue, (long) packedBytesLength * ord[i]);
        }

        @Override
        public byte getByteAt(int i, int k) {
            return bytes.readByte((long) packedBytesLength * ord[i] + k);
        }
    };
    // If an index sort is in effect, wrap the view so docIDs come out in sorted order:
    final PointValues values = sortMap == null ? buffered : new MutableSortingPointValues((MutablePointValues) buffered, sortMap);
    // Minimal single-field reader handed to the codec writer below:
    PointsReader oneField = new PointsReader() {

        @Override
        public PointValues getValues(String fieldName) {
            if (!fieldName.equals(fieldInfo.name)) {
                throw new IllegalArgumentException("fieldName must be the same");
            }
            return values;
        }

        @Override
        public void checkIntegrity() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long ramBytesUsed() {
            return 0L;
        }

        @Override
        public void close() {
        }
    };
    writer.writeField(fieldInfo, oneField);
}
Also used : MutablePointValues(org.apache.lucene.codecs.MutablePointValues) PointsReader(org.apache.lucene.codecs.PointsReader) BytesRef(org.apache.lucene.util.BytesRef) MutablePointValues(org.apache.lucene.codecs.MutablePointValues)

Example 3 with PointsReader

Use of org.apache.lucene.codecs.PointsReader in the project lucene-solr by Apache.

From the class CheckIndex: the method testPoints.

/**
   * Test the points index.
   *
   * <p>For every field that has point values this checks that {@code estimatePointCount}
   * honors its bound contract for always-crossing, always-inside, and always-outside
   * visitors, and that the size/docCount reported by the reader match what an exhaustive
   * {@code intersect} actually visits.
   *
   * @param reader segment reader to check
   * @param infoStream stream for progress and diagnostics; may be null
   * @param failFast if true, rethrow the first error instead of recording it in the status
   * @return per-segment points status; on failure the error is recorded in {@code status.error}
   *         unless {@code failFast} is set
   * @lucene.experimental
   */
public static Status.PointsStatus testPoints(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    if (infoStream != null) {
        infoStream.print("    test: points..............");
    }
    long startNS = System.nanoTime();
    FieldInfos fieldInfos = reader.getFieldInfos();
    Status.PointsStatus status = new Status.PointsStatus();
    try {
        if (fieldInfos.hasPointValues()) {
            PointsReader pointsReader = reader.getPointsReader();
            if (pointsReader == null) {
                throw new RuntimeException("there are fields with points, but reader.getPointsReader() is null");
            }
            for (FieldInfo fieldInfo : fieldInfos) {
                if (fieldInfo.getPointDimensionCount() > 0) {
                    PointValues values = pointsReader.getValues(fieldInfo.name);
                    if (values == null) {
                        // Field is declared with points but this segment has none for it; not an error.
                        continue;
                    }
                    status.totalValueFields++;
                    long size = values.size();
                    int docCount = values.getDocCount();
                    // estimatePointCount contract: a visitor that says every cell crosses
                    // must be estimated at >= half the points...
                    final long crossCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_CROSSES_QUERY));
                    if (crossCost < size / 2) {
                        throw new RuntimeException("estimatePointCount should return >= size/2 when all cells match");
                    }
                    // ...a visitor that fully matches every cell must be estimated at >= all points...
                    final long insideCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_INSIDE_QUERY));
                    if (insideCost < size) {
                        throw new RuntimeException("estimatePointCount should return >= size when all cells fully match");
                    }
                    // ...and a visitor that matches nothing must be estimated at exactly 0.
                    final long outsideCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_OUTSIDE_QUERY));
                    if (outsideCost != 0) {
                        throw new RuntimeException("estimatePointCount should return 0 when no cells match");
                    }
                    // Exhaustively walk the tree and cross-check the reported stats:
                    VerifyPointsVisitor visitor = new VerifyPointsVisitor(fieldInfo.name, reader.maxDoc(), values);
                    values.intersect(visitor);
                    if (visitor.getPointCountSeen() != size) {
                        throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + visitor.getPointCountSeen());
                    }
                    if (visitor.getDocCountSeen() != docCount) {
                        throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but in fact has " + visitor.getDocCountSeen());
                    }
                    status.totalValuePoints += visitor.getPointCountSeen();
                }
            }
        }
        msg(infoStream, String.format(Locale.ROOT, "OK [%d fields, %d points] [took %.3f sec]", status.totalValueFields, status.totalValuePoints, nsToSec(System.nanoTime() - startNS)));
    } catch (Throwable e) {
        if (failFast) {
            throw IOUtils.rethrowAlways(e);
        }
        // Otherwise record the error in the returned status so the caller can report all problems:
        msg(infoStream, "ERROR: " + e);
        status.error = e;
        if (infoStream != null) {
            e.printStackTrace(infoStream);
        }
    }
    return status;
}
Also used : DocValuesStatus(org.apache.lucene.index.CheckIndex.Status.DocValuesStatus) PointsReader(org.apache.lucene.codecs.PointsReader)

Example 4 with PointsReader

Use of org.apache.lucene.codecs.PointsReader in the project lucene-solr by Apache.

From the class Lucene60PointsWriter: the method merge.

@Override
public void merge(MergeState mergeState) throws IOException {
    /*
     * Bulk-merge points at the BKD level when possible. If an index sort is activated
     * and some of the leaves are not sorted, the instanceof test below catches that and
     * the non-optimized merge runs instead. The same test also sends a null reader
     * (segment with no points) down the non-optimized path, since null fails instanceof.
     * If all readers are sorted Lucene60 readers, it is safe to bulk-merge the points.
     */
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader instanceof Lucene60PointsReader == false) {
            // We can only bulk merge when all to-be-merged segments use our format:
            super.merge(mergeState);
            return;
        }
    }
    // Verify checksums up front so corruption is detected before we start writing:
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader != null) {
            reader.checkIntegrity();
        }
    }
    for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
        if (fieldInfo.getPointDimensionCount() != 0) {
            if (fieldInfo.getPointDimensionCount() == 1) {
                // BKDWriter can merge 1D trees directly; pre-scan the readers for sizing.
                boolean singleValuePerDoc = true;
                // Worst case total maximum size (if none of the points are deleted):
                long totMaxSize = 0;
                for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                    PointsReader reader = mergeState.pointsReaders[i];
                    if (reader != null) {
                        FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                        FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                        if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                            PointValues values = reader.getValues(fieldInfo.name);
                            if (values != null) {
                                totMaxSize += values.size();
                                // size == docCount only when no doc has multiple values:
                                singleValuePerDoc &= values.size() == values.getDocCount();
                            }
                        }
                    }
                }
                // we were simply reindexing them:
                try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, totMaxSize, singleValuePerDoc)) {
                    List<BKDReader> bkdReaders = new ArrayList<>();
                    List<MergeState.DocMap> docMaps = new ArrayList<>();
                    for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                        PointsReader reader = mergeState.pointsReaders[i];
                        if (reader != null) {
                            // we confirmed this up above
                            assert reader instanceof Lucene60PointsReader;
                            Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;
                            // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to this
                            // reader's FieldInfo as we do below) because field numbers can easily be different
                            // when addIndexes(Directory...) copies over segments from another index:
                            FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                            FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                                BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                                if (bkdReader != null) {
                                    bkdReaders.add(bkdReader);
                                    docMaps.add(mergeState.docMaps[i]);
                                }
                            }
                        }
                    }
                    // Bulk merge all collected trees; -1 means the merged field ended up empty:
                    long fp = writer.merge(dataOut, docMaps, bkdReaders);
                    if (fp != -1) {
                        indexFPs.put(fieldInfo.name, fp);
                    }
                }
            } else {
                // Multi-dimensional fields fall back to the per-field merge path:
                mergeOneField(mergeState, fieldInfo);
            }
        }
    }
    finish();
}
Also used : ArrayList(java.util.ArrayList) FieldInfos(org.apache.lucene.index.FieldInfos) BKDReader(org.apache.lucene.util.bkd.BKDReader) MutablePointValues(org.apache.lucene.codecs.MutablePointValues) PointValues(org.apache.lucene.index.PointValues) PointsReader(org.apache.lucene.codecs.PointsReader) BKDWriter(org.apache.lucene.util.bkd.BKDWriter) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

PointsReader (org.apache.lucene.codecs.PointsReader)4 MutablePointValues (org.apache.lucene.codecs.MutablePointValues)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 BitSet (java.util.BitSet)1 FilterCodec (org.apache.lucene.codecs.FilterCodec)1 PointsFormat (org.apache.lucene.codecs.PointsFormat)1 PointsWriter (org.apache.lucene.codecs.PointsWriter)1 Lucene60PointsReader (org.apache.lucene.codecs.lucene60.Lucene60PointsReader)1 Lucene60PointsWriter (org.apache.lucene.codecs.lucene60.Lucene60PointsWriter)1 Document (org.apache.lucene.document.Document)1 StoredField (org.apache.lucene.document.StoredField)1 DocValuesStatus (org.apache.lucene.index.CheckIndex.Status.DocValuesStatus)1 FieldInfo (org.apache.lucene.index.FieldInfo)1 FieldInfos (org.apache.lucene.index.FieldInfos)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 PointValues (org.apache.lucene.index.PointValues)1 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)1 SegmentReadState (org.apache.lucene.index.SegmentReadState)1