Search in sources :

Example 1 with SpatialFieldValueBinningStrategy

use of org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy in project geowave by locationtech.

the class GeoWaveSpatialBinningStatisticsIT method testNumericStat.

/**
 * Verifies spatially binned {@link NumericStatsStatistic} values for the "LOSS" attribute against
 * reference values computed by brute force.
 *
 * <p>For every {@link SpatialBinningType} and precision 1 through 3 (doubled for S2) a statistic
 * with a {@link SpatialFieldValueBinningStrategy} is registered on the store. For each such
 * strategy a set of reference geometries is built: every test envelope and every filter geometry
 * is unioned with the geometry of each bin returned by {@code getSpatialBins} for it. All features
 * of the type are then scanned once, and each feature's "LOSS" value is accumulated into the full
 * scan reference and into every reference geometry that contains the (slightly offset) centroid.
 * Finally the reference values are compared to the values collected through {@code fillStats}.
 *
 * @param featureType the feature type whose geometry field and "LOSS" attribute are tested
 * @param store the data store the statistics are added to and queried from
 * @throws MalformedURLException declared for the {@code toURI().toURL()} conversion of the filter
 *         resource files
 * @throws IOException declared by the method; presumably raised while reading the filter
 *         resources (confirm against {@code TestUtils.resourceToFeature})
 */
private static void testNumericStat(final SimpleFeatureType featureType, final DataStore store) throws MalformedURLException, IOException {
    // load the default geometry of each of the four filter resource files
    final Geometry[] geometryFilters = new Geometry[] { (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_FILTER_FILE).toURI().toURL()).getDefaultGeometry(), (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_FILTER_FILE).toURI().toURL()).getDefaultGeometry(), (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry(), (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry() };
    // Note: this test is only applicable for the hail (points) and tornado (lines) types
    final String geometryField = featureType.getGeometryDescriptor().getLocalName();
    // we're using a tree map just to make iteration ordered, predictable, and sensible
    final Map<BinningStrategyKey, NumericStatsStatistic> stats = new TreeMap<>(Comparator.comparing(BinningStrategyKey::getName));
    // because each gridding system will be overly inclusive, we need to determine the appropriate
    // over-inclusive reference geometry per gridding system to reliably verify results
    final Map<BinningStrategyKey, Geometry[]> referenceGeometries = new HashMap<>();
    for (final SpatialBinningType type : SpatialBinningType.values()) {
        for (int precision = 1; precision < 4; precision++) {
            // S2 is more than twice as granular in its use of power of 2 "levels" as opposed to only
            // using the granularity of a character for geohash and H3
            // so double the precision for S2 to make it similar in scale
            final int finalPrecision = SpatialBinningType.S2.equals(type) ? precision * 2 : precision;
            final NumericStatsStatistic stat = new NumericStatsStatistic(featureType.getTypeName(), "LOSS");
            final SpatialFieldValueBinningStrategy strategy = new SpatialFieldValueBinningStrategy(geometryField);
            strategy.setPrecision(finalPrecision);
            strategy.setType(type);
            stat.setTag(String.format("Loss-Stats-%s-%d", type, finalPrecision));
            stat.setBinningStrategy(strategy);
            final BinningStrategyKey key = new BinningStrategyKey(strategy);
            stats.put(key, stat);
            // reference geometry layout: the test envelopes come first, the filter geometries follow
            final Geometry[] refGeoms = new Geometry[TEST_ENVELOPES.length + geometryFilters.length];
            for (int i = 0; i < TEST_ENVELOPES.length; i++) {
                refGeoms[i] = GeometryUtils.GEOMETRY_FACTORY.toGeometry(TEST_ENVELOPES[i]);
                final ByteArray[] bins = type.getSpatialBins(refGeoms[i], finalPrecision);
                // grow the envelope geometry by the geometry of every bin returned for it
                for (final ByteArray bin : bins) {
                    refGeoms[i] = refGeoms[i].union(type.getBinGeometry(bin, finalPrecision));
                }
            }
            for (int i = 0; i < geometryFilters.length; i++) {
                final int refGeomIdx = i + TEST_ENVELOPES.length;
                refGeoms[refGeomIdx] = geometryFilters[i];
                final ByteArray[] bins = type.getSpatialBins(refGeoms[refGeomIdx], finalPrecision);
                // same expansion as above, applied to the filter geometry
                for (final ByteArray bin : bins) {
                    refGeoms[refGeomIdx] = refGeoms[refGeomIdx].union(type.getBinGeometry(bin, finalPrecision));
                }
            }
            referenceGeometries.put(key, refGeoms);
        }
    }
    // register all binned statistics on the store in a single call
    store.addStatistic(stats.values().toArray(new Statistic[stats.size()]));
    // just iterate through all the data to sum up loss as a whole and per area
    final Map<BinningStrategyKey, StatsAccumulator[]> statAccsPerStrategy = new HashMap<>();
    final StatsAccumulator referenceFullScanStatsAccumulator = new StatsAccumulator();
    // one accumulator per reference geometry, per strategy (same index layout as refGeoms)
    for (final BinningStrategyKey key : stats.keySet()) {
        final StatsAccumulator[] referenceStatsAccumulators = new StatsAccumulator[TEST_ENVELOPES.length + geometryFilters.length];
        for (int i = 0; i < referenceStatsAccumulators.length; i++) {
            referenceStatsAccumulators[i] = new StatsAccumulator();
        }
        statAccsPerStrategy.put(key, referenceStatsAccumulators);
    }
    try (CloseableIterator<SimpleFeature> it = store.query(VectorQueryBuilder.newBuilder().addTypeName(featureType.getTypeName()).build())) {
        while (it.hasNext()) {
            final SimpleFeature f = it.next();
            // considering centroids are being used for the hashing in this case, just use centroids for
            // this reference
            final Point centroid = ((Geometry) f.getDefaultGeometry()).getCentroid();
            // turns out some of the centroids are "exactly" on the border of hashes, this disambiguates
            // the border (essentially rounding it up)
            final Point centroidOffset = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(centroid.getX() + STATS_COMPARE_EPSILON, centroid.getY() + STATS_COMPARE_EPSILON));
            final double loss = ((Number) f.getAttribute("LOSS")).doubleValue();
            referenceFullScanStatsAccumulator.add(loss);
            // add the loss to every reference geometry (of every strategy) containing the offset centroid
            for (final BinningStrategyKey key : stats.keySet()) {
                final StatsAccumulator[] referenceStatsAccumulators = statAccsPerStrategy.get(key);
                final Geometry[] refGeoms = referenceGeometries.get(key);
                for (int i = 0; i < refGeoms.length; i++) {
                    if (refGeoms[i].contains(centroidOffset)) {
                        referenceStatsAccumulators[i].add(loss);
                    }
                }
            }
        }
    }
    // freeze the accumulators into snapshots for comparison
    final Stats referenceFullScanStats = referenceFullScanStatsAccumulator.snapshot();
    final Map<BinningStrategyKey, Stats[]> referenceStatsPerStrategy = new HashMap<>();
    statAccsPerStrategy.forEach((k, v) -> {
        referenceStatsPerStrategy.put(k, Arrays.stream(v).map(a -> a.snapshot()).toArray(Stats[]::new));
    });
    for (final Entry<BinningStrategyKey, NumericStatsStatistic> entry : stats.entrySet()) {
        final NumericStatsStatistic stat = entry.getValue();
        // the full scan reference is appended as the LAST element, after envelopes and filters
        final Stats[] referenceStats = ArrayUtils.add(referenceStatsPerStrategy.get(entry.getKey()), referenceFullScanStats);
        final Stats[] perBinStats = new Stats[referenceStats.length];
        final Stats[] statValue = new Stats[referenceStats.length];
        // NOTE(review): fillStats is defined elsewhere; it is expected to populate the given index of
        // both perBinStats and statValue for the supplied bin constraints - confirm
        fillStats(perBinStats, statValue, perBinStats.length - 1, stat, store, BinConstraints.allBins());
        for (int i = 0; i < TEST_ENVELOPES.length; i++) {
            fillStats(perBinStats, statValue, i, stat, store, BinConstraints.ofObject(TEST_ENVELOPES[i]));
        }
        for (int i = 0; i < geometryFilters.length; i++) {
            fillStats(perBinStats, statValue, i + TEST_ENVELOPES.length, stat, store, BinConstraints.ofObject(geometryFilters[i]));
        }
        final double geometricErrorThreshold = TYPE_TO_ERROR_THRESHOLD.get(entry.getKey().type);
        for (int i = 0; i < perBinStats.length; i++) {
            // now just assert that the reference value equals the accumulated value which equals the
            // aggregated "getStatisticValue"
            // for the full scan we can make an exact assertion (to the level of precision of floating
            // point error)
            // for the geometrically constrained assertions we'll need to assert based on the provided
            // error thresholds of the binning strategy (eg. H3 has very poor approximations for
            // line/poly to h3 coords which come into play for the geometrically constrained assertions)
            final boolean isGeometricallyConstrained = (i != (perBinStats.length - 1));
            if (isGeometricallyConstrained) {
                // compare via getRatio (defined elsewhere) against 1.0 within the per-type threshold
                Assert.assertEquals(String.format("Per Bin Stats [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].count(), perBinStats[i].count()), geometricErrorThreshold);
                Assert.assertEquals(String.format("getStatisticValue [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].count(), statValue[i].count()), geometricErrorThreshold);
                Assert.assertEquals(String.format("Per Bin Stats [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].mean(), perBinStats[i].mean()), geometricErrorThreshold);
                Assert.assertEquals(String.format("Per Bin Stats [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].populationVariance(), perBinStats[i].populationVariance()), geometricErrorThreshold);
                Assert.assertEquals(String.format("getStatisticValue [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].mean(), statValue[i].mean()), geometricErrorThreshold);
                Assert.assertEquals(String.format("getStatisticValue [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].populationVariance(), statValue[i].populationVariance()), geometricErrorThreshold);
            } else {
                // full scan slot: counts must match exactly, mean/variance within STATS_COMPARE_EPSILON
                Assert.assertEquals(String.format("Per Bin Stats [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].count(), perBinStats[i].count());
                Assert.assertEquals(String.format("getStatisticValue [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].count(), statValue[i].count());
                Assert.assertEquals(String.format("Per Bin Stats [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].mean(), perBinStats[i].mean(), STATS_COMPARE_EPSILON);
                Assert.assertEquals(String.format("Per Bin Stats [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].populationVariance(), perBinStats[i].populationVariance(), STATS_COMPARE_EPSILON);
                Assert.assertEquals(String.format("getStatisticValue [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].mean(), statValue[i].mean(), STATS_COMPARE_EPSILON);
                Assert.assertEquals(String.format("getStatisticValue [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].populationVariance(), statValue[i].populationVariance(), STATS_COMPARE_EPSILON);
            }
        }
    }
}
Also used : SpatialBinningType(org.locationtech.geowave.core.geotime.binning.SpatialBinningType) HashMap(java.util.HashMap) SpatialFieldValueBinningStrategy(org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy) NumericStatsStatistic(org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic) Statistic(org.locationtech.geowave.core.store.api.Statistic) CountStatistic(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic) StatsAccumulator(org.locationtech.geowave.core.store.statistics.field.StatsAccumulator) ByteArray(org.locationtech.geowave.core.index.ByteArray) NumericStatsStatistic(org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic) Point(org.locationtech.jts.geom.Point) TreeMap(java.util.TreeMap) Point(org.locationtech.jts.geom.Point) SimpleFeature(org.opengis.feature.simple.SimpleFeature) Geometry(org.locationtech.jts.geom.Geometry) Coordinate(org.locationtech.jts.geom.Coordinate) Stats(org.locationtech.geowave.core.store.statistics.field.Stats) File(java.io.File)

Example 2 with SpatialFieldValueBinningStrategy

use of org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy in project geowave by locationtech.

the class SpatialBinningStatisticExample method main.

/**
 * Demonstrates spatially binned statistics on an in-memory GeoWave data store.
 *
 * <p>Ten canned point features are written to a store on which three binned
 * {@link CountStatistic}s (S2, H3 and GeoHash) were registered as "empty" statistics beforehand.
 * For each of them the example prints every bin, the bins constrained to {@code bbox1}, and the
 * aggregated value constrained to {@code bbox2}. It then adds a binned
 * {@link NumericStatsStatistic} over the "population" attribute after the data was written and
 * prints the same three views for it.
 *
 * <p>Bin geometries are always resolved with the precision configured on the corresponding
 * binning strategy, so the printed geometry cannot drift from the strategy's configuration.
 *
 * @param args command line arguments (unused)
 */
public static void main(final String[] args) {
    final SimpleFeatureType featureType = getSimpleFeatureType();
    // Points (to be ingested into GeoWave Data Store)
    final List<SimpleFeature> cannedFeatures = ImmutableList.of(
        buildSimpleFeature(featureType, "Loc1", new Coordinate(-77.0352, 38.8895), 12),
        buildSimpleFeature(featureType, "Loc2", new Coordinate(-77.0366, 38.8977), 13),
        buildSimpleFeature(featureType, "Loc3", new Coordinate(-76.8644, 38.9078), 8),
        buildSimpleFeature(featureType, "Loc4", new Coordinate(-76.350677, 38.9641511), 15),
        buildSimpleFeature(featureType, "Loc5", new Coordinate(-77.3384112, 38.416091), 7),
        buildSimpleFeature(featureType, "Loc6", new Coordinate(-67.0352, 28.8895), 3),
        buildSimpleFeature(featureType, "Loc7", new Coordinate(-67.0366, 28.8977), 99),
        buildSimpleFeature(featureType, "Loc8", new Coordinate(-66.8644, 28.9078), 0),
        buildSimpleFeature(featureType, "Loc9", new Coordinate(-66.350677, 28.9641511), 1),
        buildSimpleFeature(featureType, "Loc10", new Coordinate(-67.3384112, 28.416091), 23));
    final Index index = SpatialDimensionalityTypeProvider.createIndexFromOptions(new SpatialOptions());
    final DataStore dataStore = DataStoreFactory.createDataStore(new MemoryRequiredOptions());
    final FeatureDataAdapter adapter = new FeatureDataAdapter(featureType);
    final Envelope bbox1 = new Envelope(-77.5, -76, 38.4, 39);
    final Envelope bbox2 = new Envelope(-67.5, -66, 28.4, 29);
    dataStore.addType(adapter, index);
    final CountStatistic s2Count = new CountStatistic(featureType.getTypeName());
    s2Count.setTag("S2-Example");
    final SpatialFieldValueBinningStrategy s2SpatialBinning = new SpatialFieldValueBinningStrategy(featureType.getGeometryDescriptor().getLocalName());
    // type could be Google's S2, Uber's H3, or simple GeoHash
    s2SpatialBinning.setType(SpatialBinningType.S2);
    // precision is the character length for H3 and GeoHash which is over twice as coarse as S2
    // which uses powers of two for precision (so a precision of 8 in S2 is actually a coarser
    // granularity than a precision of 4 in GeoHash or H3)
    s2SpatialBinning.setPrecision(7);
    s2Count.setBinningStrategy(s2SpatialBinning);
    final CountStatistic h3Count = new CountStatistic(featureType.getTypeName());
    // stats for the same feature type should have different tags
    h3Count.setTag("H3-Example");
    final SpatialFieldValueBinningStrategy h3SpatialBinning = new SpatialFieldValueBinningStrategy(featureType.getGeometryDescriptor().getLocalName());
    // type could be Google's S2, Uber's H3, or simple GeoHash
    h3SpatialBinning.setType(SpatialBinningType.H3);
    h3SpatialBinning.setPrecision(3);
    h3Count.setBinningStrategy(h3SpatialBinning);
    final CountStatistic geohashCount = new CountStatistic(featureType.getTypeName());
    geohashCount.setTag("Geohash-Example");
    final SpatialFieldValueBinningStrategy geohashSpatialBinning = new SpatialFieldValueBinningStrategy(featureType.getGeometryDescriptor().getLocalName());
    // type could be Google's S2, Uber's H3, or simple GeoHash
    geohashSpatialBinning.setType(SpatialBinningType.GEOHASH);
    geohashSpatialBinning.setPrecision(3);
    geohashCount.setBinningStrategy(geohashSpatialBinning);
    // you can add "empty" statistic before you've written any data, the stats will then be updated
    // as you write data
    // alternatively if you don't use the "empty" variant it will automatically calculate and update
    // these stats for pre-existing data before returning from the method
    dataStore.addEmptyStatistic(s2Count, h3Count, geohashCount);
    // Ingest cannedFeatures into the DataStore.
    try (Writer<SimpleFeature> indexWriter = dataStore.createWriter(adapter.getTypeName())) {
        for (final SimpleFeature sf : cannedFeatures) {
            indexWriter.write(sf);
        }
    }
    System.out.println("***** S2 Binning *****");
    System.out.println("** All Bins **");
    try (CloseableIterator<Pair<ByteArray, Long>> it = dataStore.getBinnedStatisticValues(s2Count)) {
        // you can get all bins
        while (it.hasNext()) {
            final Pair<ByteArray, Long> pair = it.next();
            // resolve the bin geometry with the precision the strategy was configured with
            System.out.println(String.format("Count: %d, Bin: %s, Bin Geometry: %s", pair.getRight(), s2SpatialBinning.binToString(pair.getLeft()), s2SpatialBinning.getType().getBinGeometry(pair.getLeft(), s2SpatialBinning.getPrecision())));
        }
    }
    System.out.println(String.format("** Bins Within Envelope %s **", bbox1));
    try (CloseableIterator<Pair<ByteArray, Long>> it = dataStore.getBinnedStatisticValues(s2Count, BinConstraints.ofObject(bbox1))) {
        // or you can get only bins within an envelope
        while (it.hasNext()) {
            final Pair<ByteArray, Long> pair = it.next();
            System.out.println(String.format("Count: %d, Bin: %s, Bin Geometry: %s", pair.getRight(), s2SpatialBinning.binToString(pair.getLeft()), s2SpatialBinning.getType().getBinGeometry(pair.getLeft(), s2SpatialBinning.getPrecision())));
        }
    }
    // or you could just get the aggregated statistic value for an envelope (keep in mind this is
    // using the statistic bins that intersect the envelope so may be an over-estimate for bins that
    // only partially intersect)
    System.out.println(String.format("** %d in bbox %s **", dataStore.getStatisticValue(s2Count, BinConstraints.ofObject(bbox2)), bbox2));
    System.out.println("\n***** H3 Binning *****");
    System.out.println("** All Bins **");
    try (CloseableIterator<Pair<ByteArray, Long>> it = dataStore.getBinnedStatisticValues(h3Count)) {
        // you can get all bins
        while (it.hasNext()) {
            final Pair<ByteArray, Long> pair = it.next();
            System.out.println(String.format("Count: %d, Bin: %s, Bin Geometry: %s", pair.getRight(), h3SpatialBinning.binToString(pair.getLeft()), h3SpatialBinning.getType().getBinGeometry(pair.getLeft(), h3SpatialBinning.getPrecision())));
        }
    }
    System.out.println(String.format("** Bins Within Envelope %s **", bbox1));
    try (CloseableIterator<Pair<ByteArray, Long>> it = dataStore.getBinnedStatisticValues(h3Count, BinConstraints.ofObject(bbox1))) {
        // or you can get only bins within an envelope
        while (it.hasNext()) {
            final Pair<ByteArray, Long> pair = it.next();
            System.out.println(String.format("Count: %d, Bin: %s, Bin Geometry: %s", pair.getRight(), h3SpatialBinning.binToString(pair.getLeft()), h3SpatialBinning.getType().getBinGeometry(pair.getLeft(), h3SpatialBinning.getPrecision())));
        }
    }
    // or you could just get the aggregated statistic value for an envelope (keep in mind this is
    // using the statistic bins that intersect the envelope so may be an over-estimate for bins that
    // only partially intersect)
    System.out.println(String.format("** %d in bbox %s **", dataStore.getStatisticValue(h3Count, BinConstraints.ofObject(bbox2)), bbox2));
    System.out.println("\n***** Geohash Binning *****");
    System.out.println("** All Bins **");
    try (CloseableIterator<Pair<ByteArray, Long>> it = dataStore.getBinnedStatisticValues(geohashCount)) {
        // you can get all bins
        while (it.hasNext()) {
            final Pair<ByteArray, Long> pair = it.next();
            System.out.println(String.format("Count: %d, Bin: %s, Bin Geometry: %s", pair.getRight(), geohashSpatialBinning.binToString(pair.getLeft()), geohashSpatialBinning.getType().getBinGeometry(pair.getLeft(), geohashSpatialBinning.getPrecision())));
        }
    }
    System.out.println(String.format("** Bins Within Envelope %s **", bbox1));
    try (CloseableIterator<Pair<ByteArray, Long>> it = dataStore.getBinnedStatisticValues(geohashCount, BinConstraints.ofObject(bbox1))) {
        // or you can get only bins within an envelope
        while (it.hasNext()) {
            final Pair<ByteArray, Long> pair = it.next();
            System.out.println(String.format("Count: %d, Bin: %s, Bin Geometry: %s", pair.getRight(), geohashSpatialBinning.binToString(pair.getLeft()), geohashSpatialBinning.getType().getBinGeometry(pair.getLeft(), geohashSpatialBinning.getPrecision())));
        }
    }
    // or you could just get the aggregated statistic value for an envelope (keep in mind this is
    // using the statistic bins that intersect the envelope so may be an over-estimate for bins that
    // only partially intersect)
    System.out.println(String.format("** %d in bbox %s **", dataStore.getStatisticValue(geohashCount, BinConstraints.ofObject(bbox2)), bbox2));
    // and finally just to make it clear, you can apply spatial binning to *any* statistic not just
    // counts
    // so here's an example binning numeric stats of the population (sum, avg, std dev, etc.) by an
    // S2 level 7 grid
    final NumericStatsStatistic s2PopulationStats = new NumericStatsStatistic(featureType.getTypeName(), "population");
    s2PopulationStats.setTag("S2-Population-Stats");
    final SpatialFieldValueBinningStrategy s2PopulationSpatialBinning = new SpatialFieldValueBinningStrategy(featureType.getGeometryDescriptor().getLocalName());
    s2PopulationSpatialBinning.setType(SpatialBinningType.S2);
    s2PopulationSpatialBinning.setPrecision(7);
    s2PopulationStats.setBinningStrategy(s2PopulationSpatialBinning);
    // here we'll calculate the stat on add based on the already written data (rather than adding
    // the "empty" statistic)
    dataStore.addStatistic(s2PopulationStats);
    // and we'll run through the same set of examples of getting all the bins and then filtering by
    // an envelope
    System.out.println("\n***** S2 Population Stats Binning *****");
    System.out.println("** All Bins **");
    try (CloseableIterator<Pair<ByteArray, Stats>> it = dataStore.getBinnedStatisticValues(s2PopulationStats)) {
        // you can get all bins
        while (it.hasNext()) {
            final Pair<ByteArray, Stats> pair = it.next();
            // use the configured S2 precision (7); a hard-coded 3 here did not match the strategy
            System.out.println(String.format("Population: %s, Bin: %s, Bin Geometry: %s", pair.getRight(), s2PopulationSpatialBinning.binToString(pair.getLeft()), s2PopulationSpatialBinning.getType().getBinGeometry(pair.getLeft(), s2PopulationSpatialBinning.getPrecision())));
        }
    }
    System.out.println(String.format("** Bins Within Envelope %s **", bbox1));
    try (CloseableIterator<Pair<ByteArray, Stats>> it = dataStore.getBinnedStatisticValues(s2PopulationStats, BinConstraints.ofObject(bbox1))) {
        // or you can get only bins within an envelope
        while (it.hasNext()) {
            final Pair<ByteArray, Stats> pair = it.next();
            System.out.println(String.format("Population: %s, Bin: %s, Bin Geometry: %s", pair.getRight(), s2PopulationSpatialBinning.binToString(pair.getLeft()), s2PopulationSpatialBinning.getType().getBinGeometry(pair.getLeft(), s2PopulationSpatialBinning.getPrecision())));
        }
    }
    // or you could just get the aggregated statistic value for an envelope (keep in mind this is
    // using the statistic bins that intersect the envelope so may be an over-estimate for bins that
    // only partially intersect)
    System.out.println(String.format("** Population Stats '%s' in bbox %s **", dataStore.getStatisticValue(s2PopulationStats, BinConstraints.ofObject(bbox2)), bbox2));
}
Also used : SpatialFieldValueBinningStrategy(org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy) NumericStatsStatistic(org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic) Index(org.locationtech.geowave.core.store.api.Index) Envelope(org.locationtech.jts.geom.Envelope) SpatialOptions(org.locationtech.geowave.core.geotime.index.SpatialOptions) SimpleFeature(org.opengis.feature.simple.SimpleFeature) SimpleFeatureType(org.opengis.feature.simple.SimpleFeatureType) Coordinate(org.locationtech.jts.geom.Coordinate) DataStore(org.locationtech.geowave.core.store.api.DataStore) Stats(org.locationtech.geowave.core.store.statistics.field.Stats) ByteArray(org.locationtech.geowave.core.index.ByteArray) MemoryRequiredOptions(org.locationtech.geowave.core.store.memory.MemoryRequiredOptions) FeatureDataAdapter(org.locationtech.geowave.adapter.vector.FeatureDataAdapter) CountStatistic(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic) Pair(org.apache.commons.lang3.tuple.Pair)

Example 3 with SpatialFieldValueBinningStrategy

use of org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy in project geowave by locationtech.

the class GeoWaveSpatialBinningStatisticsIT method testGeometry.

/**
 * Checks binned count statistics for every combination of {@link SpatialBinningType},
 * {@link ComplexGeometryBinningOption} and precision 1 through 3 (doubled for S2).
 *
 * <p>Three checks are made against an unbinned reference count of the same type: the
 * scale-by-overlap totals must match within the per-type error threshold, the centroid-only
 * totals must match exactly, and for each bin the full-geometry count must be at least the
 * scale-by-overlap count and (where present) the centroid-only count.
 *
 * @param featureType the feature type whose geometry field is binned
 * @param store the data store the statistics are added to and read from
 */
private static void testGeometry(final SimpleFeatureType featureType, final DataStore store) {
    final String geomFieldName = featureType.getGeometryDescriptor().getLocalName();
    final List<CountStatistic> binnedCounts = new ArrayList<>();
    for (final SpatialBinningType binningType : SpatialBinningType.values()) {
        for (final ComplexGeometryBinningOption geometryOption : ComplexGeometryBinningOption.values()) {
            for (int level = 1; level <= 3; level++) {
                // S2 "levels" are powers of two and therefore more than twice as granular as the
                // per-character precision of geohash and H3, so S2 gets double the precision to
                // end up at a similar scale
                final int scaledPrecision;
                if (SpatialBinningType.S2.equals(binningType)) {
                    scaledPrecision = level * 2;
                } else {
                    scaledPrecision = level;
                }
                final CountStatistic binnedCount = new CountStatistic(featureType.getTypeName());
                final SpatialFieldValueBinningStrategy binning = new SpatialFieldValueBinningStrategy(geomFieldName);
                binning.setComplexGeometry(geometryOption);
                binning.setPrecision(scaledPrecision);
                binning.setType(binningType);
                binnedCount.setTag(String.format("%s-%d-%s", binningType, scaledPrecision, geometryOption));
                binnedCount.setBinningStrategy(binning);
                binnedCounts.add(binnedCount);
            }
        }
    }
    store.addStatistic(binnedCounts.toArray(new Statistic[0]));

    // an unbinned count of the same type serves as the reference total
    final CountStatistic totalCountStat = new CountStatistic(featureType.getTypeName());
    store.addStatistic(totalCountStat);
    final Long expectedCount = store.getStatisticValue(totalCountStat);
    Assert.assertTrue("Must be at least one entry", expectedCount > 0);

    // first pass: scaled-by-overlap totals must be within the per-type error threshold
    for (final CountStatistic binnedCount : binnedCounts) {
        final SpatialFieldValueBinningStrategy binning = (SpatialFieldValueBinningStrategy) binnedCount.getBinningStrategy();
        if (!binning.getComplexGeometry().equals(ComplexGeometryBinningOption.USE_FULL_GEOMETRY_SCALE_BY_OVERLAP)) {
            continue;
        }
        final String failureMessage = String.format("%s failed scaled geometry", binning.getDefaultTag());
        Assert.assertEquals(failureMessage, expectedCount, store.getStatisticValue(binnedCount), expectedCount * TYPE_TO_ERROR_THRESHOLD.get(binning.getType()));
    }

    // second pass: centroid-only totals must match the reference exactly
    for (final CountStatistic binnedCount : binnedCounts) {
        final SpatialFieldValueBinningStrategy binning = (SpatialFieldValueBinningStrategy) binnedCount.getBinningStrategy();
        if (!binning.getComplexGeometry().equals(ComplexGeometryBinningOption.USE_CENTROID_ONLY)) {
            continue;
        }
        final String failureMessage = String.format("%s failed centroids at precision %d", binning.getType(), binning.getPrecision());
        Assert.assertEquals(failureMessage, expectedCount, store.getStatisticValue(binnedCount));
    }

    // The full geometry option is sanity checked bin by bin: each of its bins should hold at least
    // the count reported for that bin by the other two options. Technically a centroid can fall
    // in a bin that the full geometry does not intersect, so this is not always a fair
    // expectation, but it suffices, particularly since precision stays below 4 in this test.
    final Map<BinningStrategyKey, Map<ByteArray, Long>> countsByStrategy = new HashMap<>();
    for (final CountStatistic binnedCount : binnedCounts) {
        final Map<ByteArray, Long> countsByBin = new HashMap<>();
        countsByStrategy.put(new BinningStrategyKey((SpatialFieldValueBinningStrategy) binnedCount.getBinningStrategy()), countsByBin);
        try (CloseableIterator<Pair<ByteArray, Long>> binIt = store.getBinnedStatisticValues(binnedCount)) {
            while (binIt.hasNext()) {
                final Pair<ByteArray, Long> binCount = binIt.next();
                // every bin may be reported only once per statistic
                Assert.assertFalse(countsByBin.containsKey(binCount.getKey()));
                countsByBin.put(binCount.getKey(), binCount.getValue());
            }
        }
    }

    for (final Entry<BinningStrategyKey, Map<ByteArray, Long>> fullGeometryEntry : countsByStrategy.entrySet()) {
        final BinningStrategyKey fullKey = fullGeometryEntry.getKey();
        if (!ComplexGeometryBinningOption.USE_FULL_GEOMETRY.equals(fullKey.option)) {
            continue;
        }
        // look up the two sibling options that share this entry's type and precision
        final Map<ByteArray, Long> centroidResults = countsByStrategy.get(new BinningStrategyKey(fullKey.type, fullKey.precision, ComplexGeometryBinningOption.USE_CENTROID_ONLY));
        final Map<ByteArray, Long> scaledResults = countsByStrategy.get(new BinningStrategyKey(fullKey.type, fullKey.precision, ComplexGeometryBinningOption.USE_FULL_GEOMETRY_SCALE_BY_OVERLAP));
        for (final Entry<ByteArray, Long> fullBin : fullGeometryEntry.getValue().entrySet()) {
            final ByteArray bin = fullBin.getKey();
            final Long fullCount = fullBin.getValue();
            // the scaled result has to exist for this bin and must not exceed the full count
            final Long scaledResult = scaledResults.get(bin);
            Assert.assertNotNull(String.format("Scaled result doesn't exist for %s (%d) at bin %s", fullKey.type, fullKey.precision, fullKey.type.binToString(bin.getBytes())), scaledResult);
            Assert.assertTrue(String.format("Scaled result is greater than the full geometry for %s (%d) at bin %s", fullKey.type, fullKey.precision, fullKey.type.binToString(bin.getBytes())), scaledResult <= fullCount);
            // the centroid result is optional for a bin, but when present must not exceed it either
            final Long centroidResult = centroidResults.get(bin);
            Assert.assertTrue(String.format("Centroid result is greater than the full geometry for %s (%d) at bin %s", fullKey.type, fullKey.precision, fullKey.type.binToString(bin.getBytes())), (centroidResult == null) || (centroidResult <= fullCount));
        }
    }
}
Also used : FeatureDataAdapter(org.locationtech.geowave.adapter.vector.FeatureDataAdapter) Arrays(java.util.Arrays) AbstractFieldRetypingSource(org.locationtech.geowave.format.geotools.vector.AbstractFieldRetypingSource) LoggerFactory(org.slf4j.LoggerFactory) Coordinate(org.locationtech.jts.geom.Coordinate) NumericStatsStatistic(org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic) TestUtils(org.locationtech.geowave.test.TestUtils) FloatCompareUtils(org.locationtech.geowave.core.index.FloatCompareUtils) SimpleFeatureTypeBuilder(org.geotools.feature.simple.SimpleFeatureTypeBuilder) Pair(org.apache.commons.lang3.tuple.Pair) SimpleFeature(org.opengis.feature.simple.SimpleFeature) GeoToolsVectorDataOptions(org.locationtech.geowave.format.geotools.vector.GeoToolsVectorDataOptions) AttributeDescriptor(org.opengis.feature.type.AttributeDescriptor) Map(java.util.Map) Statistic(org.locationtech.geowave.core.store.api.Statistic) CountStatistic(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic) AfterClass(org.junit.AfterClass) SpatialBinningType(org.locationtech.geowave.core.geotime.binning.SpatialBinningType) ImmutableMap(com.google.common.collect.ImmutableMap) Point(org.locationtech.jts.geom.Point) GeometryUtils(org.locationtech.geowave.core.geotime.util.GeometryUtils) DimensionalityType(org.locationtech.geowave.test.TestUtils.DimensionalityType) List(java.util.List) Entry(java.util.Map.Entry) GeoWaveITRunner(org.locationtech.geowave.test.GeoWaveITRunner) Geometry(org.locationtech.jts.geom.Geometry) ByteArray(org.locationtech.geowave.core.index.ByteArray) StatsAccumulator(org.locationtech.geowave.core.store.statistics.field.StatsAccumulator) BeforeClass(org.junit.BeforeClass) VectorQueryBuilder(org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder) BinConstraints(org.locationtech.geowave.core.store.api.BinConstraints) GeoWaveTestStore(org.locationtech.geowave.test.annotation.GeoWaveTestStore) 
Name(org.opengis.feature.type.Name) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) ArrayUtils(org.apache.commons.lang3.ArrayUtils) RetypingVectorDataPlugin(org.locationtech.geowave.format.geotools.vector.RetypingVectorDataPlugin) ComplexGeometryBinningOption(org.locationtech.geowave.core.geotime.binning.ComplexGeometryBinningOption) SimpleFeatureType(org.opengis.feature.simple.SimpleFeatureType) ArrayList(java.util.ArrayList) LocalFileIngestPlugin(org.locationtech.geowave.core.store.ingest.LocalFileIngestPlugin) Stats(org.locationtech.geowave.core.store.statistics.field.Stats) Logger(org.slf4j.Logger) MalformedURLException(java.net.MalformedURLException) Files(java.nio.file.Files) DataStore(org.locationtech.geowave.core.store.api.DataStore) IngestOptions(org.locationtech.geowave.core.store.api.IngestOptions) IOException(java.io.IOException) Test(org.junit.Test) File(java.io.File) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) TreeMap(java.util.TreeMap) CloseableIterator(org.locationtech.geowave.core.store.CloseableIterator) Paths(java.nio.file.Paths) SpatialFieldValueBinningStrategy(org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy) Comparator(java.util.Comparator) Assert(org.junit.Assert) GeoToolsVectorDataStoreIngestPlugin(org.locationtech.geowave.format.geotools.vector.GeoToolsVectorDataStoreIngestPlugin) Envelope(org.locationtech.jts.geom.Envelope) SpatialBinningType(org.locationtech.geowave.core.geotime.binning.SpatialBinningType) HashMap(java.util.HashMap) SpatialFieldValueBinningStrategy(org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy) ArrayList(java.util.ArrayList) Point(org.locationtech.jts.geom.Point) NumericStatsStatistic(org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic) Statistic(org.locationtech.geowave.core.store.api.Statistic) 
CountStatistic(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic) ByteArray(org.locationtech.geowave.core.index.ByteArray) ComplexGeometryBinningOption(org.locationtech.geowave.core.geotime.binning.ComplexGeometryBinningOption) CountStatistic(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Pair(org.apache.commons.lang3.tuple.Pair)

Aggregations

SpatialFieldValueBinningStrategy (org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy)3 ByteArray (org.locationtech.geowave.core.index.ByteArray)3 CountStatistic (org.locationtech.geowave.core.store.statistics.adapter.CountStatistic)3 NumericStatsStatistic (org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic)3 Stats (org.locationtech.geowave.core.store.statistics.field.Stats)3 File (java.io.File)2 HashMap (java.util.HashMap)2 TreeMap (java.util.TreeMap)2 Pair (org.apache.commons.lang3.tuple.Pair)2 FeatureDataAdapter (org.locationtech.geowave.adapter.vector.FeatureDataAdapter)2 SpatialBinningType (org.locationtech.geowave.core.geotime.binning.SpatialBinningType)2 DataStore (org.locationtech.geowave.core.store.api.DataStore)2 Statistic (org.locationtech.geowave.core.store.api.Statistic)2 Coordinate (org.locationtech.jts.geom.Coordinate)2 SimpleFeature (org.opengis.feature.simple.SimpleFeature)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 IOException (java.io.IOException)1 MalformedURLException (java.net.MalformedURLException)1 Files (java.nio.file.Files)1 Paths (java.nio.file.Paths)1