Search in sources :

Example 1 with StatsAccumulator

Use of org.locationtech.geowave.core.store.statistics.field.StatsAccumulator in the geowave project by locationtech.

From the class GeoWaveSpatialBinningStatisticsIT, method testNumericStat.

/**
 * Checks spatially binned {@code NumericStatsStatistic} values for the "LOSS" attribute against
 * reference values computed by scanning every feature in the store.
 *
 * <p>For every {@code SpatialBinningType} and for precisions 1 through 3 (doubled for S2), a tagged
 * statistic with a {@code SpatialFieldValueBinningStrategy} on the type's geometry field is
 * registered with the store. Reference accumulators are then filled by iterating all features of
 * the type and testing each feature's (slightly offset) centroid against a set of reference
 * geometries. Finally the stored statistics are compared with the reference values: exactly (to
 * {@code STATS_COMPARE_EPSILON}) for the unconstrained case and by ratio within a per-type
 * threshold for the geometrically constrained cases.
 *
 * @param featureType the feature type whose geometry field and "LOSS" attribute are used
 * @param store the data store the statistics are added to and queried from
 * @throws MalformedURLException declared because the filter files are converted via
 *         {@code File.toURI().toURL()}
 * @throws IOException declared by the method; presumably raised while reading the filter
 *         resources or iterating the query results (the throwing calls are not visible here)
 */
private static void testNumericStat(final SimpleFeatureType featureType, final DataStore store) throws MalformedURLException, IOException {
    // default geometries of the four filter resource files; they are used, in addition to
    // TEST_ENVELOPES, as spatial constraints further down
    final Geometry[] geometryFilters = new Geometry[] { (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_FILTER_FILE).toURI().toURL()).getDefaultGeometry(), (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_FILTER_FILE).toURI().toURL()).getDefaultGeometry(), (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry(), (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry() };
    // Note: this test is only applicable for the hail (points) and tornado (lines) types
    final String geometryField = featureType.getGeometryDescriptor().getLocalName();
    // we're using a tree map just to make iteration ordered, predictable, and sensible
    final Map<BinningStrategyKey, NumericStatsStatistic> stats = new TreeMap<>(Comparator.comparing(BinningStrategyKey::getName));
    // because each gridding system will be overly inclusive, we need to determine the appropriate
    // over-inclusive reference geometry per gridding system to reliably verify results
    final Map<BinningStrategyKey, Geometry[]> referenceGeometries = new HashMap<>();
    // build one statistic and one set of reference geometries per (binning type, precision) pair
    for (final SpatialBinningType type : SpatialBinningType.values()) {
        for (int precision = 1; precision < 4; precision++) {
            // S2 is more than twice as granular in its use of power of 2 "levels" as opposed to only
            // using the granularity of a character for geohash and H3
            // so double the precision for S2 to make it similar in scale
            final int finalPrecision = SpatialBinningType.S2.equals(type) ? precision * 2 : precision;
            final NumericStatsStatistic stat = new NumericStatsStatistic(featureType.getTypeName(), "LOSS");
            final SpatialFieldValueBinningStrategy strategy = new SpatialFieldValueBinningStrategy(geometryField);
            strategy.setPrecision(finalPrecision);
            strategy.setType(type);
            stat.setTag(String.format("Loss-Stats-%s-%d", type, finalPrecision));
            stat.setBinningStrategy(strategy);
            final BinningStrategyKey key = new BinningStrategyKey(strategy);
            stats.put(key, stat);
            // layout of refGeoms: indices [0, TEST_ENVELOPES.length) hold the envelopes, the
            // remaining indices hold the filter geometries
            final Geometry[] refGeoms = new Geometry[TEST_ENVELOPES.length + geometryFilters.length];
            for (int i = 0; i < TEST_ENVELOPES.length; i++) {
                refGeoms[i] = GeometryUtils.GEOMETRY_FACTORY.toGeometry(TEST_ENVELOPES[i]);
                // the bins are computed once from the un-expanded geometry, then each bin's
                // geometry is unioned in to grow the reference geometry
                final ByteArray[] bins = type.getSpatialBins(refGeoms[i], finalPrecision);
                for (final ByteArray bin : bins) {
                    refGeoms[i] = refGeoms[i].union(type.getBinGeometry(bin, finalPrecision));
                }
            }
            // same expansion for the filter geometries, stored after the envelopes
            for (int i = 0; i < geometryFilters.length; i++) {
                final int refGeomIdx = i + TEST_ENVELOPES.length;
                refGeoms[refGeomIdx] = geometryFilters[i];
                final ByteArray[] bins = type.getSpatialBins(refGeoms[refGeomIdx], finalPrecision);
                for (final ByteArray bin : bins) {
                    refGeoms[refGeomIdx] = refGeoms[refGeomIdx].union(type.getBinGeometry(bin, finalPrecision));
                }
            }
            referenceGeometries.put(key, refGeoms);
        }
    }
    // register every statistic built above with the store in a single call
    store.addStatistic(stats.values().toArray(new Statistic[stats.size()]));
    // just iterate through all the data to sum up loss as a whole and per area
    final Map<BinningStrategyKey, StatsAccumulator[]> statAccsPerStrategy = new HashMap<>();
    final StatsAccumulator referenceFullScanStatsAccumulator = new StatsAccumulator();
    // one accumulator per reference geometry for each strategy (same index layout as refGeoms)
    for (final BinningStrategyKey key : stats.keySet()) {
        final StatsAccumulator[] referenceStatsAccumulators = new StatsAccumulator[TEST_ENVELOPES.length + geometryFilters.length];
        for (int i = 0; i < referenceStatsAccumulators.length; i++) {
            referenceStatsAccumulators[i] = new StatsAccumulator();
        }
        statAccsPerStrategy.put(key, referenceStatsAccumulators);
    }
    // unconstrained query over the feature type; the iterator is closed by try-with-resources
    try (CloseableIterator<SimpleFeature> it = store.query(VectorQueryBuilder.newBuilder().addTypeName(featureType.getTypeName()).build())) {
        while (it.hasNext()) {
            final SimpleFeature f = it.next();
            // considering centroids are being used for the hashing in this case, just use centroids for
            // this reference
            final Point centroid = ((Geometry) f.getDefaultGeometry()).getCentroid();
            // turns out some of the centroids are "exactly" on the border of hashes, this disambiguates
            // the border (essentially rounding it up)
            final Point centroidOffset = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(centroid.getX() + STATS_COMPARE_EPSILON, centroid.getY() + STATS_COMPARE_EPSILON));
            final double loss = ((Number) f.getAttribute("LOSS")).doubleValue();
            // every feature contributes to the full-scan reference
            referenceFullScanStatsAccumulator.add(loss);
            // and to each per-geometry accumulator whose reference geometry contains its centroid
            for (final BinningStrategyKey key : stats.keySet()) {
                final StatsAccumulator[] referenceStatsAccumulators = statAccsPerStrategy.get(key);
                final Geometry[] refGeoms = referenceGeometries.get(key);
                for (int i = 0; i < refGeoms.length; i++) {
                    if (refGeoms[i].contains(centroidOffset)) {
                        referenceStatsAccumulators[i].add(loss);
                    }
                }
            }
        }
    }
    // convert all reference accumulators into Stats snapshots for comparison
    final Stats referenceFullScanStats = referenceFullScanStatsAccumulator.snapshot();
    final Map<BinningStrategyKey, Stats[]> referenceStatsPerStrategy = new HashMap<>();
    statAccsPerStrategy.forEach((k, v) -> {
        referenceStatsPerStrategy.put(k, Arrays.stream(v).map(a -> a.snapshot()).toArray(Stats[]::new));
    });
    for (final Entry<BinningStrategyKey, NumericStatsStatistic> entry : stats.entrySet()) {
        final NumericStatsStatistic stat = entry.getValue();
        // the full-scan stats are appended as the LAST element, so index (length - 1) is the
        // unconstrained case and all earlier indices are geometrically constrained
        final Stats[] referenceStats = ArrayUtils.add(referenceStatsPerStrategy.get(entry.getKey()), referenceFullScanStats);
        final Stats[] perBinStats = new Stats[referenceStats.length];
        final Stats[] statValue = new Stats[referenceStats.length];
        // NOTE(review): fillStats is defined outside this view; presumably it populates
        // perBinStats[idx] and statValue[idx] for the given bin constraints - confirm
        fillStats(perBinStats, statValue, perBinStats.length - 1, stat, store, BinConstraints.allBins());
        for (int i = 0; i < TEST_ENVELOPES.length; i++) {
            fillStats(perBinStats, statValue, i, stat, store, BinConstraints.ofObject(TEST_ENVELOPES[i]));
        }
        for (int i = 0; i < geometryFilters.length; i++) {
            fillStats(perBinStats, statValue, i + TEST_ENVELOPES.length, stat, store, BinConstraints.ofObject(geometryFilters[i]));
        }
        // tolerance used for the constrained comparisons, looked up by binning type
        final double geometricErrorThreshold = TYPE_TO_ERROR_THRESHOLD.get(entry.getKey().type);
        for (int i = 0; i < perBinStats.length; i++) {
            // now just assert that the reference value equals the accumulated value which equals the
            // aggregated "getStatisticValue"
            // for the full scan we can make an exact assertion (to the level of precision of floating
            // point error)
            // for the geometrically constrained assertions we'll need to assert based on the provided
            // error thresholds of the binning strategy (eg. H3 has very poor approximations for
            // line/poly to h3 coords which come into play for the geometrically constrained assertions)
            final boolean isGeometricallyConstrained = (i != (perBinStats.length - 1));
            if (isGeometricallyConstrained) {
                // each comparison expects getRatio(reference, actual) to be within the threshold of 1.0
                // NOTE(review): getRatio is defined elsewhere - presumably a quotient of its two
                // arguments, which would make the threshold a relative tolerance; confirm
                Assert.assertEquals(String.format("Per Bin Stats [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].count(), perBinStats[i].count()), geometricErrorThreshold);
                Assert.assertEquals(String.format("getStatisticValue [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].count(), statValue[i].count()), geometricErrorThreshold);
                Assert.assertEquals(String.format("Per Bin Stats [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].mean(), perBinStats[i].mean()), geometricErrorThreshold);
                Assert.assertEquals(String.format("Per Bin Stats [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].populationVariance(), perBinStats[i].populationVariance()), geometricErrorThreshold);
                Assert.assertEquals(String.format("getStatisticValue [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].mean(), statValue[i].mean()), geometricErrorThreshold);
                Assert.assertEquals(String.format("getStatisticValue [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), 1.0, getRatio(referenceStats[i].populationVariance(), statValue[i].populationVariance()), geometricErrorThreshold);
            } else {
                // unconstrained case: counts are asserted with no delta; mean and variance are
                // asserted within STATS_COMPARE_EPSILON
                Assert.assertEquals(String.format("Per Bin Stats [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].count(), perBinStats[i].count());
                Assert.assertEquals(String.format("getStatisticValue [%d] count doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].count(), statValue[i].count());
                Assert.assertEquals(String.format("Per Bin Stats [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].mean(), perBinStats[i].mean(), STATS_COMPARE_EPSILON);
                Assert.assertEquals(String.format("Per Bin Stats [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].populationVariance(), perBinStats[i].populationVariance(), STATS_COMPARE_EPSILON);
                Assert.assertEquals(String.format("getStatisticValue [%d] mean doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].mean(), statValue[i].mean(), STATS_COMPARE_EPSILON);
                Assert.assertEquals(String.format("getStatisticValue [%d] variance doesn't match full scan for %s (%d)", i, entry.getKey().type, entry.getKey().precision), referenceStats[i].populationVariance(), statValue[i].populationVariance(), STATS_COMPARE_EPSILON);
            }
        }
    }
}
Also used : SpatialBinningType(org.locationtech.geowave.core.geotime.binning.SpatialBinningType) HashMap(java.util.HashMap) SpatialFieldValueBinningStrategy(org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy) NumericStatsStatistic(org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic) Statistic(org.locationtech.geowave.core.store.api.Statistic) CountStatistic(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic) StatsAccumulator(org.locationtech.geowave.core.store.statistics.field.StatsAccumulator) ByteArray(org.locationtech.geowave.core.index.ByteArray) NumericStatsStatistic(org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic) Point(org.locationtech.jts.geom.Point) TreeMap(java.util.TreeMap) Point(org.locationtech.jts.geom.Point) SimpleFeature(org.opengis.feature.simple.SimpleFeature) Geometry(org.locationtech.jts.geom.Geometry) Coordinate(org.locationtech.jts.geom.Coordinate) Stats(org.locationtech.geowave.core.store.statistics.field.Stats) File(java.io.File)

Example 2 with StatsAccumulator

Use of org.locationtech.geowave.core.store.statistics.field.StatsAccumulator in the geowave project by locationtech.

From the class GeoWaveSpatialBinningStatisticsIT, method accumulatePerBinStats.

/**
 * Folds the {@code Stats} component of every pair supplied by the iterator into a single
 * combined {@code Stats} snapshot.
 *
 * @param it iterator over pairs of a {@code ByteArray} and its {@code Stats}; it is drained to
 *        exhaustion but not closed by this method
 * @return a snapshot of an accumulator to which each pair's right-hand value was added via
 *         {@code addAll}
 */
private static Stats accumulatePerBinStats(final CloseableIterator<Pair<ByteArray, Stats>> it) {
    final StatsAccumulator combined = new StatsAccumulator();
    // only the Stats half of each pair is used; the ByteArray half is ignored
    for (; it.hasNext(); ) {
        combined.addAll(it.next().getRight());
    }
    return combined.snapshot();
}
Also used : StatsAccumulator(org.locationtech.geowave.core.store.statistics.field.StatsAccumulator) Stats(org.locationtech.geowave.core.store.statistics.field.Stats) ByteArray(org.locationtech.geowave.core.index.ByteArray)

Aggregations

ByteArray (org.locationtech.geowave.core.index.ByteArray)2 Stats (org.locationtech.geowave.core.store.statistics.field.Stats)2 StatsAccumulator (org.locationtech.geowave.core.store.statistics.field.StatsAccumulator)2 File (java.io.File)1 HashMap (java.util.HashMap)1 TreeMap (java.util.TreeMap)1 SpatialBinningType (org.locationtech.geowave.core.geotime.binning.SpatialBinningType)1 SpatialFieldValueBinningStrategy (org.locationtech.geowave.core.geotime.store.statistics.binning.SpatialFieldValueBinningStrategy)1 Statistic (org.locationtech.geowave.core.store.api.Statistic)1 CountStatistic (org.locationtech.geowave.core.store.statistics.adapter.CountStatistic)1 NumericStatsStatistic (org.locationtech.geowave.core.store.statistics.field.NumericStatsStatistic)1 Coordinate (org.locationtech.jts.geom.Coordinate)1 Geometry (org.locationtech.jts.geom.Geometry)1 Point (org.locationtech.jts.geom.Point)1 SimpleFeature (org.opengis.feature.simple.SimpleFeature)1