Use of org.locationtech.geowave.core.store.statistics.field.StatsAccumulator in the geowave project by locationtech.
Class GeoWaveSpatialBinningStatisticsIT, method testNumericStat.
/**
 * Registers one spatially binned {@link NumericStatsStatistic} on the "LOSS" attribute for every
 * {@link SpatialBinningType} at three precision levels, then checks the stored statistic values
 * against reference values accumulated by scanning every feature of the given type.
 *
 * <p>For each statistic the comparison is made for all bins (asserted to within
 * {@code STATS_COMPARE_EPSILON}, counts exactly) and for each test envelope and each geometry
 * filter (asserted as a ratio of 1.0 within the per-type threshold from
 * {@code TYPE_TO_ERROR_THRESHOLD}).
 *
 * @param featureType the feature type whose "LOSS" attribute and default geometry are used
 * @param store the data store that is queried and to which the statistics are added
 * @throws MalformedURLException if a filter file path cannot be converted to a URL
 * @throws IOException declared for the filter-resource loading and the query iterator
 */
private static void testNumericStat(final SimpleFeatureType featureType, final DataStore store) throws MalformedURLException, IOException {
  // Load the four filter geometries one at a time, in the same order they are later indexed.
  final Geometry polygonFilter =
      (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_FILTER_FILE).toURI().toURL()).getDefaultGeometry();
  final Geometry boxFilter =
      (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_FILTER_FILE).toURI().toURL()).getDefaultGeometry();
  final Geometry polygonTemporalFilter =
      (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry();
  final Geometry boxTemporalFilter =
      (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry();
  final Geometry[] geometryFilters =
      new Geometry[] {polygonFilter, boxFilter, polygonTemporalFilter, boxTemporalFilter};
  // Regions are indexed as: all test envelopes first, then the geometry filters.
  final int regionCount = TEST_ENVELOPES.length + geometryFilters.length;

  // Note: this test is only applicable for the hail (points) and tornado (lines) types
  final String geometryField = featureType.getGeometryDescriptor().getLocalName();
  // a tree map keeps iteration ordered, predictable, and sensible
  final Map<BinningStrategyKey, NumericStatsStatistic> stats =
      new TreeMap<>(Comparator.comparing(BinningStrategyKey::getName));
  // each gridding system is overly inclusive, so the matching over-inclusive reference geometry
  // has to be derived per gridding system to reliably verify results
  final Map<BinningStrategyKey, Geometry[]> referenceGeometries = new HashMap<>();

  for (final SpatialBinningType type : SpatialBinningType.values()) {
    for (int level = 1; level <= 3; level++) {
      // S2 is more than twice as granular in its use of power of 2 "levels" as opposed to only
      // using the granularity of a character for geohash and H3, so double the precision for S2
      // to make it similar in scale
      final int finalPrecision;
      if (SpatialBinningType.S2.equals(type)) {
        finalPrecision = level * 2;
      } else {
        finalPrecision = level;
      }

      final SpatialFieldValueBinningStrategy strategy =
          new SpatialFieldValueBinningStrategy(geometryField);
      strategy.setPrecision(finalPrecision);
      strategy.setType(type);

      final NumericStatsStatistic stat =
          new NumericStatsStatistic(featureType.getTypeName(), "LOSS");
      stat.setTag(String.format("Loss-Stats-%s-%d", type, finalPrecision));
      stat.setBinningStrategy(strategy);

      // the key is created only after the strategy has its precision and type set
      final BinningStrategyKey key = new BinningStrategyKey(strategy);
      stats.put(key, stat);

      final Geometry[] expandedRegions = new Geometry[regionCount];
      for (int region = 0; region < regionCount; region++) {
        final Geometry base;
        if (region < TEST_ENVELOPES.length) {
          base = GeometryUtils.GEOMETRY_FACTORY.toGeometry(TEST_ENVELOPES[region]);
        } else {
          base = geometryFilters[region - TEST_ENVELOPES.length];
        }
        expandedRegions[region] = unionWithCoveringBins(base, type, finalPrecision);
      }
      referenceGeometries.put(key, expandedRegions);
    }
  }
  store.addStatistic(stats.values().toArray(new Statistic[stats.size()]));

  // iterate through all the data once to sum up loss as a whole and per region
  final StatsAccumulator referenceFullScanStatsAccumulator = new StatsAccumulator();
  final Map<BinningStrategyKey, StatsAccumulator[]> statAccsPerStrategy = new HashMap<>();
  for (final BinningStrategyKey key : stats.keySet()) {
    final StatsAccumulator[] regionAccumulators = new StatsAccumulator[regionCount];
    Arrays.setAll(regionAccumulators, idx -> new StatsAccumulator());
    statAccsPerStrategy.put(key, regionAccumulators);
  }

  try (CloseableIterator<SimpleFeature> features =
      store.query(VectorQueryBuilder.newBuilder().addTypeName(featureType.getTypeName()).build())) {
    while (features.hasNext()) {
      final SimpleFeature feature = features.next();
      // centroids are being used for the hashing in this case, so the reference uses them too
      final Point centroid = ((Geometry) feature.getDefaultGeometry()).getCentroid();
      // some of the centroids are "exactly" on the border of hashes; nudging by epsilon
      // disambiguates the border (essentially rounding it up)
      final Coordinate nudged =
          new Coordinate(
              centroid.getX() + STATS_COMPARE_EPSILON,
              centroid.getY() + STATS_COMPARE_EPSILON);
      final Point centroidOffset = GeometryUtils.GEOMETRY_FACTORY.createPoint(nudged);
      final double loss = ((Number) feature.getAttribute("LOSS")).doubleValue();
      referenceFullScanStatsAccumulator.add(loss);
      for (final BinningStrategyKey key : stats.keySet()) {
        final StatsAccumulator[] regionAccumulators = statAccsPerStrategy.get(key);
        final Geometry[] regions = referenceGeometries.get(key);
        for (int region = 0; region < regions.length; region++) {
          if (!regions[region].contains(centroidOffset)) {
            continue;
          }
          regionAccumulators[region].add(loss);
        }
      }
    }
  }

  // freeze the accumulators into immutable snapshots for comparison
  final Stats referenceFullScanStats = referenceFullScanStatsAccumulator.snapshot();
  final Map<BinningStrategyKey, Stats[]> referenceStatsPerStrategy = new HashMap<>();
  for (final Entry<BinningStrategyKey, StatsAccumulator[]> accEntry : statAccsPerStrategy.entrySet()) {
    final StatsAccumulator[] accumulators = accEntry.getValue();
    final Stats[] snapshots = new Stats[accumulators.length];
    for (int region = 0; region < accumulators.length; region++) {
      snapshots[region] = accumulators[region].snapshot();
    }
    referenceStatsPerStrategy.put(accEntry.getKey(), snapshots);
  }

  for (final Entry<BinningStrategyKey, NumericStatsStatistic> entry : stats.entrySet()) {
    final BinningStrategyKey key = entry.getKey();
    final NumericStatsStatistic stat = entry.getValue();
    // the full-scan reference is appended as the last element, after all regions
    final Stats[] referenceStats =
        ArrayUtils.add(referenceStatsPerStrategy.get(key), referenceFullScanStats);
    final int fullScanIdx = referenceStats.length - 1;
    final Stats[] perBinStats = new Stats[referenceStats.length];
    final Stats[] statValue = new Stats[referenceStats.length];

    fillStats(perBinStats, statValue, fullScanIdx, stat, store, BinConstraints.allBins());
    for (int e = 0; e < TEST_ENVELOPES.length; e++) {
      fillStats(perBinStats, statValue, e, stat, store, BinConstraints.ofObject(TEST_ENVELOPES[e]));
    }
    for (int g = 0; g < geometryFilters.length; g++) {
      fillStats(
          perBinStats,
          statValue,
          g + TEST_ENVELOPES.length,
          stat,
          store,
          BinConstraints.ofObject(geometryFilters[g]));
    }

    final double geometricErrorThreshold = TYPE_TO_ERROR_THRESHOLD.get(key.type);
    for (int i = 0; i < perBinStats.length; i++) {
      // assert that the reference value equals the accumulated per-bin value, which in turn
      // equals the aggregated "getStatisticValue"
      final Stats expected = referenceStats[i];
      final Stats perBin = perBinStats[i];
      final Stats aggregated = statValue[i];

      final String perBinCountMsg =
          String.format("Per Bin Stats [%d] count doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String aggregatedCountMsg =
          String.format("getStatisticValue [%d] count doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String perBinMeanMsg =
          String.format("Per Bin Stats [%d] mean doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String perBinVarianceMsg =
          String.format("Per Bin Stats [%d] variance doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String aggregatedMeanMsg =
          String.format("getStatisticValue [%d] mean doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String aggregatedVarianceMsg =
          String.format("getStatisticValue [%d] variance doesn't match full scan for %s (%d)", i, key.type, key.precision);

      if (i == fullScanIdx) {
        // for the full scan we can make an exact assertion (to the level of precision of
        // floating point error)
        Assert.assertEquals(perBinCountMsg, expected.count(), perBin.count());
        Assert.assertEquals(aggregatedCountMsg, expected.count(), aggregated.count());
        Assert.assertEquals(perBinMeanMsg, expected.mean(), perBin.mean(), STATS_COMPARE_EPSILON);
        Assert.assertEquals(perBinVarianceMsg, expected.populationVariance(), perBin.populationVariance(), STATS_COMPARE_EPSILON);
        Assert.assertEquals(aggregatedMeanMsg, expected.mean(), aggregated.mean(), STATS_COMPARE_EPSILON);
        Assert.assertEquals(aggregatedVarianceMsg, expected.populationVariance(), aggregated.populationVariance(), STATS_COMPARE_EPSILON);
      } else {
        // for the geometrically constrained assertions we need to assert based on the provided
        // error thresholds of the binning strategy (eg. H3 has very poor approximations for
        // line/poly to h3 coords which come into play here)
        Assert.assertEquals(perBinCountMsg, 1.0, getRatio(expected.count(), perBin.count()), geometricErrorThreshold);
        Assert.assertEquals(aggregatedCountMsg, 1.0, getRatio(expected.count(), aggregated.count()), geometricErrorThreshold);
        Assert.assertEquals(perBinMeanMsg, 1.0, getRatio(expected.mean(), perBin.mean()), geometricErrorThreshold);
        Assert.assertEquals(perBinVarianceMsg, 1.0, getRatio(expected.populationVariance(), perBin.populationVariance()), geometricErrorThreshold);
        Assert.assertEquals(aggregatedMeanMsg, 1.0, getRatio(expected.mean(), aggregated.mean()), geometricErrorThreshold);
        Assert.assertEquals(aggregatedVarianceMsg, 1.0, getRatio(expected.populationVariance(), aggregated.populationVariance()), geometricErrorThreshold);
      }
    }
  }
}

/**
 * Unions {@code base} with the geometry of every spatial bin that {@code type} reports for it at
 * the given precision.
 */
private static Geometry unionWithCoveringBins(final Geometry base, final SpatialBinningType type, final int precision) {
  Geometry merged = base;
  for (final ByteArray bin : type.getSpatialBins(base, precision)) {
    merged = merged.union(type.getBinGeometry(bin, precision));
  }
  return merged;
}
Use of org.locationtech.geowave.core.store.statistics.field.StatsAccumulator in the geowave project by locationtech.
Class GeoWaveSpatialBinningStatisticsIT, method accumulatePerBinStats.
/**
 * Folds the {@link Stats} of every (bin, stats) pair produced by the iterator into a single
 * combined {@link Stats} snapshot.
 *
 * <p>The iterator is consumed but not closed here.
 *
 * @param it iterator over per-bin statistics; the bin identifier (left side) is ignored
 * @return a snapshot of all per-bin stats added together
 */
private static Stats accumulatePerBinStats(final CloseableIterator<Pair<ByteArray, Stats>> it) {
  final StatsAccumulator combined = new StatsAccumulator();
  for (; it.hasNext();) {
    // only the stats half of each pair contributes to the total
    combined.addAll(it.next().getRight());
  }
  return combined.snapshot();
}
Aggregations