use of org.locationtech.geowave.core.store.api.Statistic in project geowave by locationtech.
the class IndexImpl method getDefaultStatistics.
/**
 * Builds the list of statistics that are tracked for this index by default.
 *
 * <p>Six statistics are created, each for this index's name and each flagged as internal: index
 * metadata, duplicate entry counts, partitions, differing visibility counts, field visibility
 * counts and a row range histogram. All of them are binned by data type; the row range histogram
 * is additionally binned by partition through a composite binning strategy.
 *
 * @return a new mutable list holding the default statistics, in the order listed above
 */
@Override
public List<Statistic<? extends StatisticValue<?>>> getDefaultStatistics() {
  final String indexName = getName();
  final List<Statistic<? extends StatisticValue<?>>> defaults = Lists.newArrayListWithCapacity(6);

  // index metadata, seeded from the index strategy
  final IndexMetaDataSetStatistic metaDataStat =
      new IndexMetaDataSetStatistic(indexName, indexStrategy.createMetaData());
  metaDataStat.setBinningStrategy(new DataTypeBinningStrategy());
  metaDataStat.setInternal();
  defaults.add(metaDataStat);

  // duplicate entry counts
  final DuplicateEntryCountStatistic duplicateStat = new DuplicateEntryCountStatistic(indexName);
  duplicateStat.setBinningStrategy(new DataTypeBinningStrategy());
  duplicateStat.setInternal();
  defaults.add(duplicateStat);

  // partitions
  final PartitionsStatistic partitionsStat = new PartitionsStatistic(indexName);
  partitionsStat.setBinningStrategy(new DataTypeBinningStrategy());
  partitionsStat.setInternal();
  defaults.add(partitionsStat);

  // differing visibility counts
  final DifferingVisibilityCountStatistic differingVisibilityStat =
      new DifferingVisibilityCountStatistic(indexName);
  differingVisibilityStat.setBinningStrategy(new DataTypeBinningStrategy());
  differingVisibilityStat.setInternal();
  defaults.add(differingVisibilityStat);

  // field visibility counts
  final FieldVisibilityCountStatistic fieldVisibilityStat =
      new FieldVisibilityCountStatistic(indexName);
  fieldVisibilityStat.setBinningStrategy(new DataTypeBinningStrategy());
  fieldVisibilityStat.setInternal();
  defaults.add(fieldVisibilityStat);

  // row range histogram: the only default statistic binned by data type AND partition
  final RowRangeHistogramStatistic histogramStat = new RowRangeHistogramStatistic(indexName);
  histogramStat.setBinningStrategy(
      new CompositeBinningStrategy(new DataTypeBinningStrategy(), new PartitionBinningStrategy()));
  histogramStat.setInternal();
  defaults.add(histogramStat);

  return defaults;
}
use of org.locationtech.geowave.core.store.api.Statistic in project geowave by locationtech.
the class AbstractGeoWaveBasicVectorIT method testStats.
/**
 * Checks that the statistics persisted in the data store match statistics recomputed locally from
 * the same input files.
 *
 * <p>The input files are first read through the vector ingest plugin and every entry whose
 * adapter is a {@link DefaultStatisticsProvider} is fed into a per-type {@code StatisticsCache}.
 * Then, for every adapter in the persistent adapter store, the number of stored data type and
 * field statistics is compared with the number of expected statistics, every expected value is
 * compared with the stored value for the same bin, the bounding box is validated (with and
 * without an explicit field name), and finally the bounding box statistic is removed.
 *
 * @param inputFiles the files to recompute the expected statistics from
 * @param multithreaded whether the ingest was multithreaded
 * @param crs the coordinate reference system handed to each {@code StatisticsCache}
 * @param indices the indices whose names are passed to the ingest plugin
 */
@SuppressWarnings("unchecked")
protected void testStats(final URL[] inputFiles, final boolean multithreaded, final CoordinateReferenceSystem crs, final Index... indices) {
  // In the multithreaded case, only test min/max and count. Stats will be
  // ingested in a different order and will not match.
  // NOTE(review): 'multithreaded' is not read anywhere in this method body - confirm whether the
  // comment above is still accurate.
  final LocalFileIngestPlugin<SimpleFeature> ingestPlugin =
      new GeoToolsVectorDataStoreIngestPlugin(Filter.INCLUDE);
  final String[] indexNames = Arrays.stream(indices).map(Index::getName).toArray(String[]::new);
  final Map<String, StatisticsCache> expectedByType = new HashMap<>();

  // Pass 1: recompute the expected statistics straight from the input files.
  for (final URL inputFile : inputFiles) {
    LOGGER.warn("Calculating stats from file '" + inputFile.getPath() + "' - this may take several minutes...");
    try (final CloseableIterator<GeoWaveData<SimpleFeature>> entries =
        ingestPlugin.toGeoWaveData(inputFile, indexNames)) {
      final TransientAdapterStore adapterCache =
          new MemoryAdapterStore(ingestPlugin.getDataAdapters());
      while (entries.hasNext()) {
        final GeoWaveData<SimpleFeature> entry = entries.next();
        final DataTypeAdapter<SimpleFeature> entryAdapter = entry.getAdapter(adapterCache);
        // it should be a statistical data adapter; anything else is skipped
        if (!(entryAdapter instanceof DefaultStatisticsProvider)) {
          continue;
        }
        expectedByType.computeIfAbsent(
            entryAdapter.getTypeName(),
            typeName -> new StatisticsCache(entryAdapter, crs)).entryIngested(entry.getValue());
      }
    }
  }

  // Pass 2: compare against what the data store persisted.
  final DataStatisticsStore statsStore = getDataStorePluginOptions().createDataStatisticsStore();
  final PersistentAdapterStore adapterStore = getDataStorePluginOptions().createAdapterStore();
  for (final InternalDataAdapter<?> internalAdapter : adapterStore.getAdapters()) {
    final FeatureDataAdapter adapter = (FeatureDataAdapter) internalAdapter.getAdapter();
    final String typeName = adapter.getTypeName();
    final StatisticsCache expected = expectedByType.get(typeName);
    Assert.assertNotNull(expected);
    final Set<Entry<Statistic<?>, Map<ByteArray, StatisticValue<?>>>> expectedStats =
        expected.statsCache.entrySet();

    // the stored data type statistics plus field statistics must add up to the expected count
    int persistedCount = 0;
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> typeStats =
        statsStore.getDataTypeStatistics(adapter, null, null)) {
      for (; typeStats.hasNext(); typeStats.next()) {
        persistedCount++;
      }
    }
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> fieldStats =
        statsStore.getFieldStatistics(adapter, null, null, null)) {
      for (; fieldStats.hasNext(); fieldStats.next()) {
        persistedCount++;
      }
    }
    Assert.assertEquals("The number of stats for data adapter '" + typeName + "' do not match count expected", expectedStats.size(), persistedCount);

    // every expected value must equal the stored value of the same statistic and bin
    for (final Entry<Statistic<?>, Map<ByteArray, StatisticValue<?>>> expectedStat : expectedStats) {
      final Statistic<StatisticValue<Object>> statistic =
          (Statistic<StatisticValue<Object>>) expectedStat.getKey();
      for (final Entry<ByteArray, StatisticValue<?>> binnedValue : expectedStat.getValue().entrySet()) {
        final ByteArray bin = binnedValue.getKey();
        final StatisticValue<Object> actual;
        if (!bin.equals(StatisticValue.NO_BIN)) {
          actual = statsStore.getStatisticValue(statistic, bin);
        } else {
          // values without a bin are looked up without a bin argument
          actual = statsStore.getStatisticValue(statistic);
        }
        assertEquals(binnedValue.getValue().getValue(), actual.getValue());
      }
    }

    // finally check the one stat that is more manually calculated -
    // the bounding box
    final String geometryField = adapter.getFeatureType().getGeometryDescriptor().getLocalName();
    final StatisticQuery<BoundingBoxValue, Envelope> explicitFieldQuery =
        StatisticQueryBuilder.newBuilder(BoundingBoxStatistic.STATS_TYPE)
            .fieldName(geometryField)
            .typeName(typeName)
            .build();
    final BoundingBoxValue explicitFieldBBox =
        getDataStorePluginOptions().createDataStore().aggregateStatistics(explicitFieldQuery);
    validateBBox(explicitFieldBBox.getValue(), expected);

    // now make sure it works without giving field name because there is only one geometry field
    // anyways
    final StatisticQuery<BoundingBoxValue, Envelope> implicitFieldQuery =
        StatisticQueryBuilder.newBuilder(BoundingBoxStatistic.STATS_TYPE)
            .typeName(typeName)
            .build();
    final BoundingBoxValue implicitFieldBBox =
        getDataStorePluginOptions().createDataStore().aggregateStatistics(implicitFieldQuery);
    validateBBox(implicitFieldBBox.getValue(), expected);

    // removing a single statistic must succeed and leave nothing behind under its id
    final StatisticId<BoundingBoxValue> bboxStatId =
        FieldStatistic.generateStatisticId(
            typeName,
            BoundingBoxStatistic.STATS_TYPE,
            geometryField,
            Statistic.INTERNAL_TAG);
    Assert.assertTrue("Unable to remove individual stat", statsStore.removeStatistic(statsStore.getStatisticById(bboxStatId)));
    Assert.assertNull("Individual stat was not successfully removed", statsStore.getStatisticById(bboxStatId));
  }
}
use of org.locationtech.geowave.core.store.api.Statistic in project geowave by locationtech.
the class GeoWaveSpatialBinningStatisticsIT method testNumericStat.
/**
 * Registers a spatially binned numeric statistic on the {@code LOSS} attribute for every spatial
 * binning type at three precisions, then verifies the stored statistics against reference values
 * accumulated by scanning all features of the type.
 *
 * <p>For each binning strategy a set of reference geometries is built: every test envelope and
 * every filter geometry is unioned with the geometry of each bin it maps to, because each
 * gridding system is over-inclusive. Reference statistics are then accumulated per reference
 * geometry (using slightly offset centroids) and for the full scan, and compared with the values
 * obtained through {@code fillStats}.
 *
 * <p>Note: this test is only applicable for the hail (points) and tornado (lines) types.
 *
 * @param featureType the feature type whose {@code LOSS} attribute is analyzed
 * @param store the data store the statistics are added to and queried from
 * @throws MalformedURLException if a filter file path cannot be converted to a URL
 * @throws IOException declared for reading the filter resources
 */
private static void testNumericStat(final SimpleFeatureType featureType, final DataStore store) throws MalformedURLException, IOException {
  final Geometry[] geometryFilters = {
      (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_FILTER_FILE).toURI().toURL()).getDefaultGeometry(),
      (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_FILTER_FILE).toURI().toURL()).getDefaultGeometry(),
      (Geometry) TestUtils.resourceToFeature(new File(TEST_POLYGON_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry(),
      (Geometry) TestUtils.resourceToFeature(new File(TEST_BOX_TEMPORAL_FILTER_FILE).toURI().toURL()).getDefaultGeometry()};
  final int envelopeCount = TEST_ENVELOPES.length;
  // reference slots: one per test envelope followed by one per filter geometry
  final int referenceCount = envelopeCount + geometryFilters.length;
  final String geometryField = featureType.getGeometryDescriptor().getLocalName();

  // we're using a tree map just to make iteration ordered, predictable, and sensible
  final Map<BinningStrategyKey, NumericStatsStatistic> stats =
      new TreeMap<>(Comparator.comparing(BinningStrategyKey::getName));
  // because each gridding system will be overly inclusive, we need to determine the appropriate
  // over-inclusive reference geometry per gridding system to reliably verify results
  final Map<BinningStrategyKey, Geometry[]> referenceGeometries = new HashMap<>();

  for (final SpatialBinningType type : SpatialBinningType.values()) {
    for (int level = 1; level < 4; level++) {
      // S2 is more than twice as granular in its use of power of 2 "levels" as opposed to only
      // using the granularity of a character for geohash and H3
      // so double the precision for S2 to make it similar in scale
      final int binPrecision = SpatialBinningType.S2.equals(type) ? level * 2 : level;

      final SpatialFieldValueBinningStrategy strategy =
          new SpatialFieldValueBinningStrategy(geometryField);
      strategy.setPrecision(binPrecision);
      strategy.setType(type);

      final NumericStatsStatistic lossStat =
          new NumericStatsStatistic(featureType.getTypeName(), "LOSS");
      lossStat.setTag(String.format("Loss-Stats-%s-%d", type, binPrecision));
      lossStat.setBinningStrategy(strategy);

      final BinningStrategyKey key = new BinningStrategyKey(strategy);
      stats.put(key, lossStat);

      // grow every seed geometry (envelopes first, then filters) by the bins it maps to
      final Geometry[] refGeoms = new Geometry[referenceCount];
      for (int i = 0; i < referenceCount; i++) {
        final Geometry seed =
            i < envelopeCount ? GeometryUtils.GEOMETRY_FACTORY.toGeometry(TEST_ENVELOPES[i])
                : geometryFilters[i - envelopeCount];
        Geometry expanded = seed;
        for (final ByteArray bin : type.getSpatialBins(seed, binPrecision)) {
          expanded = expanded.union(type.getBinGeometry(bin, binPrecision));
        }
        refGeoms[i] = expanded;
      }
      referenceGeometries.put(key, refGeoms);
    }
  }
  store.addStatistic(stats.values().toArray(new Statistic[stats.size()]));

  // just iterate through all the data to sum up loss as a whole and per area
  final StatsAccumulator fullScanAccumulator = new StatsAccumulator();
  final Map<BinningStrategyKey, StatsAccumulator[]> statAccsPerStrategy = new HashMap<>();
  for (final BinningStrategyKey key : stats.keySet()) {
    final StatsAccumulator[] accumulators = new StatsAccumulator[referenceCount];
    Arrays.setAll(accumulators, slot -> new StatsAccumulator());
    statAccsPerStrategy.put(key, accumulators);
  }
  try (CloseableIterator<SimpleFeature> features =
      store.query(
          VectorQueryBuilder.newBuilder().addTypeName(featureType.getTypeName()).build())) {
    while (features.hasNext()) {
      final SimpleFeature feature = features.next();
      // considering centroids are being used for the hashing in this case, just use centroids for
      // this reference
      final Point centroid = ((Geometry) feature.getDefaultGeometry()).getCentroid();
      // turns out some of the centroids are "exactly" on the border of hashes, this disambiguates
      // the border (essentially rounding it up)
      final Coordinate nudged =
          new Coordinate(
              centroid.getX() + STATS_COMPARE_EPSILON,
              centroid.getY() + STATS_COMPARE_EPSILON);
      final Point centroidOffset = GeometryUtils.GEOMETRY_FACTORY.createPoint(nudged);
      final double loss = ((Number) feature.getAttribute("LOSS")).doubleValue();
      fullScanAccumulator.add(loss);
      for (final BinningStrategyKey key : stats.keySet()) {
        final StatsAccumulator[] accumulators = statAccsPerStrategy.get(key);
        final Geometry[] refGeoms = referenceGeometries.get(key);
        for (int i = 0; i < refGeoms.length; i++) {
          if (refGeoms[i].contains(centroidOffset)) {
            accumulators[i].add(loss);
          }
        }
      }
    }
  }

  // freeze the accumulators into immutable snapshots
  final Stats referenceFullScanStats = fullScanAccumulator.snapshot();
  final Map<BinningStrategyKey, Stats[]> referenceStatsPerStrategy = new HashMap<>();
  for (final Entry<BinningStrategyKey, StatsAccumulator[]> accEntry : statAccsPerStrategy.entrySet()) {
    referenceStatsPerStrategy.put(
        accEntry.getKey(),
        Arrays.stream(accEntry.getValue()).map(StatsAccumulator::snapshot).toArray(Stats[]::new));
  }

  for (final Entry<BinningStrategyKey, NumericStatsStatistic> entry : stats.entrySet()) {
    final BinningStrategyKey key = entry.getKey();
    final NumericStatsStatistic stat = entry.getValue();
    // the full scan reference is appended as the last slot
    final Stats[] referenceStats =
        ArrayUtils.add(referenceStatsPerStrategy.get(key), referenceFullScanStats);
    final int fullScanIdx = referenceStats.length - 1;
    final Stats[] perBinStats = new Stats[referenceStats.length];
    final Stats[] statValue = new Stats[referenceStats.length];
    fillStats(perBinStats, statValue, fullScanIdx, stat, store, BinConstraints.allBins());
    for (int i = 0; i < envelopeCount; i++) {
      fillStats(perBinStats, statValue, i, stat, store, BinConstraints.ofObject(TEST_ENVELOPES[i]));
    }
    for (int i = 0; i < geometryFilters.length; i++) {
      fillStats(perBinStats, statValue, envelopeCount + i, stat, store, BinConstraints.ofObject(geometryFilters[i]));
    }
    final double geometricErrorThreshold = TYPE_TO_ERROR_THRESHOLD.get(key.type);

    for (int i = 0; i < perBinStats.length; i++) {
      final String binCountMsg = String.format("Per Bin Stats [%d] count doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String valueCountMsg = String.format("getStatisticValue [%d] count doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String binMeanMsg = String.format("Per Bin Stats [%d] mean doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String binVarianceMsg = String.format("Per Bin Stats [%d] variance doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String valueMeanMsg = String.format("getStatisticValue [%d] mean doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final String valueVarianceMsg = String.format("getStatisticValue [%d] variance doesn't match full scan for %s (%d)", i, key.type, key.precision);
      final Stats expected = referenceStats[i];
      final Stats binned = perBinStats[i];
      final Stats queried = statValue[i];

      // now just assert that the reference value equals the accumulated value which equals the
      // aggregated "getStatisticValue"
      if (i == fullScanIdx) {
        // for the full scan we can make an exact assertion (to the level of precision of
        // floating point error)
        Assert.assertEquals(binCountMsg, expected.count(), binned.count());
        Assert.assertEquals(valueCountMsg, expected.count(), queried.count());
        Assert.assertEquals(binMeanMsg, expected.mean(), binned.mean(), STATS_COMPARE_EPSILON);
        Assert.assertEquals(binVarianceMsg, expected.populationVariance(), binned.populationVariance(), STATS_COMPARE_EPSILON);
        Assert.assertEquals(valueMeanMsg, expected.mean(), queried.mean(), STATS_COMPARE_EPSILON);
        Assert.assertEquals(valueVarianceMsg, expected.populationVariance(), queried.populationVariance(), STATS_COMPARE_EPSILON);
      } else {
        // for the geometrically constrained assertions we'll need to assert based on the provided
        // error thresholds of the binning strategy (eg. H3 has very poor approximations for
        // line/poly to h3 coords which come into play for the geometrically constrained
        // assertions)
        Assert.assertEquals(binCountMsg, 1.0, getRatio(expected.count(), binned.count()), geometricErrorThreshold);
        Assert.assertEquals(valueCountMsg, 1.0, getRatio(expected.count(), queried.count()), geometricErrorThreshold);
        Assert.assertEquals(binMeanMsg, 1.0, getRatio(expected.mean(), binned.mean()), geometricErrorThreshold);
        Assert.assertEquals(binVarianceMsg, 1.0, getRatio(expected.populationVariance(), binned.populationVariance()), geometricErrorThreshold);
        Assert.assertEquals(valueMeanMsg, 1.0, getRatio(expected.mean(), queried.mean()), geometricErrorThreshold);
        Assert.assertEquals(valueVarianceMsg, 1.0, getRatio(expected.populationVariance(), queried.populationVariance()), geometricErrorThreshold);
      }
    }
  }
}
use of org.locationtech.geowave.core.store.api.Statistic in project geowave by locationtech.
the class StatisticsCache method getAdapterStatistic.
/**
 * Looks up the value of a data type (adapter) statistic, consulting the local cache first.
 *
 * <p>On a cache miss the statistics store is asked for statistics of the given type on this
 * cache's adapter, and the value of the first statistic returned is read using this cache's
 * authorizations. The outcome is stored in the cache under the statistic type, including a
 * {@code null} outcome, so a repeated lookup of the same type does not query the store again.
 *
 * @param statisticType the type of statistic to look up
 * @param <V> the statistic value type
 * @param <R> the type of the raw value wrapped by {@code V}
 * @return the statistic value, or {@code null} if {@code statisticType} is {@code null}, no
 *         statistic of that type was found, or the statistic has no value
 */
@SuppressWarnings("unchecked")
public <V extends StatisticValue<R>, R> V getAdapterStatistic(final StatisticType<V> statisticType) {
  if (statisticType == null) {
    // Same guard as getFieldStatistic. Without it a null type would become the cache key and be
    // handed to the store lookup; presumably the store treats a null type as "no type filter"
    // (TODO confirm), in which case the value of an arbitrary statistic would be cast to V.
    return null;
  }
  final ByteArray key = statisticType;
  // containsKey (rather than a null check on get) so that cached null results are honored
  if (cache.containsKey(key)) {
    return (V) cache.get(key);
  }
  V retVal = null;
  try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> statsIter =
      statisticsStore.getDataTypeStatistics(adapter, statisticType, null)) {
    // only the first matching statistic is considered
    if (statsIter.hasNext()) {
      final Statistic<V> stat = (Statistic<V>) statsIter.next();
      retVal = statisticsStore.getStatisticValue(stat, authorizations);
    }
  }
  cache.put(key, retVal);
  return retVal;
}
use of org.locationtech.geowave.core.store.api.Statistic in project geowave by locationtech.
the class StatisticsCache method getFieldStatistic.
/**
 * Looks up the value of a field statistic, consulting the local cache first.
 *
 * <p>The cache key is the statistic type's bytes, the statistic id separator and the binary form
 * of the field name, concatenated. On a cache miss the statistics store is asked for statistics
 * of the given type on the given field of this cache's adapter, and the value of the first
 * statistic returned is read using this cache's authorizations. The outcome is cached, including
 * a {@code null} outcome.
 *
 * @param statisticType the type of statistic to look up
 * @param fieldName the name of the field the statistic is tracked on
 * @param <V> the statistic value type
 * @param <R> the type of the raw value wrapped by {@code V}
 * @return the statistic value, or {@code null} if either argument is {@code null}, no matching
 *         statistic was found, or the statistic has no value
 */
@SuppressWarnings("unchecked")
public <V extends StatisticValue<R>, R> V getFieldStatistic(final StatisticType<V> statisticType, final String fieldName) {
  if (statisticType == null || fieldName == null) {
    return null;
  }
  final byte[] keyBytes =
      Bytes.concat(
          statisticType.getBytes(),
          StatisticId.UNIQUE_ID_SEPARATOR,
          StringUtils.stringToBinary(fieldName));
  final ByteArray cacheKey = new ByteArray(keyBytes);
  // containsKey (rather than a null check on get) so that cached null results are honored
  if (cache.containsKey(cacheKey)) {
    return (V) cache.get(cacheKey);
  }
  V fetched = null;
  try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> matches =
      statisticsStore.getFieldStatistics(adapter, statisticType, fieldName, null)) {
    // only the first matching statistic is considered
    if (matches.hasNext()) {
      final Statistic<V> firstMatch = (Statistic<V>) matches.next();
      fetched = statisticsStore.getStatisticValue(firstMatch, authorizations);
    }
  }
  cache.put(cacheKey, fetched);
  return fetched;
}
Aggregations