Use of org.locationtech.geowave.core.store.statistics.binning.NumericRangeFieldValueBinningStrategy in project geowave by locationtech.
The class GeoWaveStatisticsIT, method testAddStatisticWithBinningStrategy.
/**
 * Adds several binned statistics to a data store and verifies both their aggregate values and
 * their per-bin values.
 *
 * <p>The statistics exercised are:
 * <ul>
 * <li>numeric ranges of "Longitude" (bins asserted to be 1 wide) and "Latitude" (bins asserted to
 * be 45 wide),</li>
 * <li>time ranges of "TimeStamp" binned by hour, day, week, month and year,</li>
 * <li>feature counts on a lat/lon grid, once through a single multi-field numeric strategy and
 * once through a {@link CompositeBinningStrategy} with per-field intervals and offsets.</li>
 * </ul>
 *
 * <p>After the statistics are added, they are looked up again through a newly created data store
 * instance so that the assertions run against the stored definitions. The expected numbers (20
 * features, the coordinate extents, the bin counts) presumably match the data ingested by the
 * test fixture outside this method; verify against the setup code if they change.
 */
@Test
public void testAddStatisticWithBinningStrategy() {
  DataStore ds = dataStore.createDataStore();

  NumericRangeStatistic longitudeRange =
      new NumericRangeStatistic(SimpleIngest.FEATURE_NAME, "Longitude");
  // binning by the same as the statistic should be easy to sanity check
  longitudeRange.setBinningStrategy(new NumericRangeFieldValueBinningStrategy("Longitude"));
  NumericRangeStatistic latitudeRange =
      new NumericRangeStatistic(SimpleIngest.FEATURE_NAME, "Latitude");
  latitudeRange.setBinningStrategy(new NumericRangeFieldValueBinningStrategy(45, "Latitude"));

  // The time range statistics share type and field, so each one gets a distinct tag.
  TimeRangeStatistic timeRangeHourBin =
      new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
  timeRangeHourBin.setBinningStrategy(
      new TimeRangeFieldValueBinningStrategy(Unit.HOUR, "TimeStamp"));
  timeRangeHourBin.setTag("hour");
  TimeRangeStatistic timeRangeDayBin =
      new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
  timeRangeDayBin.setBinningStrategy(
      new TimeRangeFieldValueBinningStrategy(Unit.DAY, "TimeStamp"));
  timeRangeDayBin.setTag("day");
  TimeRangeStatistic timeRangeWeekBin =
      new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
  timeRangeWeekBin.setBinningStrategy(
      new TimeRangeFieldValueBinningStrategy(Unit.WEEK, "TimeStamp"));
  timeRangeWeekBin.setTag("week");
  TimeRangeStatistic timeRangeMonthBin =
      new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
  timeRangeMonthBin.setBinningStrategy(
      new TimeRangeFieldValueBinningStrategy(Unit.MONTH, "TimeStamp"));
  timeRangeMonthBin.setTag("month");
  TimeRangeStatistic timeRangeYearBin =
      new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
  timeRangeYearBin.setBinningStrategy(
      new TimeRangeFieldValueBinningStrategy(Unit.YEAR, "TimeStamp"));
  timeRangeYearBin.setTag("year");

  CountStatistic countByGridUsingMultifield = new CountStatistic(SimpleIngest.FEATURE_NAME);
  countByGridUsingMultifield.setTag("multifield-latlon");
  countByGridUsingMultifield.setBinningStrategy(
      new NumericRangeFieldValueBinningStrategy(45, "Latitude", "Longitude"));
  CountStatistic countByGridUsingComposite = new CountStatistic(SimpleIngest.FEATURE_NAME);
  countByGridUsingComposite.setTag("composite-latlon");
  countByGridUsingComposite.setBinningStrategy(
      new CompositeBinningStrategy(
          new NumericRangeFieldValueBinningStrategy(45, 22.5, "Latitude"),
          new NumericRangeFieldValueBinningStrategy(90, 45, "Longitude")));

  // Scan every feature to find the overall time extent that the time bins are checked against.
  long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
  try (CloseableIterator<SimpleFeature> it = ds.query(VectorQueryBuilder.newBuilder().build())) {
    while (it.hasNext()) {
      final long time = ((Date) it.next().getAttribute("TimeStamp")).getTime();
      min = Math.min(min, time);
      max = Math.max(max, time);
    }
  }
  final Interval overallInterval =
      Interval.of(Instant.ofEpochMilli(min), Instant.ofEpochMilli(max));

  ds.addStatistic(
      longitudeRange,
      latitudeRange,
      timeRangeHourBin,
      timeRangeDayBin,
      timeRangeWeekBin,
      timeRangeMonthBin,
      timeRangeYearBin,
      countByGridUsingMultifield,
      countByGridUsingComposite);

  // let's make sure serialization/deserialization works for stats
  ds = dataStore.createDataStore();
  longitudeRange =
      (NumericRangeStatistic) ds.getFieldStatistic(
          longitudeRange.getStatisticType(),
          longitudeRange.getTypeName(),
          longitudeRange.getFieldName(),
          longitudeRange.getTag());
  latitudeRange =
      (NumericRangeStatistic) ds.getFieldStatistic(
          latitudeRange.getStatisticType(),
          latitudeRange.getTypeName(),
          latitudeRange.getFieldName(),
          latitudeRange.getTag());
  timeRangeHourBin =
      (TimeRangeStatistic) ds.getFieldStatistic(
          timeRangeHourBin.getStatisticType(),
          timeRangeHourBin.getTypeName(),
          timeRangeHourBin.getFieldName(),
          timeRangeHourBin.getTag());
  timeRangeDayBin =
      (TimeRangeStatistic) ds.getFieldStatistic(
          timeRangeDayBin.getStatisticType(),
          timeRangeDayBin.getTypeName(),
          timeRangeDayBin.getFieldName(),
          timeRangeDayBin.getTag());
  timeRangeWeekBin =
      (TimeRangeStatistic) ds.getFieldStatistic(
          timeRangeWeekBin.getStatisticType(),
          timeRangeWeekBin.getTypeName(),
          timeRangeWeekBin.getFieldName(),
          timeRangeWeekBin.getTag());
  timeRangeMonthBin =
      (TimeRangeStatistic) ds.getFieldStatistic(
          timeRangeMonthBin.getStatisticType(),
          timeRangeMonthBin.getTypeName(),
          timeRangeMonthBin.getFieldName(),
          timeRangeMonthBin.getTag());
  timeRangeYearBin =
      (TimeRangeStatistic) ds.getFieldStatistic(
          timeRangeYearBin.getStatisticType(),
          timeRangeYearBin.getTypeName(),
          timeRangeYearBin.getFieldName(),
          timeRangeYearBin.getTag());
  countByGridUsingMultifield =
      (CountStatistic) ds.getDataTypeStatistic(
          countByGridUsingMultifield.getStatisticType(),
          countByGridUsingMultifield.getTypeName(),
          countByGridUsingMultifield.getTag());
  countByGridUsingComposite =
      (CountStatistic) ds.getDataTypeStatistic(
          countByGridUsingComposite.getStatisticType(),
          countByGridUsingComposite.getTypeName(),
          countByGridUsingComposite.getTag());

  // Aggregate (all bins merged) values of the numeric range statistics.
  Range<Double> rangeValue = ds.getStatisticValue(longitudeRange);
  assertEquals(-165.0, rangeValue.getMinimum(), 0.1);
  assertEquals(180.0, rangeValue.getMaximum(), 0.1);
  rangeValue = ds.getStatisticValue(latitudeRange);
  assertEquals(-90.0, rangeValue.getMinimum(), 0.1);
  assertEquals(85.0, rangeValue.getMaximum(), 0.1);

  // Verify count statistic exists
  final Statistic<CountValue> countStat =
      ds.getDataTypeStatistic(
          CountStatistic.STATS_TYPE,
          SimpleIngest.FEATURE_NAME,
          Statistic.INTERNAL_TAG);
  assertNotNull(countStat);

  // Verify value exists; the binned counts must merge to the same total as the internal count.
  final Long expectedTotalCount = Long.valueOf(20L);
  Long countValue = ds.getStatisticValue(countStat);
  assertEquals(expectedTotalCount, countValue);
  countValue = ds.getStatisticValue(countByGridUsingMultifield);
  assertEquals(expectedTotalCount, countValue);
  countValue = ds.getStatisticValue(countByGridUsingComposite);
  assertEquals(expectedTotalCount, countValue);

  // Longitude bins: each bin is 1 wide and must contain the range recorded within it.
  try (CloseableIterator<Pair<ByteArray, Range<Double>>> iterator =
      ds.getBinnedStatisticValues(longitudeRange)) {
    int count = 0;
    while (iterator.hasNext()) {
      final Pair<ByteArray, Range<Double>> binValue = iterator.next();
      final Range<Double> binRange =
          ((NumericRangeFieldValueBinningStrategy) longitudeRange.getBinningStrategy()).getRange(
              binValue.getKey());
      assertEquals(1, binRange.getMaximum() - binRange.getMinimum(), 0.1);
      assertTrue(binRange.containsRange(binValue.getValue()));
      count++;
    }
    assertEquals(20, count);
  }

  // Latitude bins: each bin is 45 wide and must contain the range recorded within it.
  try (CloseableIterator<Pair<ByteArray, Range<Double>>> iterator =
      ds.getBinnedStatisticValues(latitudeRange)) {
    int count = 0;
    while (iterator.hasNext()) {
      final Pair<ByteArray, Range<Double>> binValue = iterator.next();
      final Range<Double> binRange =
          ((NumericRangeFieldValueBinningStrategy) latitudeRange.getBinningStrategy()).getRange(
              binValue.getKey());
      assertEquals(45, binRange.getMaximum() - binRange.getMinimum(), 0.1);
      assertTrue(binRange.containsRange(binValue.getValue()));
      count++;
    }
    assertEquals(4, count);
  }

  assertTimeBinning(ds, timeRangeHourBin, 20, (i) -> Duration.ofHours(1L), overallInterval);
  assertTimeBinning(ds, timeRangeDayBin, 20, (i) -> Duration.ofDays(1L), overallInterval);
  assertTimeBinning(ds, timeRangeWeekBin, 20, (i) -> Duration.ofDays(7L), overallInterval);
  // Month and year bins vary in length, so the expected duration is derived from the bin start.
  // NOTE(review): Calendar.getInstance() uses the JVM default time zone; confirm this matches
  // the time zone the time binning strategy uses for month/year boundaries.
  assertTimeBinning(ds, timeRangeMonthBin, 12, (i) -> {
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(i.getStart().toEpochMilli());
    return Duration.ofDays(cal.getActualMaximum(Calendar.DAY_OF_MONTH));
  }, overallInterval);
  assertTimeBinning(ds, timeRangeYearBin, 1, (i) -> {
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(i.getStart().toEpochMilli());
    return Duration.ofDays(cal.getActualMaximum(Calendar.DAY_OF_YEAR));
  }, overallInterval);

  // Walk all multi-field grid bins, and compute client-side which bins the constrained query
  // below is expected to return.
  final Set<ByteArray> multiFieldFilteredExpectedResults = new HashSet<>();
  // long rather than int: bin values are Long and should not be narrowed while summing
  long multiFieldFilteredExpectedCount = 0;
  try (CloseableIterator<Pair<ByteArray, Long>> iterator =
      ds.getBinnedStatisticValues(countByGridUsingMultifield)) {
    int count = 0;
    while (iterator.hasNext()) {
      final Pair<ByteArray, Long> binValue = iterator.next();
      final Map<String, Range<Double>> rangePerField =
          ((NumericRangeFieldValueBinningStrategy) countByGridUsingMultifield.getBinningStrategy()).getRanges(
              binValue.getKey());
      assertEquals(1L, binValue.getValue().longValue());
      assertEquals(2, rangePerField.size());
      final Range<Double> latRange = rangePerField.get("Latitude");
      final Range<Double> lonRange = rangePerField.get("Longitude");
      // this ensures the interval is 45
      assertEquals(45, latRange.getMaximum() - latRange.getMinimum(), 0.1);
      assertEquals(45, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
      // this ensures the offset is 0
      assertEquals(0.0, latRange.getMinimum() % 45.0, 0.1);
      assertEquals(0.0, lonRange.getMinimum() % 45.0, 0.1);
      if (latRange.isOverlappedBy(Range.is(12.0))
          && lonRange.isOverlappedBy(Range.between(-89.0, 89.0))) {
        multiFieldFilteredExpectedResults.add(binValue.getKey());
        multiFieldFilteredExpectedCount += binValue.getValue();
      }
      count++;
    }
    assertEquals(20, count);
  }

  // now query by object constraints on the gridded bins
  try (CloseableIterator<Pair<ByteArray, Long>> iterator =
      ds.getBinnedStatisticValues(
          countByGridUsingMultifield,
          BinConstraints.ofObject(
              new Pair[] {
                  Pair.of("Latitude", Double.valueOf(12.0)),
                  Pair.of("Longitude", Range.between(-89.0, 89.0))}))) {
    final Set<ByteArray> multiFieldFilteredActualResults = new HashSet<>();
    long count = 0;
    while (iterator.hasNext()) {
      final Pair<ByteArray, Long> binValue = iterator.next();
      final Map<String, Range<Double>> rangePerField =
          ((NumericRangeFieldValueBinningStrategy) countByGridUsingMultifield.getBinningStrategy()).getRanges(
              binValue.getKey());
      assertEquals(1L, binValue.getValue().longValue());
      assertEquals(2, rangePerField.size());
      final Range<Double> latRange = rangePerField.get("Latitude");
      final Range<Double> lonRange = rangePerField.get("Longitude");
      // latitude 12.0 falls in the [0, 45] bin; longitude bins are 45 wide within [-90, 90]
      assertEquals(0.0, latRange.getMinimum(), 0.1);
      assertEquals(45.0, latRange.getMaximum(), 0.1);
      assertEquals(45, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
      assertTrue(lonRange.getMaximum() < 90.1);
      assertTrue(lonRange.getMinimum() > -90.1);
      // this ensures the offset is 0
      assertEquals(0.0, latRange.getMinimum() % 45.0, 0.1);
      assertEquals(0.0, lonRange.getMinimum() % 45.0, 0.1);
      count += binValue.getValue();
      multiFieldFilteredActualResults.add(binValue.getKey());
    }
    assertEquals(multiFieldFilteredExpectedCount, count);
    // containment in both directions means the two bin sets are equal
    assertTrue(multiFieldFilteredExpectedResults.containsAll(multiFieldFilteredActualResults));
    assertTrue(multiFieldFilteredActualResults.containsAll(multiFieldFilteredExpectedResults));
  }

  // Same approach for the composite strategy: walk all bins, then compare against a constrained
  // query.
  final Set<ByteArray> compositeFilteredExpectedResults = new HashSet<>();
  long compositeFilteredExpectedCount = 0;
  try (CloseableIterator<Pair<ByteArray, Long>> iterator =
      ds.getBinnedStatisticValues(countByGridUsingComposite)) {
    int count = 0;
    long totalCount = 0;
    while (iterator.hasNext()) {
      final Pair<ByteArray, Long> binValue = iterator.next();
      totalCount += binValue.getValue();
      final Pair<StatisticBinningStrategy, ByteArray>[] bins =
          ((CompositeBinningStrategy) countByGridUsingComposite.getBinningStrategy()).getSubBins(
              binValue.getKey());
      assertEquals(2, bins.length);
      // sub-bins come back in the order the strategies were composed: latitude, then longitude
      final Range<Double> latRange =
          ((NumericRangeFieldValueBinningStrategy) bins[0].getLeft()).getRange(
              bins[0].getRight());
      final Range<Double> lonRange =
          ((NumericRangeFieldValueBinningStrategy) bins[1].getLeft()).getRange(
              bins[1].getRight());
      // this ensures the interval is 45 and 90 respectively
      assertEquals(45, latRange.getMaximum() - latRange.getMinimum(), 0.1);
      assertEquals(90, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
      // this ensures the offset is 22.5 and 45 respectively
      assertEquals(22.5, Math.abs(latRange.getMinimum() % 45.0), 0.1);
      assertEquals(45.0, Math.abs(lonRange.getMinimum() % 90.0), 0.1);
      count++;
      if (latRange.isOverlappedBy(Range.between(-44.0, 44.0))
          && lonRange.isOverlappedBy(Range.between(-179.0, 89.0))) {
        compositeFilteredExpectedResults.add(binValue.getKey());
        compositeFilteredExpectedCount += binValue.getValue();
      }
    }
    assertEquals(16, count);
    assertEquals(20, totalCount);
  }

  try (CloseableIterator<Pair<ByteArray, Long>> iterator =
      ds.getBinnedStatisticValues(
          countByGridUsingComposite,
          BinConstraints.ofObject(
              new Range[] {Range.between(-44.0, 44.0), Range.between(-179.0, 89.0)}))) {
    final Set<ByteArray> compositeFilteredActualResults = new HashSet<>();
    long totalCount = 0;
    while (iterator.hasNext()) {
      final Pair<ByteArray, Long> binValue = iterator.next();
      totalCount += binValue.getValue();
      final Pair<StatisticBinningStrategy, ByteArray>[] bins =
          ((CompositeBinningStrategy) countByGridUsingComposite.getBinningStrategy()).getSubBins(
              binValue.getKey());
      assertEquals(2, bins.length);
      final Range<Double> latRange =
          ((NumericRangeFieldValueBinningStrategy) bins[0].getLeft()).getRange(
              bins[0].getRight());
      final Range<Double> lonRange =
          ((NumericRangeFieldValueBinningStrategy) bins[1].getLeft()).getRange(
              bins[1].getRight());
      // this ensures the interval is 45 and 90 respectively
      assertEquals(45, latRange.getMaximum() - latRange.getMinimum(), 0.1);
      assertEquals(90, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
      // this ensures the offset is 22.5 and 45 respectively
      assertEquals(22.5, Math.abs(latRange.getMinimum() % 45.0), 0.1);
      assertEquals(45.0, Math.abs(lonRange.getMinimum() % 90.0), 0.1);
      // every returned bin must lie within the bins that overlap the constraint ranges
      assertTrue(latRange.getMaximum() < 67.6);
      assertTrue(latRange.getMinimum() > -67.6);
      assertTrue(lonRange.getMaximum() < 135.1);
      assertTrue(lonRange.getMinimum() > -225.1);
      compositeFilteredActualResults.add(binValue.getKey());
    }
    // containment in both directions means the two bin sets are equal
    assertTrue(compositeFilteredExpectedResults.containsAll(compositeFilteredActualResults));
    assertTrue(compositeFilteredActualResults.containsAll(compositeFilteredExpectedResults));
    assertEquals(compositeFilteredExpectedCount, totalCount);
  }
}
Aggregations