Search in sources :

Example 1 with NumericRangeFieldValueBinningStrategy

Use of org.locationtech.geowave.core.store.statistics.binning.NumericRangeFieldValueBinningStrategy in the geowave project by locationtech.

From the class GeoWaveStatisticsIT, method testAddStatisticWithBinningStrategy.

/**
 * Verifies that statistics configured with binning strategies can be added to a data store, read
 * back through a freshly created data store, and queried both as an overall value and per bin.
 *
 * <p>Covers numeric-range binning on a single field, time-range binning by hour, day, week, month
 * and year, a multi-field numeric grid, and a composite of two numeric binning strategies. The
 * two grid statistics are additionally queried with {@code BinConstraints} and the constrained
 * results are compared against a manual filter over the unconstrained bins.
 */
@Test
public void testAddStatisticWithBinningStrategy() {
    DataStore ds = dataStore.createDataStore();
    NumericRangeStatistic longitudeRange = new NumericRangeStatistic(SimpleIngest.FEATURE_NAME, "Longitude");
    // binning by the same as the statistic should be easy to sanity check
    longitudeRange.setBinningStrategy(new NumericRangeFieldValueBinningStrategy("Longitude"));
    NumericRangeStatistic latitudeRange = new NumericRangeStatistic(SimpleIngest.FEATURE_NAME, "Latitude");
    latitudeRange.setBinningStrategy(new NumericRangeFieldValueBinningStrategy(45, "Latitude"));
    // one time-range statistic per time unit, distinguished from each other by tag
    TimeRangeStatistic timeRangeHourBin = new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
    timeRangeHourBin.setBinningStrategy(new TimeRangeFieldValueBinningStrategy(Unit.HOUR, "TimeStamp"));
    timeRangeHourBin.setTag("hour");
    TimeRangeStatistic timeRangeDayBin = new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
    timeRangeDayBin.setBinningStrategy(new TimeRangeFieldValueBinningStrategy(Unit.DAY, "TimeStamp"));
    timeRangeDayBin.setTag("day");
    TimeRangeStatistic timeRangeWeekBin = new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
    timeRangeWeekBin.setBinningStrategy(new TimeRangeFieldValueBinningStrategy(Unit.WEEK, "TimeStamp"));
    timeRangeWeekBin.setTag("week");
    TimeRangeStatistic timeRangeMonthBin = new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
    timeRangeMonthBin.setBinningStrategy(new TimeRangeFieldValueBinningStrategy(Unit.MONTH, "TimeStamp"));
    timeRangeMonthBin.setTag("month");
    TimeRangeStatistic timeRangeYearBin = new TimeRangeStatistic(SimpleIngest.FEATURE_NAME, "TimeStamp");
    timeRangeYearBin.setBinningStrategy(new TimeRangeFieldValueBinningStrategy(Unit.YEAR, "TimeStamp"));
    timeRangeYearBin.setTag("year");
    // the same lat/lon grid idea expressed two ways: one multi-field strategy vs. a composite of
    // two single-field strategies (the latter with explicit offsets)
    CountStatistic countByGridUsingMultifield = new CountStatistic(SimpleIngest.FEATURE_NAME);
    countByGridUsingMultifield.setTag("multifield-latlon");
    countByGridUsingMultifield.setBinningStrategy(new NumericRangeFieldValueBinningStrategy(45, "Latitude", "Longitude"));
    CountStatistic countByGridUsingComposite = new CountStatistic(SimpleIngest.FEATURE_NAME);
    countByGridUsingComposite.setTag("composite-latlon");
    countByGridUsingComposite.setBinningStrategy(new CompositeBinningStrategy(new NumericRangeFieldValueBinningStrategy(45, 22.5, "Latitude"), new NumericRangeFieldValueBinningStrategy(90, 45, "Longitude")));
    // scan all features to find the overall time span, used as the reference for the time bins
    long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
    try (CloseableIterator<SimpleFeature> it = ds.query(VectorQueryBuilder.newBuilder().build())) {
        while (it.hasNext()) {
            final long time = ((Date) it.next().getAttribute("TimeStamp")).getTime();
            min = Math.min(min, time);
            max = Math.max(max, time);
        }
    }
    final Interval overallInterval = Interval.of(Instant.ofEpochMilli(min), Instant.ofEpochMilli(max));
    ds.addStatistic(longitudeRange, latitudeRange, timeRangeHourBin, timeRangeDayBin, timeRangeWeekBin, timeRangeMonthBin, timeRangeYearBin, countByGridUsingMultifield, countByGridUsingComposite);
    // let's make sure serialization/deserialization works for stats: re-create the data store and
    // replace every local statistic with the instance looked up from it
    ds = dataStore.createDataStore();
    longitudeRange = (NumericRangeStatistic) ds.getFieldStatistic(longitudeRange.getStatisticType(), longitudeRange.getTypeName(), longitudeRange.getFieldName(), longitudeRange.getTag());
    latitudeRange = (NumericRangeStatistic) ds.getFieldStatistic(latitudeRange.getStatisticType(), latitudeRange.getTypeName(), latitudeRange.getFieldName(), latitudeRange.getTag());
    timeRangeHourBin = (TimeRangeStatistic) ds.getFieldStatistic(timeRangeHourBin.getStatisticType(), timeRangeHourBin.getTypeName(), timeRangeHourBin.getFieldName(), timeRangeHourBin.getTag());
    timeRangeDayBin = (TimeRangeStatistic) ds.getFieldStatistic(timeRangeDayBin.getStatisticType(), timeRangeDayBin.getTypeName(), timeRangeDayBin.getFieldName(), timeRangeDayBin.getTag());
    timeRangeWeekBin = (TimeRangeStatistic) ds.getFieldStatistic(timeRangeWeekBin.getStatisticType(), timeRangeWeekBin.getTypeName(), timeRangeWeekBin.getFieldName(), timeRangeWeekBin.getTag());
    timeRangeMonthBin = (TimeRangeStatistic) ds.getFieldStatistic(timeRangeMonthBin.getStatisticType(), timeRangeMonthBin.getTypeName(), timeRangeMonthBin.getFieldName(), timeRangeMonthBin.getTag());
    timeRangeYearBin = (TimeRangeStatistic) ds.getFieldStatistic(timeRangeYearBin.getStatisticType(), timeRangeYearBin.getTypeName(), timeRangeYearBin.getFieldName(), timeRangeYearBin.getTag());
    countByGridUsingMultifield = (CountStatistic) ds.getDataTypeStatistic(countByGridUsingMultifield.getStatisticType(), countByGridUsingMultifield.getTypeName(), countByGridUsingMultifield.getTag());
    countByGridUsingComposite = (CountStatistic) ds.getDataTypeStatistic(countByGridUsingComposite.getStatisticType(), countByGridUsingComposite.getTypeName(), countByGridUsingComposite.getTag());
    // overall (un-binned) values of the range statistics
    Range<Double> rangeValue = ds.getStatisticValue(longitudeRange);
    assertEquals(-165.0, rangeValue.getMinimum(), 0.1);
    assertEquals(180.0, rangeValue.getMaximum(), 0.1);
    rangeValue = ds.getStatisticValue(latitudeRange);
    assertEquals(-90.0, rangeValue.getMinimum(), 0.1);
    assertEquals(85.0, rangeValue.getMaximum(), 0.1);
    // Verify count statistic exists
    final Statistic<CountValue> countStat = ds.getDataTypeStatistic(CountStatistic.STATS_TYPE, SimpleIngest.FEATURE_NAME, Statistic.INTERNAL_TAG);
    assertNotNull(countStat);
    // Verify value exists; the binned count statistics must report the same overall total
    Long countValue = ds.getStatisticValue(countStat);
    assertEquals(Long.valueOf(20L), countValue);
    countValue = ds.getStatisticValue(countByGridUsingMultifield);
    assertEquals(Long.valueOf(20L), countValue);
    countValue = ds.getStatisticValue(countByGridUsingComposite);
    assertEquals(Long.valueOf(20L), countValue);
    // per-bin checks: longitude is expected in 20 bins of width 1, latitude in 4 bins of width 45
    assertBinnedNumericRange(ds, longitudeRange, 1, 20);
    assertBinnedNumericRange(ds, latitudeRange, 45, 4);
    assertTimeBinning(ds, timeRangeHourBin, 20, (i) -> Duration.ofHours(1L), overallInterval);
    assertTimeBinning(ds, timeRangeDayBin, 20, (i) -> Duration.ofDays(1L), overallInterval);
    assertTimeBinning(ds, timeRangeWeekBin, 20, (i) -> Duration.ofDays(7L), overallInterval);
    // month and year bins vary in length, so the expected duration is derived from the bin start
    // NOTE(review): Calendar.getInstance() uses the JVM default time zone - confirm this matches
    // the zone the time binning strategy uses
    assertTimeBinning(ds, timeRangeMonthBin, 12, (i) -> {
        final Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(i.getStart().toEpochMilli());
        return Duration.ofDays(cal.getActualMaximum(Calendar.DAY_OF_MONTH));
    }, overallInterval);
    assertTimeBinning(ds, timeRangeYearBin, 1, (i) -> {
        final Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(i.getStart().toEpochMilli());
        return Duration.ofDays(cal.getActualMaximum(Calendar.DAY_OF_YEAR));
    }, overallInterval);
    // walk every bin of the multi-field grid, and manually collect the bins that overlap the
    // constraint used by the constrained query below
    final Set<ByteArray> multiFieldFilteredExpectedResults = new HashSet<>();
    int multiFieldFilteredExpectedCount = 0;
    try (CloseableIterator<Pair<ByteArray, Long>> iterator = ds.getBinnedStatisticValues(countByGridUsingMultifield)) {
        int count = 0;
        while (iterator.hasNext()) {
            final Pair<ByteArray, Long> binValue = iterator.next();
            final Map<String, Range<Double>> rangePerField = ((NumericRangeFieldValueBinningStrategy) countByGridUsingMultifield.getBinningStrategy()).getRanges(binValue.getKey());
            assertEquals(1L, binValue.getValue().longValue());
            assertEquals(2, rangePerField.size());
            final Range<Double> latRange = rangePerField.get("Latitude");
            final Range<Double> lonRange = rangePerField.get("Longitude");
            // this ensures the interval is 45
            assertEquals(45, latRange.getMaximum() - latRange.getMinimum(), 0.1);
            assertEquals(45, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
            // this ensures the offset is 0
            assertEquals(0.0, latRange.getMinimum() % 45.0, 0.1);
            assertEquals(0.0, lonRange.getMinimum() % 45.0, 0.1);
            if (latRange.isOverlappedBy(Range.is(12.0)) && lonRange.isOverlappedBy(Range.between(-89.0, 89.0))) {
                multiFieldFilteredExpectedResults.add(binValue.getKey());
                multiFieldFilteredExpectedCount += binValue.getValue();
            }
            count++;
        }
        assertEquals(20, count);
    }
    // now query by object constraints on the gridded bins
    try (CloseableIterator<Pair<ByteArray, Long>> iterator = ds.getBinnedStatisticValues(countByGridUsingMultifield, BinConstraints.ofObject(new Pair[] { Pair.of("Latitude", Double.valueOf(12.0)), Pair.of("Longitude", Range.between(-89.0, 89.0)) }))) {
        final Set<ByteArray> multiFieldFilteredActualResults = new HashSet<>();
        int count = 0;
        while (iterator.hasNext()) {
            final Pair<ByteArray, Long> binValue = iterator.next();
            final Map<String, Range<Double>> rangePerField = ((NumericRangeFieldValueBinningStrategy) countByGridUsingMultifield.getBinningStrategy()).getRanges(binValue.getKey());
            assertEquals(1L, binValue.getValue().longValue());
            assertEquals(2, rangePerField.size());
            final Range<Double> latRange = rangePerField.get("Latitude");
            final Range<Double> lonRange = rangePerField.get("Longitude");
            // the latitude constraint of 12.0 should only match the [0, 45] latitude bin
            assertEquals(0.0, latRange.getMinimum(), 0.1);
            assertEquals(45.0, latRange.getMaximum(), 0.1);
            // longitude bins keep the 45 interval and must stay within the bins touched by
            // the [-89, 89] constraint
            assertEquals(45, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
            assertTrue(lonRange.getMaximum() < 90.1);
            assertTrue(lonRange.getMinimum() > -90.1);
            // this ensures the offset is 0
            assertEquals(0.0, latRange.getMinimum() % 45.0, 0.1);
            assertEquals(0.0, lonRange.getMinimum() % 45.0, 0.1);
            count += binValue.getValue();
            multiFieldFilteredActualResults.add(binValue.getKey());
        }
        // the constrained query must return exactly the bins selected by the manual filter
        assertEquals(multiFieldFilteredExpectedCount, count);
        assertTrue(multiFieldFilteredExpectedResults.containsAll(multiFieldFilteredActualResults));
        assertTrue(multiFieldFilteredActualResults.containsAll(multiFieldFilteredExpectedResults));
    }
    // same procedure for the composite grid: unconstrained walk with manual filter first
    final Set<ByteArray> compositeFilteredExpectedResults = new HashSet<>();
    int compositeFilteredExpectedCount = 0;
    try (CloseableIterator<Pair<ByteArray, Long>> iterator = ds.getBinnedStatisticValues(countByGridUsingComposite)) {
        int count = 0;
        int totalCount = 0;
        while (iterator.hasNext()) {
            final Pair<ByteArray, Long> binValue = iterator.next();
            totalCount += binValue.getValue();
            final Pair<StatisticBinningStrategy, ByteArray>[] bins = ((CompositeBinningStrategy) countByGridUsingComposite.getBinningStrategy()).getSubBins(binValue.getKey());
            assertEquals(2, bins.length);
            // sub-bin 0 is latitude and sub-bin 1 is longitude, in the order the composite was built
            final Range<Double> latRange = ((NumericRangeFieldValueBinningStrategy) bins[0].getLeft()).getRange(bins[0].getRight());
            final Range<Double> lonRange = ((NumericRangeFieldValueBinningStrategy) bins[1].getLeft()).getRange(bins[1].getRight());
            // this ensures the interval is 45 and 90 respectively
            assertEquals(45, latRange.getMaximum() - latRange.getMinimum(), 0.1);
            assertEquals(90, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
            // this ensures the offset is 22.5 and 45 respectively
            assertEquals(22.5, Math.abs(latRange.getMinimum() % 45.0), 0.1);
            assertEquals(45.0, Math.abs(lonRange.getMinimum() % 90.0), 0.1);
            count++;
            if (latRange.isOverlappedBy(Range.between(-44.0, 44.0)) && lonRange.isOverlappedBy(Range.between(-179.0, 89.0))) {
                compositeFilteredExpectedResults.add(binValue.getKey());
                compositeFilteredExpectedCount += binValue.getValue();
            }
        }
        // 16 distinct bins are expected to hold the 20 features
        assertEquals(16, count);
        assertEquals(20, totalCount);
    }
    // constrained query on the composite grid: one Range constraint per sub-strategy
    try (CloseableIterator<Pair<ByteArray, Long>> iterator = ds.getBinnedStatisticValues(countByGridUsingComposite, BinConstraints.ofObject(new Range[] { Range.between(-44.0, 44.0), Range.between(-179.0, 89.0) }))) {
        final Set<ByteArray> compositeFilteredActualResults = new HashSet<>();
        int totalCount = 0;
        while (iterator.hasNext()) {
            final Pair<ByteArray, Long> binValue = iterator.next();
            totalCount += binValue.getValue();
            final Pair<StatisticBinningStrategy, ByteArray>[] bins = ((CompositeBinningStrategy) countByGridUsingComposite.getBinningStrategy()).getSubBins(binValue.getKey());
            assertEquals(2, bins.length);
            final Range<Double> latRange = ((NumericRangeFieldValueBinningStrategy) bins[0].getLeft()).getRange(bins[0].getRight());
            final Range<Double> lonRange = ((NumericRangeFieldValueBinningStrategy) bins[1].getLeft()).getRange(bins[1].getRight());
            // this ensures the interval is 45 and 90 respectively
            assertEquals(45, latRange.getMaximum() - latRange.getMinimum(), 0.1);
            assertEquals(90, lonRange.getMaximum() - lonRange.getMinimum(), 0.1);
            // this ensures the offset is 22.5 and 45 respectively
            assertEquals(22.5, Math.abs(latRange.getMinimum() % 45.0), 0.1);
            assertEquals(45.0, Math.abs(lonRange.getMinimum() % 90.0), 0.1);
            // returned bins must lie within the outer edges of the bins touched by the constraints
            assertTrue(latRange.getMaximum() < 67.6);
            assertTrue(latRange.getMinimum() > -67.6);
            assertTrue(lonRange.getMaximum() < 135.1);
            assertTrue(lonRange.getMinimum() > -225.1);
            compositeFilteredActualResults.add(binValue.getKey());
        }
        // the constrained query must return exactly the bins selected by the manual filter
        assertTrue(compositeFilteredExpectedResults.containsAll(compositeFilteredActualResults));
        assertTrue(compositeFilteredActualResults.containsAll(compositeFilteredExpectedResults));
        assertEquals(compositeFilteredExpectedCount, totalCount);
    }
}

/**
 * Walks every bin of a numeric range statistic whose binning strategy is a
 * {@code NumericRangeFieldValueBinningStrategy} and asserts that each bin has the expected width,
 * that the bin's range contains the range value stored for it, and that the expected number of
 * bins is present.
 *
 * @param ds the data store to read binned values from
 * @param statistic the binned numeric range statistic to check
 * @param expectedBinWidth the expected difference between each bin's maximum and minimum
 *        (compared with a tolerance of 0.1)
 * @param expectedBinCount the expected total number of bins
 */
private static void assertBinnedNumericRange(final DataStore ds, final NumericRangeStatistic statistic, final double expectedBinWidth, final int expectedBinCount) {
    final NumericRangeFieldValueBinningStrategy binningStrategy = (NumericRangeFieldValueBinningStrategy) statistic.getBinningStrategy();
    try (CloseableIterator<Pair<ByteArray, Range<Double>>> iterator = ds.getBinnedStatisticValues(statistic)) {
        int count = 0;
        while (iterator.hasNext()) {
            final Pair<ByteArray, Range<Double>> binValue = iterator.next();
            // translate the bin key back into the numeric range that bin covers
            final Range<Double> binRange = binningStrategy.getRange(binValue.getKey());
            assertEquals(expectedBinWidth, binRange.getMaximum() - binRange.getMinimum(), 0.1);
            assertTrue(binRange.containsRange(binValue.getValue()));
            count++;
        }
        assertEquals(expectedBinCount, count);
    }
}
Also used : NumericRangeStatistic(org.locationtech.geowave.core.store.statistics.field.NumericRangeStatistic) TimeRangeFieldValueBinningStrategy(org.locationtech.geowave.core.geotime.store.statistics.binning.TimeRangeFieldValueBinningStrategy) CountValue(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic.CountValue) DataStore(org.locationtech.geowave.core.store.api.DataStore) ByteArray(org.locationtech.geowave.core.index.ByteArray) CountStatistic(org.locationtech.geowave.core.store.statistics.adapter.CountStatistic) Pair(org.apache.commons.lang3.tuple.Pair) HashSet(java.util.HashSet) Calendar(java.util.Calendar) Range(org.apache.commons.lang3.Range) SimpleFeature(org.opengis.feature.simple.SimpleFeature) Date(java.util.Date) NumericRangeFieldValueBinningStrategy(org.locationtech.geowave.core.store.statistics.binning.NumericRangeFieldValueBinningStrategy) CompositeBinningStrategy(org.locationtech.geowave.core.store.statistics.binning.CompositeBinningStrategy) TimeRangeStatistic(org.locationtech.geowave.core.geotime.store.statistics.TimeRangeStatistic) Interval(org.threeten.extra.Interval) Test(org.junit.Test)

Aggregations

Calendar (java.util.Calendar)1 Date (java.util.Date)1 HashSet (java.util.HashSet)1 Range (org.apache.commons.lang3.Range)1 Pair (org.apache.commons.lang3.tuple.Pair)1 Test (org.junit.Test)1 TimeRangeStatistic (org.locationtech.geowave.core.geotime.store.statistics.TimeRangeStatistic)1 TimeRangeFieldValueBinningStrategy (org.locationtech.geowave.core.geotime.store.statistics.binning.TimeRangeFieldValueBinningStrategy)1 ByteArray (org.locationtech.geowave.core.index.ByteArray)1 DataStore (org.locationtech.geowave.core.store.api.DataStore)1 CountStatistic (org.locationtech.geowave.core.store.statistics.adapter.CountStatistic)1 CountValue (org.locationtech.geowave.core.store.statistics.adapter.CountStatistic.CountValue)1 CompositeBinningStrategy (org.locationtech.geowave.core.store.statistics.binning.CompositeBinningStrategy)1 NumericRangeFieldValueBinningStrategy (org.locationtech.geowave.core.store.statistics.binning.NumericRangeFieldValueBinningStrategy)1 NumericRangeStatistic (org.locationtech.geowave.core.store.statistics.field.NumericRangeStatistic)1 SimpleFeature (org.opengis.feature.simple.SimpleFeature)1 Interval (org.threeten.extra.Interval)1