use of org.locationtech.geowave.core.store.statistics.StatisticId in project geowave by locationtech.
the class AbstractGeoWaveBasicVectorIT method testStats.
/**
 * Verifies that the statistics persisted in the data store agree with statistics recomputed
 * locally from the given input files.
 *
 * <p>Every input file is re-read through a {@link GeoToolsVectorDataStoreIngestPlugin}; each
 * feature whose adapter is a {@link DefaultStatisticsProvider} is fed into a per-type
 * {@link StatisticsCache}. Then, for every adapter in the adapter store:
 * <ul>
 * <li>the number of data type plus field statistics in the statistics store must equal the number
 * of cached statistics,</li>
 * <li>every cached value (binned or not) must equal the stored value,</li>
 * <li>the bounding box is validated both with and without an explicit geometry field name,</li>
 * <li>the internal bounding box statistic must be removable and gone afterwards.</li>
 * </ul>
 *
 * @param inputFiles the files to recompute statistics from
 * @param multithreaded whether ingest was multithreaded; this method does not consult the flag
 * @param crs the coordinate reference system handed to each {@link StatisticsCache}
 * @param indices the indices whose names are passed to the ingest plugin
 */
@SuppressWarnings("unchecked")
protected void testStats(final URL[] inputFiles, final boolean multithreaded, final CoordinateReferenceSystem crs, final Index... indices) {
  // Historical note: with multithreaded ingest, stats are ingested in a different order and may
  // not match. The checks below are currently the same regardless of the flag.
  final LocalFileIngestPlugin<SimpleFeature> localFileIngest = new GeoToolsVectorDataStoreIngestPlugin(Filter.INCLUDE);
  final Map<String, StatisticsCache> statsCache = new HashMap<>();
  final String[] indexNames = Arrays.stream(indices).map(Index::getName).toArray(String[]::new);
  for (final URL inputFile : inputFiles) {
    LOGGER.warn("Calculating stats from file '" + inputFile.getPath() + "' - this may take several minutes...");
    try (final CloseableIterator<GeoWaveData<SimpleFeature>> dataIterator = localFileIngest.toGeoWaveData(inputFile, indexNames)) {
      final TransientAdapterStore adapterCache = new MemoryAdapterStore(localFileIngest.getDataAdapters());
      while (dataIterator.hasNext()) {
        final GeoWaveData<SimpleFeature> data = dataIterator.next();
        final DataTypeAdapter<SimpleFeature> adapter = data.getAdapter(adapterCache);
        // it should be a statistical data adapter
        if (adapter instanceof DefaultStatisticsProvider) {
          // one cache per type name, created lazily on the first feature of that type
          statsCache.computeIfAbsent(adapter.getTypeName(), typeName -> new StatisticsCache(adapter, crs)).entryIngested(data.getValue());
        }
      }
    }
  }
  final DataStatisticsStore statsStore = getDataStorePluginOptions().createDataStatisticsStore();
  final PersistentAdapterStore adapterStore = getDataStorePluginOptions().createAdapterStore();
  final InternalDataAdapter<?>[] adapters = adapterStore.getAdapters();
  for (final InternalDataAdapter<?> internalDataAdapter : adapters) {
    final FeatureDataAdapter adapter = (FeatureDataAdapter) internalDataAdapter.getAdapter();
    final StatisticsCache cachedValue = statsCache.get(adapter.getTypeName());
    Assert.assertNotNull(cachedValue);
    final Set<Entry<Statistic<?>, Map<ByteArray, StatisticValue<?>>>> expectedStats = cachedValue.statsCache.entrySet();
    // count the data type statistics and the field statistics registered for this adapter
    int statsCount = 0;
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> statsIterator = statsStore.getDataTypeStatistics(adapter, null, null)) {
      while (statsIterator.hasNext()) {
        statsIterator.next();
        statsCount++;
      }
    }
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> statsIterator = statsStore.getFieldStatistics(adapter, null, null, null)) {
      while (statsIterator.hasNext()) {
        statsIterator.next();
        statsCount++;
      }
    }
    Assert.assertEquals("The number of stats for data adapter '" + adapter.getTypeName() + "' do not match count expected", expectedStats.size(), statsCount);
    for (final Entry<Statistic<?>, Map<ByteArray, StatisticValue<?>>> expectedStat : expectedStats) {
      final Statistic<StatisticValue<Object>> statistic = (Statistic<StatisticValue<Object>>) expectedStat.getKey();
      for (final Entry<ByteArray, StatisticValue<?>> expectedValues : expectedStat.getValue().entrySet()) {
        final ByteArray bin = expectedValues.getKey();
        final StatisticValue<Object> actual;
        if (bin.equals(StatisticValue.NO_BIN)) {
          actual = statsStore.getStatisticValue(statistic);
        } else {
          actual = statsStore.getStatisticValue(statistic, bin);
        }
        // fail with a descriptive message instead of a bare NullPointerException below
        Assert.assertNotNull("No stored value found for statistic '" + statistic + "' of data adapter '" + adapter.getTypeName() + "'", actual);
        assertEquals(expectedValues.getValue().getValue(), actual.getValue());
      }
    }
    // finally check the one stat that is more manually calculated -
    // the bounding box
    StatisticQuery<BoundingBoxValue, Envelope> query = StatisticQueryBuilder.newBuilder(BoundingBoxStatistic.STATS_TYPE).fieldName(adapter.getFeatureType().getGeometryDescriptor().getLocalName()).typeName(adapter.getTypeName()).build();
    BoundingBoxValue bboxStat = getDataStorePluginOptions().createDataStore().aggregateStatistics(query);
    validateBBox(bboxStat.getValue(), cachedValue);
    // now make sure it works without giving field name because there is only one geometry field
    // anyways
    query = StatisticQueryBuilder.newBuilder(BoundingBoxStatistic.STATS_TYPE).typeName(adapter.getTypeName()).build();
    bboxStat = getDataStorePluginOptions().createDataStore().aggregateStatistics(query);
    validateBBox(bboxStat.getValue(), cachedValue);
    // the internal bounding box statistic must be individually removable
    final StatisticId<BoundingBoxValue> bboxStatId = FieldStatistic.generateStatisticId(adapter.getTypeName(), BoundingBoxStatistic.STATS_TYPE, adapter.getFeatureType().getGeometryDescriptor().getLocalName(), Statistic.INTERNAL_TAG);
    Assert.assertTrue("Unable to remove individual stat", statsStore.removeStatistic(statsStore.getStatisticById(bboxStatId)));
    Assert.assertNull("Individual stat was not successfully removed", statsStore.getStatisticById(bboxStatId));
  }
}
use of org.locationtech.geowave.core.store.statistics.StatisticId in project geowave by locationtech.
the class OptimalExpressionQuery method determineBestIndices.
/**
 * Chooses, for every adapter that matches the filter, the index that should be queried and
 * groups the adapters by the chosen index.
 *
 * <p>For each adapter, the default choice is the first index that passes the optional index name
 * and {@code indexFilter} checks, with a non-attribute index replacing an attribute index when
 * one is encountered. An index becomes a constrained candidate when all of its indexed fields are
 * referenced by the filter and the resulting constraints constrain all of those fields. With a
 * single candidate, that index is used. With several, the candidate with the lowest estimated
 * cardinality (from its internal row range histogram statistic) is used; candidates whose query
 * ranges are empty are skipped. Whenever a constrained candidate is selected, its constraints are
 * stored in {@code constraintCache} under the adapter's type name.
 *
 * @param baseOptions the query options; an index name set here restricts the indices considered
 * @param adapters the adapters being queried
 * @param adapterIndexMappingStore used to look up the indices mapped to each adapter
 * @param indexStore used to resolve each mapping to its index
 * @param statisticsStore used to derive constraints and to estimate cardinality
 * @return one pair per selected index, holding the index and the adapters to query with it
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public List<Pair<Index, List<InternalDataAdapter<?>>>> determineBestIndices(final BaseQueryOptions baseOptions, final InternalDataAdapter<?>[] adapters, final AdapterIndexMappingStore adapterIndexMappingStore, final IndexStore indexStore, final DataStatisticsStore statisticsStore) {
  final Map<Index, List<InternalDataAdapter<?>>> bestIndices = Maps.newHashMap();
  final Set<String> referencedFields = Sets.newHashSet();
  filter.addReferencedFields(referencedFields);
  for (final InternalDataAdapter<?> adapter : adapters) {
    if (!adapterMatchesFilter(adapter, referencedFields)) {
      continue;
    }
    final AdapterToIndexMapping[] adapterIndices = adapterIndexMappingStore.getIndicesForAdapter(adapter.getAdapterId());
    final Map<Index, FilterConstraints<?>> indexConstraints = Maps.newHashMap();
    Index bestIndex = null;
    for (final AdapterToIndexMapping mapping : adapterIndices) {
      if ((baseOptions.getIndexName() != null) && !baseOptions.getIndexName().equals(mapping.getIndexName())) {
        continue;
      }
      final Index index = mapping.getIndex(indexStore);
      if (indexFilter != null && !indexFilter.test(index)) {
        continue;
      }
      // Fallback choice: the first acceptable index, upgraded from an attribute index to a
      // non-attribute index if one shows up later.
      if ((bestIndex == null) || ((bestIndex instanceof AttributeIndex) && !(index instanceof AttributeIndex))) {
        bestIndex = index;
      }
      final Set<String> indexedFields = Sets.newHashSet();
      final Class<? extends Comparable> filterClass;
      if ((index instanceof CustomIndex) && (((CustomIndex<?, ?>) index).getCustomIndexStrategy() instanceof TextIndexStrategy)) {
        final TextIndexStrategy<?> indexStrategy = (TextIndexStrategy<?>) ((CustomIndex<?, ?>) index).getCustomIndexStrategy();
        if (!(indexStrategy.getEntryConverter() instanceof AdapterFieldTextIndexEntryConverter)) {
          continue;
        }
        indexedFields.add(((AdapterFieldTextIndexEntryConverter<?>) indexStrategy.getEntryConverter()).getFieldName());
        filterClass = String.class;
      } else {
        for (final IndexFieldMapper<?, ?> mapper : mapping.getIndexFieldMappers()) {
          for (final String adapterField : mapper.getAdapterFields()) {
            indexedFields.add(adapterField);
          }
        }
        // Remove any fields that are part of the common index model, but not used in the index
        // strategy. They shouldn't be considered when trying to find a best match. In the future
        // it may be useful to consider an index that has extra common index dimensions that
        // contain filtered fields over one that only matches indexed dimensions. For example, if
        // I have a spatial index, and a spatial index that stores time, it should pick the one
        // that stores time if I supply a temporal constraint, even though it isn't part of the
        // index strategy.
        final int modelDimensions = index.getIndexModel().getDimensions().length;
        final int strategyDimensions = index.getIndexStrategy().getOrderedDimensionDefinitions().length;
        for (int i = modelDimensions - 1; i >= strategyDimensions; i--) {
          final IndexFieldMapper<?, ?> mapper = mapping.getMapperForIndexField(index.getIndexModel().getDimensions()[i].getFieldName());
          for (final String adapterField : mapper.getAdapterFields()) {
            indexedFields.remove(adapterField);
          }
        }
        filterClass = Double.class;
      }
      // Only indices whose every indexed field is referenced and constrained are candidates.
      if (referencedFields.containsAll(indexedFields)) {
        final FilterConstraints<?> constraints = filter.getConstraints(filterClass, statisticsStore, adapter, mapping, index, indexedFields);
        if (constraints.constrainsAllFields(indexedFields)) {
          indexConstraints.put(index, constraints);
        }
      }
    }
    if (indexConstraints.size() == 1) {
      final Entry<Index, FilterConstraints<?>> bestEntry = indexConstraints.entrySet().iterator().next();
      bestIndex = bestEntry.getKey();
      constraintCache.put(adapter.getTypeName(), bestEntry.getValue());
    } else if (indexConstraints.size() > 1) {
      // determine which constraint is the best
      double bestCardinality = Double.MAX_VALUE;
      Index bestConstrainedIndex = null;
      for (final Entry<Index, FilterConstraints<?>> entry : indexConstraints.entrySet()) {
        final Index candidate = entry.getKey();
        final QueryRanges ranges = entry.getValue().getQueryRanges(baseOptions, statisticsStore);
        if (ranges.isEmpty()) {
          continue;
        }
        // TODO: A future optimization would be to add a default numeric histogram for any numeric
        // index dimensions and just use the index data ranges to determine cardinality rather
        // than decomposing query ranges.
        final StatisticId<RowRangeHistogramValue> statisticId = IndexStatistic.generateStatisticId(candidate.getName(), RowRangeHistogramStatistic.STATS_TYPE, Statistic.INTERNAL_TAG);
        final RowRangeHistogramStatistic histogram = (RowRangeHistogramStatistic) statisticsStore.getStatisticById(statisticId);
        // Estimate against the candidate being evaluated. Previously the best index found so far
        // was passed here, which is null on the first iteration and never the candidate itself.
        final double cardinality = DataStoreUtils.cardinality(statisticsStore, histogram, adapter, candidate, ranges);
        if ((bestConstrainedIndex == null) || (cardinality < bestCardinality)) {
          bestConstrainedIndex = candidate;
          bestCardinality = cardinality;
        }
      }
      // If every candidate had empty ranges, keep the fallback index and cache nothing.
      if (bestConstrainedIndex != null) {
        bestIndex = bestConstrainedIndex;
        constraintCache.put(adapter.getTypeName(), indexConstraints.get(bestIndex));
      }
    }
    if (bestIndex == null) {
      continue;
    }
    bestIndices.computeIfAbsent(bestIndex, key -> Lists.newArrayList()).add(adapter);
  }
  return bestIndices.entrySet().stream().map(e -> Pair.of(e.getKey(), e.getValue())).collect(Collectors.toList());
}
use of org.locationtech.geowave.core.store.statistics.StatisticId in project geowave by locationtech.
the class ChooseBestMatchIndexQueryStrategyTest method testChooseSpatialTemporalWithStats.
/**
 * Checks that {@link ChooseBestMatchIndexQueryStrategy} yields exactly one index, the
 * spatial-temporal one, for a query constraining latitude, longitude and time when row range
 * histogram values have been populated for both the spatial-temporal and the spatial index.
 */
@Test
public void testChooseSpatialTemporalWithStats() {
  final Index temporalindex = new SpatialTemporalIndexBuilder().createIndex();
  final Index spatialIndex = new SpatialIndexBuilder().createIndex();
  final RowRangeHistogramStatistic rangeTempStats = new RowRangeHistogramStatistic(temporalindex.getName());
  rangeTempStats.setBinningStrategy(new CompositeBinningStrategy(new DataTypeBinningStrategy(), new PartitionBinningStrategy()));
  rangeTempStats.setInternal();
  final RowRangeHistogramStatistic rangeStats = new RowRangeHistogramStatistic(spatialIndex.getName());
  rangeStats.setBinningStrategy(new CompositeBinningStrategy(new DataTypeBinningStrategy(), new PartitionBinningStrategy()));
  rangeStats.setInternal();
  final Map<StatisticId<?>, Map<ByteArray, StatisticValue<?>>> statsMap = new HashMap<>();
  final ChooseBestMatchIndexQueryStrategy strategy = new ChooseBestMatchIndexQueryStrategy();
  // spatial constraints merged with a temporal constraint
  final ConstraintSet cs1 = new ConstraintSet();
  cs1.addConstraint(LatitudeDefinition.class, new ConstraintData(new ConstrainedIndexValue(0.3, 0.5), true));
  cs1.addConstraint(LongitudeDefinition.class, new ConstraintData(new ConstrainedIndexValue(0.4, 0.7), true));
  final ConstraintSet cs2a = new ConstraintSet();
  cs2a.addConstraint(TimeDefinition.class, new ConstraintData(new ConstrainedIndexValue(0.1, 0.2), true));
  final ConstraintsByClass constraints = new ConstraintsByClass(Arrays.asList(cs2a)).merge(Collections.singletonList(cs1));
  final BasicQueryByClass query = new BasicQueryByClass(constraints);
  final NumericIndexStrategy temporalIndexStrategy = new SpatialTemporalIndexBuilder().createIndex().getIndexStrategy();
  // A single seeded generator feeds both passes, so the draw sequence is the same on every run.
  final Random r = new Random(SEED);
  ingestRandomRows(statsMap, rangeTempStats, temporalIndexStrategy, r);
  final Index index = new SpatialIndexBuilder().createIndex();
  final NumericIndexStrategy indexStrategy = index.getIndexStrategy();
  ingestRandomRows(statsMap, rangeStats, indexStrategy, r);
  final Iterator<Index> it = getIndices(new TestDataStatisticsStore(Lists.newArrayList(rangeStats, rangeTempStats), statsMap), query, strategy);
  assertTrue(it.hasNext());
  assertEquals(temporalindex.getName(), it.next().getName());
  assertFalse(it.hasNext());
}

/**
 * Generates {@code ROWS} random (x, y, t) points, computes their insertion ids with the given
 * index strategy and ingests one row per partition into the histogram value of the matching bin,
 * creating bins on demand under the statistic's id in {@code statsMap}.
 */
private void ingestRandomRows(final Map<StatisticId<?>, Map<ByteArray, StatisticValue<?>>> statsMap, final RowRangeHistogramStatistic statistic, final NumericIndexStrategy indexStrategy, final Random random) {
  for (int i = 0; i < ROWS; i++) {
    // draw order (x, y, t) matters for reproducibility with the seeded generator
    final double x = random.nextDouble();
    final double y = random.nextDouble();
    final double t = random.nextDouble();
    final InsertionIds id = indexStrategy.getInsertionIds(new BasicNumericDataset(new NumericData[] { new NumericValue(x), new NumericValue(y), new NumericValue(t) }));
    for (final SinglePartitionInsertionIds range : id.getPartitionKeys()) {
      final Map<ByteArray, StatisticValue<?>> binValues = statsMap.computeIfAbsent(statistic.getId(), statId -> Maps.newHashMap());
      final ByteArray bin = CompositeBinningStrategy.getBin(DataTypeBinningStrategy.getBin((String) null), PartitionBinningStrategy.getBin(range.getPartitionKey()));
      RowRangeHistogramValue value = (RowRangeHistogramValue) binValues.get(bin);
      if (value == null) {
        value = statistic.createEmpty();
        value.setBin(bin);
        binValues.put(bin, value);
      }
      ((StatisticsIngestCallback) value).entryIngested(null, null, new GeoWaveRowImpl(new GeoWaveKeyImpl(new byte[] { 1 }, (short) 1, range.getPartitionKey(), range.getSortKeys().get(0), 0), new GeoWaveValue[] {}));
    }
  }
}
Aggregations