use of org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic in project geowave by locationtech.
the class IndexImpl method getDefaultStatistics.
/**
 * Builds the statistics that are tracked for this index by default.
 *
 * <p>Every statistic is created against this index's name and flagged as internal. All of them are
 * binned by data type, except the row range histogram, which is binned by the composite of data
 * type and partition.
 *
 * @return a new mutable list holding the six default statistics
 */
@Override
public List<Statistic<? extends StatisticValue<?>>> getDefaultStatistics() {
  final String indexName = getName();

  // Index metadata, seeded from the index strategy.
  final IndexMetaDataSetStatistic metaDataStat =
      new IndexMetaDataSetStatistic(indexName, indexStrategy.createMetaData());
  metaDataStat.setBinningStrategy(new DataTypeBinningStrategy());
  metaDataStat.setInternal();

  final DuplicateEntryCountStatistic duplicateStat = new DuplicateEntryCountStatistic(indexName);
  duplicateStat.setBinningStrategy(new DataTypeBinningStrategy());
  duplicateStat.setInternal();

  final PartitionsStatistic partitionStat = new PartitionsStatistic(indexName);
  partitionStat.setBinningStrategy(new DataTypeBinningStrategy());
  partitionStat.setInternal();

  final DifferingVisibilityCountStatistic differingVisibilityStat =
      new DifferingVisibilityCountStatistic(indexName);
  differingVisibilityStat.setBinningStrategy(new DataTypeBinningStrategy());
  differingVisibilityStat.setInternal();

  final FieldVisibilityCountStatistic fieldVisibilityStat =
      new FieldVisibilityCountStatistic(indexName);
  fieldVisibilityStat.setBinningStrategy(new DataTypeBinningStrategy());
  fieldVisibilityStat.setInternal();

  // The histogram is the only default statistic binned per data type AND per partition.
  final RowRangeHistogramStatistic histogramStat = new RowRangeHistogramStatistic(indexName);
  histogramStat.setBinningStrategy(
      new CompositeBinningStrategy(
          new DataTypeBinningStrategy(),
          new PartitionBinningStrategy()));
  histogramStat.setInternal();

  // Registration order is kept stable: metadata, duplicates, partitions, visibility, histogram.
  final List<Statistic<? extends StatisticValue<?>>> defaults =
      Lists.newArrayListWithCapacity(6);
  defaults.add(metaDataStat);
  defaults.add(duplicateStat);
  defaults.add(partitionStat);
  defaults.add(differingVisibilityStat);
  defaults.add(fieldVisibilityStat);
  defaults.add(histogramStat);
  return defaults;
}
use of org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic in project geowave by locationtech.
the class ChooseBestMatchIndexQueryStrategy method getIndices.
/**
 * Returns an iterator that yields at most one index: the candidate whose query ranges have the
 * lowest estimated cardinality according to its row range histogram statistic.
 *
 * <p>Indices whose strategy has no ordered dimension definitions are ignored. An index for which
 * the query would be a full table scan is only used as a fallback when no other candidate has
 * been selected before it and no later candidate beats it.
 *
 * @param statisticsStore the store used to look up row range histogram statistics and values
 * @param mappingStore the adapter to index mapping store (not used by this strategy)
 * @param query the query constraints to evaluate against each index
 * @param indices the candidate indices
 * @param adapter the adapter being queried
 * @param hints query hints; {@link QueryHint#MAX_RANGE_DECOMPOSITION} is honored when present
 * @return an iterator over the single best index, or an empty iterator if none qualifies
 */
@Override
public CloseableIterator<Index> getIndices(final DataStatisticsStore statisticsStore, final AdapterIndexMappingStore mappingStore, final QueryConstraints query, final Index[] indices, final InternalDataAdapter<?> adapter, final Map<QueryHint, Object> hints) {
  return new CloseableIterator<Index>() {
    Index nextIdx = null;
    boolean done = false;
    int i = 0;

    @Override
    public boolean hasNext() {
      if (done) {
        // The selection runs only once. Repeated calls must report the cached result rather
        // than discard it, otherwise hasNext(); hasNext(); next() would lose the chosen index.
        return nextIdx != null;
      }
      long min = Long.MAX_VALUE;
      Index bestIdx = null;
      while (i < indices.length) {
        final Index candidate = indices[i++];
        if (candidate.getIndexStrategy().getOrderedDimensionDefinitions().length == 0) {
          continue;
        }
        final List<MultiDimensionalNumericData> constraints = query.getIndexConstraints(candidate);
        RowRangeHistogramStatistic rowRangeHistogramStatistic = null;
        try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> stats = statisticsStore.getIndexStatistics(candidate, RowRangeHistogramStatistic.STATS_TYPE, Statistic.INTERNAL_TAG)) {
          if (stats.hasNext()) {
            final Statistic<?> statistic = stats.next();
            // Only a histogram binned by (data type, partition) is usable here.
            if ((statistic instanceof RowRangeHistogramStatistic) && (statistic.getBinningStrategy() instanceof CompositeBinningStrategy) && ((CompositeBinningStrategy) statistic.getBinningStrategy()).isOfType(DataTypeBinningStrategy.class, PartitionBinningStrategy.class)) {
              rowRangeHistogramStatistic = (RowRangeHistogramStatistic) statistic;
            }
          }
        }
        if (rowRangeHistogramStatistic == null) {
          LOGGER.warn("Best Match Heuristic requires statistic RowRangeHistogramStatistics for each index to properly choose an index.");
        }
        if (IndexUtils.isFullTableScan(constraints)) {
          // result in a full table scan; keep it only as a fallback
          if (bestIdx == null) {
            bestIdx = candidate;
          }
        } else {
          final int maxRangeDecomposition;
          if (hints.containsKey(QueryHint.MAX_RANGE_DECOMPOSITION)) {
            maxRangeDecomposition = (Integer) hints.get(QueryHint.MAX_RANGE_DECOMPOSITION);
          } else {
            LOGGER.warn("No max range decomposition hint was provided, this should be provided from the data store options");
            maxRangeDecomposition = 2000;
          }
          final QueryRanges ranges = DataStoreUtils.constraintsToQueryRanges(constraints, candidate, null, maxRangeDecomposition);
          final long temp = DataStoreUtils.cardinality(statisticsStore, rowRangeHistogramStatistic, adapter, candidate, ranges);
          if (temp < min) {
            bestIdx = candidate;
            min = temp;
          }
        }
      }
      nextIdx = bestIdx;
      done = true;
      return nextIdx != null;
    }

    @Override
    public Index next() throws NoSuchElementException {
      if (nextIdx == null) {
        throw new NoSuchElementException();
      }
      // Clear the cached index so it is handed out exactly once.
      final Index returnVal = nextIdx;
      nextIdx = null;
      return returnVal;
    }

    @Override
    public void remove() {
      // intentionally a no-op
    }

    @Override
    public void close() {
      // nothing to release
    }
  };
}
use of org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic in project geowave by locationtech.
the class OptimalExpressionQuery method determineBestIndices.
/**
 * Chooses, for every adapter that matches the filter, the index that should be used to evaluate
 * it, and groups the adapters by the index chosen for them.
 *
 * <p>For each adapter, indices are skipped when they do not match the index name requested in
 * {@code baseOptions} or are rejected by {@code indexFilter}. The first eligible index is the
 * default choice, with a non-attribute index replacing an {@link AttributeIndex} default. If one
 * index produces constraints covering all of its indexed fields, that index is used. If several
 * do, the one whose query ranges have the lowest cardinality is used, and candidates with empty
 * query ranges are ignored. The constraints of the chosen constrained index are stored in
 * {@code constraintCache} under the adapter's type name.
 *
 * @param baseOptions the query options, used for the optional index name and for query ranges
 * @param adapters the adapters to find indices for
 * @param adapterIndexMappingStore the store providing the indices mapped to each adapter
 * @param indexStore the store used to resolve each mapping to its index
 * @param statisticsStore the store used to build constraints and to estimate cardinality
 * @return one pair per selected index, holding the index and the adapters to query with it
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public List<Pair<Index, List<InternalDataAdapter<?>>>> determineBestIndices(final BaseQueryOptions baseOptions, final InternalDataAdapter<?>[] adapters, final AdapterIndexMappingStore adapterIndexMappingStore, final IndexStore indexStore, final DataStatisticsStore statisticsStore) {
  final Map<Index, List<InternalDataAdapter<?>>> bestIndices = Maps.newHashMap();
  final Set<String> referencedFields = Sets.newHashSet();
  filter.addReferencedFields(referencedFields);
  for (final InternalDataAdapter<?> adapter : adapters) {
    if (!adapterMatchesFilter(adapter, referencedFields)) {
      continue;
    }
    final AdapterToIndexMapping[] adapterIndices = adapterIndexMappingStore.getIndicesForAdapter(adapter.getAdapterId());
    final Map<Index, FilterConstraints<?>> indexConstraints = Maps.newHashMap();
    Index bestIndex = null;
    for (final AdapterToIndexMapping mapping : adapterIndices) {
      if ((baseOptions.getIndexName() != null) && !baseOptions.getIndexName().equals(mapping.getIndexName())) {
        continue;
      }
      final Index index = mapping.getIndex(indexStore);
      if (indexFilter != null && !indexFilter.test(index)) {
        continue;
      }
      // Default to the first eligible index, but let a non-attribute index replace an
      // attribute index default.
      if ((bestIndex == null) || ((bestIndex instanceof AttributeIndex) && !(index instanceof AttributeIndex))) {
        bestIndex = index;
      }
      final Set<String> indexedFields = Sets.newHashSet();
      final Class<? extends Comparable> filterClass;
      if ((index instanceof CustomIndex) && (((CustomIndex<?, ?>) index).getCustomIndexStrategy() instanceof TextIndexStrategy)) {
        final TextIndexStrategy<?> indexStrategy = (TextIndexStrategy<?>) ((CustomIndex<?, ?>) index).getCustomIndexStrategy();
        if (!(indexStrategy.getEntryConverter() instanceof AdapterFieldTextIndexEntryConverter)) {
          continue;
        }
        indexedFields.add(((AdapterFieldTextIndexEntryConverter<?>) indexStrategy.getEntryConverter()).getFieldName());
        filterClass = String.class;
      } else {
        for (final IndexFieldMapper<?, ?> mapper : mapping.getIndexFieldMappers()) {
          for (final String adapterField : mapper.getAdapterFields()) {
            indexedFields.add(adapterField);
          }
        }
        // Remove any fields that are part of the common index model, but not used in the index
        // strategy. They shouldn't be considered when trying to find a best match. In the future
        // it may be useful to consider an index that has extra common index dimensions that
        // contain filtered fields over one that only matches indexed dimensions. For example, if
        // I have a spatial index, and a spatial index that stores time, it should pick the one
        // that stores time if I supply a temporal constraint, even though it isn't part of the
        // index strategy.
        final int modelDimensions = index.getIndexModel().getDimensions().length;
        final int strategyDimensions = index.getIndexStrategy().getOrderedDimensionDefinitions().length;
        for (int i = modelDimensions - 1; i >= strategyDimensions; i--) {
          final IndexFieldMapper<?, ?> mapper = mapping.getMapperForIndexField(index.getIndexModel().getDimensions()[i].getFieldName());
          for (final String adapterField : mapper.getAdapterFields()) {
            indexedFields.remove(adapterField);
          }
        }
        filterClass = Double.class;
      }
      // Only indices whose indexed fields are all referenced by the filter are candidates.
      if (referencedFields.containsAll(indexedFields)) {
        final FilterConstraints<?> constraints = filter.getConstraints(filterClass, statisticsStore, adapter, mapping, index, indexedFields);
        if (constraints.constrainsAllFields(indexedFields)) {
          indexConstraints.put(index, constraints);
        }
      }
    }
    if (indexConstraints.size() == 1) {
      final Entry<Index, FilterConstraints<?>> bestEntry = indexConstraints.entrySet().iterator().next();
      bestIndex = bestEntry.getKey();
      constraintCache.put(adapter.getTypeName(), bestEntry.getValue());
    } else if (indexConstraints.size() > 1) {
      // determine which constraint is the best
      double bestCardinality = Double.MAX_VALUE;
      Index bestConstrainedIndex = null;
      for (final Entry<Index, FilterConstraints<?>> entry : indexConstraints.entrySet()) {
        final Index candidate = entry.getKey();
        final QueryRanges ranges = entry.getValue().getQueryRanges(baseOptions, statisticsStore);
        if (ranges.isEmpty()) {
          continue;
        }
        // TODO: A future optimization would be to add a default numeric histogram for any numeric
        // index dimensions and just use the index data ranges to determine cardinality rather
        // than decomposing query ranges.
        final StatisticId<RowRangeHistogramValue> statisticId = IndexStatistic.generateStatisticId(candidate.getName(), RowRangeHistogramStatistic.STATS_TYPE, Statistic.INTERNAL_TAG);
        final RowRangeHistogramStatistic histogram = (RowRangeHistogramStatistic) statisticsStore.getStatisticById(statisticId);
        // The cardinality is estimated for the candidate being examined, so the candidate itself
        // (not the best index found so far) is passed alongside its histogram and ranges.
        final double cardinality = DataStoreUtils.cardinality(statisticsStore, histogram, adapter, candidate, ranges);
        if ((bestConstrainedIndex == null) || (cardinality < bestCardinality)) {
          bestConstrainedIndex = candidate;
          bestCardinality = cardinality;
        }
      }
      if (bestConstrainedIndex != null) {
        bestIndex = bestConstrainedIndex;
        constraintCache.put(adapter.getTypeName(), indexConstraints.get(bestIndex));
      }
    }
    if (bestIndex == null) {
      continue;
    }
    bestIndices.computeIfAbsent(bestIndex, key -> Lists.newArrayList()).add(adapter);
  }
  return bestIndices.entrySet().stream().map(e -> Pair.of(e.getKey(), e.getValue())).collect(Collectors.toList());
}
use of org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic in project geowave by locationtech.
the class ChooseBestMatchIndexQueryStrategyTest method testChooseSpatialTemporalWithStats.
/**
 * Populates row range histograms for a spatial-temporal index and a spatial index with random
 * rows, then checks that the best-match strategy returns exactly one index for a query
 * constraining latitude, longitude and time: the spatial-temporal one.
 */
@Test
public void testChooseSpatialTemporalWithStats() {
  final Index temporalindex = new SpatialTemporalIndexBuilder().createIndex();
  final Index spatialIndex = new SpatialIndexBuilder().createIndex();

  // One internal histogram statistic per index, binned by (data type, partition).
  final RowRangeHistogramStatistic rangeTempStats =
      new RowRangeHistogramStatistic(temporalindex.getName());
  rangeTempStats.setBinningStrategy(
      new CompositeBinningStrategy(
          new DataTypeBinningStrategy(),
          new PartitionBinningStrategy()));
  rangeTempStats.setInternal();
  final RowRangeHistogramStatistic rangeStats =
      new RowRangeHistogramStatistic(spatialIndex.getName());
  rangeStats.setBinningStrategy(
      new CompositeBinningStrategy(
          new DataTypeBinningStrategy(),
          new PartitionBinningStrategy()));
  rangeStats.setInternal();

  final Map<StatisticId<?>, Map<ByteArray, StatisticValue<?>>> statsMap = new HashMap<>();
  final ChooseBestMatchIndexQueryStrategy strategy = new ChooseBestMatchIndexQueryStrategy();

  // Query constraints: a lat/lon box merged with a time range.
  final ConstraintSet spatialConstraints = new ConstraintSet();
  spatialConstraints.addConstraint(
      LatitudeDefinition.class,
      new ConstraintData(new ConstrainedIndexValue(0.3, 0.5), true));
  spatialConstraints.addConstraint(
      LongitudeDefinition.class,
      new ConstraintData(new ConstrainedIndexValue(0.4, 0.7), true));
  final ConstraintSet timeConstraints = new ConstraintSet();
  timeConstraints.addConstraint(
      TimeDefinition.class,
      new ConstraintData(new ConstrainedIndexValue(0.1, 0.2), true));
  final ConstraintsByClass constraints =
      new ConstraintsByClass(Arrays.asList(timeConstraints)).merge(
          Collections.singletonList(spatialConstraints));
  final BasicQueryByClass query = new BasicQueryByClass(constraints);

  final NumericIndexStrategy temporalIndexStrategy =
      new SpatialTemporalIndexBuilder().createIndex().getIndexStrategy();
  final Random r = new Random(SEED);

  // Ingest ROWS random points into the spatial-temporal histogram.
  for (int row = 0; row < ROWS; row++) {
    final double x = r.nextDouble();
    final double y = r.nextDouble();
    final double t = r.nextDouble();
    final InsertionIds insertionIds =
        temporalIndexStrategy.getInsertionIds(
            new BasicNumericDataset(
                new NumericData[] {new NumericValue(x), new NumericValue(y), new NumericValue(t)}));
    for (final SinglePartitionInsertionIds partitionIds : insertionIds.getPartitionKeys()) {
      final Map<ByteArray, StatisticValue<?>> bins =
          statsMap.computeIfAbsent(rangeTempStats.getId(), id -> new HashMap<>());
      final ByteArray bin =
          CompositeBinningStrategy.getBin(
              DataTypeBinningStrategy.getBin((String) null),
              PartitionBinningStrategy.getBin(partitionIds.getPartitionKey()));
      final StatisticValue<?> histogram = bins.computeIfAbsent(bin, b -> {
        final RowRangeHistogramValue empty = rangeTempStats.createEmpty();
        empty.setBin(b);
        return empty;
      });
      ((StatisticsIngestCallback) histogram).entryIngested(
          null,
          null,
          new GeoWaveRowImpl(
              new GeoWaveKeyImpl(
                  new byte[] {1},
                  (short) 1,
                  partitionIds.getPartitionKey(),
                  partitionIds.getSortKeys().get(0),
                  0),
              new GeoWaveValue[] {}));
    }
  }

  final Index index = new SpatialIndexBuilder().createIndex();
  final NumericIndexStrategy indexStrategy = index.getIndexStrategy();

  // Ingest another ROWS random points into the spatial histogram.
  for (int row = 0; row < ROWS; row++) {
    final double x = r.nextDouble();
    final double y = r.nextDouble();
    final double t = r.nextDouble();
    final InsertionIds insertionIds =
        indexStrategy.getInsertionIds(
            new BasicNumericDataset(
                new NumericData[] {new NumericValue(x), new NumericValue(y), new NumericValue(t)}));
    for (final SinglePartitionInsertionIds partitionIds : insertionIds.getPartitionKeys()) {
      final Map<ByteArray, StatisticValue<?>> bins =
          statsMap.computeIfAbsent(rangeStats.getId(), id -> new HashMap<>());
      final ByteArray bin =
          CompositeBinningStrategy.getBin(
              DataTypeBinningStrategy.getBin((String) null),
              PartitionBinningStrategy.getBin(partitionIds.getPartitionKey()));
      final StatisticValue<?> histogram = bins.computeIfAbsent(bin, b -> {
        final RowRangeHistogramValue empty = rangeStats.createEmpty();
        empty.setBin(b);
        return empty;
      });
      ((StatisticsIngestCallback) histogram).entryIngested(
          null,
          null,
          new GeoWaveRowImpl(
              new GeoWaveKeyImpl(
                  new byte[] {1},
                  (short) 1,
                  partitionIds.getPartitionKey(),
                  partitionIds.getSortKeys().get(0),
                  0),
              new GeoWaveValue[] {}));
    }
  }

  final Iterator<Index> it =
      getIndices(
          new TestDataStatisticsStore(Lists.newArrayList(rangeStats, rangeTempStats), statsMap),
          query,
          strategy);
  assertTrue(it.hasNext());
  assertEquals(temporalindex.getName(), it.next().getName());
  assertFalse(it.hasNext());
}
Aggregations