Use of org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper in project geowave by locationtech.
Class SplitsProvider, method getSplits.
/**
 * Builds the list of input splits for a query.
 *
 * <p>For every index/adapter-id pair returned by
 * {@code BaseDataStoreUtils.getAdaptersWithMinimalSetOfIndices} the (possibly adapter-specific)
 * constraints are resolved and {@link #populateIntermediateSplits} is asked to add intermediate
 * splits. Afterwards the intermediate splits are either split further until {@code minSplits} is
 * reached, or merged until no more than {@code maxSplits} remain, and finally converted with
 * {@code toFinalSplit}.
 *
 * @param minSplits lower bound on the split count; only applied when non-null and when both the
 *        statistics cache and the split set are non-empty
 * @param maxSplits upper bound on the split count; only applied when non-null and positive, and
 *        only when the minimum-split branch was not taken
 * @return the final splits, in the iteration order of the intermediate split set
 */
public List<InputSplit> getSplits(final DataStoreOperations operations, final CommonQueryOptions commonOptions, final DataTypeQueryOptions<?> typeOptions, final IndexQueryOptions indexOptions, final QueryConstraints constraints, final TransientAdapterStore adapterStore, final DataStatisticsStore statsStore, final InternalAdapterStore internalAdapterStore, final IndexStore indexStore, final AdapterIndexMappingStore adapterIndexMappingStore, final JobContext context, final Integer minSplits, final Integer maxSplits) throws IOException, InterruptedException {
  final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache = new HashMap<>();
  final TreeSet<IntermediateSplitInfo> splits = new TreeSet<>();
  final Map<String, List<Short>> indexIdToAdaptersMap = new HashMap<>();

  for (final Pair<Index, List<Short>> indexAndAdapters : BaseDataStoreUtils.getAdaptersWithMinimalSetOfIndices(typeOptions.getTypeNames(), indexOptions.getIndexName(), adapterStore, internalAdapterStore, adapterIndexMappingStore, indexStore, constraints)) {
    final Index index = indexAndAdapters.getLeft();
    final List<Short> adapterIds = indexAndAdapters.getRight();

    // Unless an adapter-specific constraint can be derived below, the caller's constraints apply.
    QueryConstraints indexAdapterConstraints = constraints;
    if (constraints instanceof AdapterAndIndexBasedQueryConstraints) {
      // An adapter can only be resolved when exactly one adapter id is paired with this index.
      DataTypeAdapter<?> adapter = null;
      if (adapterIds.size() == 1) {
        final String typeName = internalAdapterStore.getTypeName(adapterIds.get(0));
        if (typeName != null) {
          adapter = adapterStore.getAdapter(typeName);
        }
      }
      if (adapter == null) {
        LOGGER.info("Unable to find type matching an adapter dependent query");
      } else {
        indexAdapterConstraints = ((AdapterAndIndexBasedQueryConstraints) constraints).createQueryConstraints(adapter.asInternalAdapter(adapterIds.get(0)), index, adapterIndexMappingStore.getMapping(adapterIds.get(0), index.getName()));
        if (indexAdapterConstraints == null) {
          // no constraints could be created for this index/adapter pairing, so skip it
          continue;
        }
        // Pass the new constraints along to the record reader through the job configuration.
        // For Spark on YARN (though not locally) the job configuration is immutable, so while
        // picking up the constraint from the configuration is more efficient, the record reader
        // also checks for AdapterAndIndexBasedQueryConstraints itself.
        GeoWaveInputFormat.setQueryConstraints(context.getConfiguration(), indexAdapterConstraints);
      }
    }

    indexIdToAdaptersMap.put(index.getName(), adapterIds);

    // Index metadata is only looked up when there are constraints to apply.
    IndexMetaData[] indexMetadata = null;
    if (indexAdapterConstraints != null) {
      final IndexMetaDataSetValue metadataStat = InternalStatisticsHelper.getIndexMetadata(index, adapterIds, new AdapterStoreWrapper(adapterStore, internalAdapterStore), statsStore, commonOptions.getAuthorizations());
      if (metadataStat != null) {
        indexMetadata = metadataStat.toArray();
      }
    }

    final double[] targetResolution = (double[]) commonOptions.getHints().get(DataStoreUtils.TARGET_RESOLUTION_PER_DIMENSION_FOR_HIERARCHICAL_INDEX);
    populateIntermediateSplits(splits, operations, index, adapterIds, statsCache, adapterStore, internalAdapterStore, statsStore, maxSplits, indexAdapterConstraints, targetResolution, indexMetadata, commonOptions.getAuthorizations());
  }

  // This is an incremental algorithm; it may be better to let the target split count drive it
  // (i.e. to get 3 splits this splits one large range in two and then splits one of those halves
  // again, rather than splitting one range into thirds).
  if (!statsCache.isEmpty() && !splits.isEmpty() && (minSplits != null) && (splits.size() < minSplits)) {
    // Grow the set to at least minSplits: take the highest split, divide it in two and put both
    // parts back, which increases the size by one. Splits that cannot be divided are set aside.
    final List<IntermediateSplitInfo> unsplittable = new ArrayList<>();
    while (!splits.isEmpty() && ((splits.size() + unsplittable.size()) < minSplits)) {
      final IntermediateSplitInfo highest = splits.pollLast();
      final IntermediateSplitInfo remainder = highest.split(statsCache);
      if (remainder == null) {
        unsplittable.add(highest);
      } else {
        splits.add(highest);
        splits.add(remainder);
      }
    }
    // put the splits that could not be divided back into the result set
    splits.addAll(unsplittable);
    if (splits.size() < minSplits) {
      LOGGER.warn("Truly unable to meet split count. Actual Count: " + splits.size());
    }
  } else if ((maxSplits != null) && (maxSplits > 0) && (splits.size() > maxSplits)) {
    // Shrink the set to at most maxSplits. This is the naive approach: remove the two lowest
    // splits and merge them, which decreases the size by one.
    // TODO Ideally merge takes into account locations (as well as possibly the index as a
    // secondary criteria) to limit the number of locations/indices
    while (splits.size() > maxSplits) {
      final IntermediateSplitInfo lowest = splits.pollFirst();
      final IntermediateSplitInfo secondLowest = splits.pollFirst();
      lowest.merge(secondLowest);
      splits.add(lowest);
    }
  }

  final List<InputSplit> finalSplits = new ArrayList<>(splits.size());
  for (final IntermediateSplitInfo intermediate : splits) {
    finalSplits.add(intermediate.toFinalSplit(statsStore, adapterStore, internalAdapterStore, indexIdToAdaptersMap, commonOptions.getAuthorizations()));
  }
  return finalSplits;
}
Use of org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper in project geowave by locationtech.
Class HBaseSplitsProvider, method populateIntermediateSplits.
/**
 * Adds intermediate splits for one index by binning row ranges per HBase region.
 *
 * <p>The row ranges come from the query constraints when a query is given; otherwise they are
 * derived from the partition statistics, and when no statistics exist the full range is binned.
 * Every region that received at least one range yields one {@link IntermediateSplitInfo}, whose
 * ranges carry the region's hostname and a cardinality of at least 1.
 *
 * @param splits the set to add to; also the return value
 * @param operations must be an {@code HBaseOperations}; otherwise an error is logged and
 *        {@code splits} is returned unchanged
 * @param maxSplits used as the maximum range count when non-null and positive, otherwise -1 is
 *        passed to the range decomposition
 * @param query the query constraints, or {@code null} to fall back to partition statistics
 * @return {@code splits}, with the new intermediate splits added
 * @throws IOException if region lookup, binning or closing the region locator fails
 */
@Override
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(final TreeSet<IntermediateSplitInfo> splits, final DataStoreOperations operations, final Index index, final List<Short> adapterIds, final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final QueryConstraints query, final double[] targetResolutionPerDimensionForHierarchicalIndex, final IndexMetaData[] indexMetadata, final String[] authorizations) throws IOException {
  if (!(operations instanceof HBaseOperations)) {
    LOGGER.error("HBaseSplitsProvider requires HBaseOperations object.");
    return splits;
  }
  final HBaseOperations hbaseOperations = (HBaseOperations) operations;
  final String tableName = hbaseOperations.getQualifiedTableName(index.getName());
  final Map<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> binnedRanges = new HashMap<>();
  final PersistentAdapterStore persistentAdapterStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);

  // The region locator is only needed while binning; try-with-resources closes it afterwards
  // (a null resource is simply skipped on close).
  try (RegionLocator regionLocator = hbaseOperations.getRegionLocator(tableName)) {
    if (regionLocator == null) {
      LOGGER.error("Unable to retrieve RegionLocator for " + tableName);
      return splits;
    }

    // Build list of row ranges from query
    List<ByteArrayRange> ranges = null;
    if (query != null) {
      final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(index);
      final int maxRanges = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
      ranges = DataStoreUtils.constraintsToQueryRanges(indexConstraints, index, targetResolutionPerDimensionForHierarchicalIndex, maxRanges, indexMetadata).getCompositeQueryRanges();
    }

    if (ranges == null) {
      // get partition ranges from stats
      final PartitionsValue statistics = InternalStatisticsHelper.getPartitions(index, adapterIds, persistentAdapterStore, statsStore, authorizations);
      if (statistics != null) {
        // Turn the sorted partition keys into consecutive ranges: from the empty start key to the
        // first key, between neighbouring keys, and from the last key to the empty end key.
        ranges = new ArrayList<>();
        byte[] prevKey = HConstants.EMPTY_BYTE_ARRAY;
        final TreeSet<ByteArray> sortedPartitions = new TreeSet<>(statistics.getValue());
        for (final ByteArray partitionKey : sortedPartitions) {
          ranges.add(new ByteArrayRange(prevKey, partitionKey.getBytes()));
          prevKey = partitionKey.getBytes();
        }
        ranges.add(new ByteArrayRange(prevKey, HConstants.EMPTY_BYTE_ARRAY));
        // NOTE(review): the query branch below calls binRanges repeatedly until it returns an
        // empty list, while this branch calls it once and ignores the result - confirm a single
        // pass is sufficient for statistics-derived ranges.
        binRanges(ranges, binnedRanges, regionLocator);
      } else {
        binFullRange(binnedRanges, regionLocator);
      }
    } else {
      // keep binning until binRanges reports nothing left to assign
      while (!ranges.isEmpty()) {
        ranges = binRanges(ranges, binnedRanges, regionLocator);
      }
    }
  }

  // One intermediate split per region that has ranges; the region's host becomes the location.
  for (final Entry<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> locationEntry : binnedRanges.entrySet()) {
    final String hostname = locationEntry.getKey().getHostname();
    for (final Entry<HRegionInfo, List<ByteArrayRange>> regionEntry : locationEntry.getValue().entrySet()) {
      final List<RangeLocationPair> rangeList = new ArrayList<>();
      for (final ByteArrayRange range : regionEntry.getValue()) {
        final GeoWaveRowRange gwRange = toRowRange(range, index.getIndexStrategy().getPartitionKeyLength());
        final double cardinality = getCardinality(getHistStats(index, adapterIds, persistentAdapterStore, statsStore, statsCache, new ByteArray(gwRange.getPartitionKey()), authorizations), gwRange);
        // cardinalities below 1 are raised to 1
        rangeList.add(new RangeLocationPair(gwRange, hostname, cardinality < 1 ? 1.0 : cardinality));
      }
      if (!rangeList.isEmpty()) {
        final Map<String, SplitInfo> splitInfo = new HashMap<>();
        splitInfo.put(index.getName(), new SplitInfo(index, rangeList));
        splits.add(new IntermediateSplitInfo(splitInfo, this));
      }
    }
  }
  return splits;
}
Use of org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper in project geowave by locationtech.
Class IntermediateSplitInfo, method toFinalSplit.
/**
 * Converts this intermediate split into a {@link GeoWaveInputSplit}.
 *
 * <p>The non-empty locations of all ranges are collected as the split's locations, and each
 * contained {@link SplitInfo} has its client-side row merging, mixed visibility and
 * authorizations-limiting flags set from the adapters and statistics of its index.
 *
 * @param indexIdToAdaptersMap adapter ids keyed by index name; expected to hold an entry for
 *        every index in this split
 * @param authorizations the authorizations used for the statistics lookups
 * @return the final split over this object's split info and the collected locations
 */
public synchronized GeoWaveInputSplit toFinalSplit(final DataStatisticsStore statisticsStore, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final Map<String, List<Short>> indexIdToAdaptersMap, final String... authorizations) {
  // Gather the distinct, non-empty locations across every range of every index.
  final Set<String> hosts = new HashSet<>();
  for (final SplitInfo info : splitInfo.values()) {
    for (final RangeLocationPair rangeAndLocation : info.getRangeLocationPairs()) {
      final String location = rangeAndLocation.getLocation();
      if ((location == null) || location.isEmpty()) {
        continue;
      }
      hosts.add(location);
    }
  }

  for (final SplitInfo info : splitInfo.values()) {
    final Index index = info.getIndex();
    final List<Short> adapterIds = indexIdToAdaptersMap.get(index.getName());
    final PersistentAdapterStore wrappedStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);
    final DifferingVisibilityCountValue differingCounts = InternalStatisticsHelper.getDifferingVisibilityCounts(index, adapterIds, wrappedStore, statisticsStore, authorizations);
    final FieldVisibilityCountValue fieldCounts = InternalStatisticsHelper.getVisibilityCounts(index, adapterIds, wrappedStore, statisticsStore, authorizations);

    final short[] primitiveIds = ArrayUtils.toPrimitive(adapterIds.toArray(new Short[0]));
    final boolean rowMerging = BaseDataStoreUtils.isRowMerging(wrappedStore, primitiveIds);
    info.setClientsideRowMerging(rowMerging);

    // a missing statistic is treated as "true" for both of the flags below
    final boolean mixedVisibility = (differingCounts == null) || differingCounts.isAnyEntryDifferingFieldVisiblity();
    info.setMixedVisibility(mixedVisibility);

    final boolean authorizationsLimiting = (fieldCounts == null) || fieldCounts.isAuthorizationsLimiting(authorizations);
    info.setAuthorizationsLimiting(authorizationsLimiting);
  }

  final String[] hostArray = hosts.toArray(new String[0]);
  return new GeoWaveInputSplit(splitInfo, hostArray);
}
Use of org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper in project geowave by locationtech.
Class SplitsProvider, method populateIntermediateSplits.
/**
 * Adds at most one intermediate split for the given index to {@code splits}.
 *
 * <p>Ranges are taken from the query constraints when available. Otherwise one range per
 * partition key from the partition statistics is used, and when no such statistics exist a single
 * all-inclusive range with cardinality 0 is added. A cardinality of 0 or less is stored as 0, a
 * cardinality between 0 and 1 is raised to 1.
 *
 * @param splits the set to add to; also the return value
 * @param maxSplits used as the maximum range count when non-null and positive, otherwise -1 is
 *        passed to the range decomposition
 * @param constraints the query constraints, or {@code null} to fall back to partition statistics
 * @return {@code splits}, with the new intermediate split added when any range was produced
 * @throws IOException declared for overriding implementations and the helpers called here
 */
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(final TreeSet<IntermediateSplitInfo> splits, final DataStoreOperations operations, final Index index, final List<Short> adapterIds, final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final QueryConstraints constraints, final double[] targetResolutionPerDimensionForHierarchicalIndex, final IndexMetaData[] indexMetadata, final String[] authorizations) throws IOException {
  // Derive the row ranges from the query, if there is one.
  List<ByteArrayRange> queryRanges = null;
  if (constraints != null) {
    final List<MultiDimensionalNumericData> indexConstraints = constraints.getIndexConstraints(index);
    final int maxRanges = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
    queryRanges = DataStoreUtils.constraintsToQueryRanges(indexConstraints, index, targetResolutionPerDimensionForHierarchicalIndex, maxRanges, indexMetadata).getCompositeQueryRanges();
  }

  final List<RangeLocationPair> pairs = new ArrayList<>();
  final PersistentAdapterStore wrappedStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);

  if (queryRanges != null) {
    for (final ByteArrayRange queryRange : queryRanges) {
      final GeoWaveRowRange rowRange = SplitsProvider.toRowRange(queryRange, index.getIndexStrategy().getPartitionKeyLength());
      final double rawCardinality = getCardinality(getHistStats(index, adapterIds, wrappedStore, statsStore, statsCache, new ByteArray(rowRange.getPartitionKey()), authorizations), rowRange);
      final double cardinality;
      if (rawCardinality <= 0) {
        cardinality = 0;
      } else if (rawCardinality < 1) {
        cardinality = 1.0;
      } else {
        cardinality = rawCardinality;
      }
      pairs.add(new RangeLocationPair(rowRange, cardinality));
    }
  } else {
    // No query ranges: fall back to the partition statistics.
    final PartitionsValue partitionStat = InternalStatisticsHelper.getPartitions(index, adapterIds, wrappedStore, statsStore, authorizations);
    if (partitionStat == null) {
      // add one all-inclusive range
      pairs.add(new RangeLocationPair(new GeoWaveRowRange(null, null, null, true, false), 0.0));
    } else {
      // one range per partition key
      final Set<ByteArray> partitionKeys = partitionStat.getValue();
      for (final ByteArray partitionKey : partitionKeys) {
        final GeoWaveRowRange rowRange = new GeoWaveRowRange(partitionKey.getBytes(), null, null, true, true);
        final double rawCardinality = getCardinality(getHistStats(index, adapterIds, wrappedStore, statsStore, statsCache, partitionKey, authorizations), rowRange);
        final double cardinality;
        if (rawCardinality <= 0) {
          cardinality = 0;
        } else if (rawCardinality < 1) {
          cardinality = 1.0;
        } else {
          cardinality = rawCardinality;
        }
        pairs.add(new RangeLocationPair(rowRange, cardinality));
      }
    }
  }

  if (pairs.isEmpty()) {
    return splits;
  }
  final Map<String, SplitInfo> infoByIndexName = new HashMap<>();
  infoByIndexName.put(index.getName(), new SplitInfo(index, pairs));
  splits.add(new IntermediateSplitInfo(infoByIndexName, this));
  return splits;
}
Use of org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper in project geowave by locationtech.
Class GeoWaveRecordReader, method fillIterators.
/**
 * Creates one reader per range of the given split and appends a single iterator that
 * concatenates them to {@code allIterators}. Does nothing when the split has no ranges.
 *
 * <p>For each range, the sum accumulated before that range is recorded in
 * {@code incrementalRangeSums}, and the range's cardinality is then added to the running sum.
 *
 * @param allIterators receives the concatenated iterator for this split
 * @param splitInfo the index and ranges to read
 * @param queryFilters filters to pass on; {@code null} or empty means no filters
 * @param sum the starting value of the running cardinality sum; the updated total is not
 *        returned to the caller
 * @param incrementalRangeSums receives, per range, the running sum before that range
 * @param callback passed through to {@code concatenateWithCallback}
 */
private void fillIterators(final List<CloseableIterator<Pair<GeoWaveInputKey, T>>> allIterators, final SplitInfo splitInfo, final List<QueryFilter> queryFilters, BigDecimal sum, final Map<RangeLocationPair, BigDecimal> incrementalRangeSums, final NextRangeCallback callback) {
  if (splitInfo.getRangeLocationPairs().isEmpty()) {
    return;
  }

  final QueryFilter[] filterArray;
  if ((queryFilters != null) && !queryFilters.isEmpty()) {
    filterArray = queryFilters.toArray(new QueryFilter[0]);
  } else {
    filterArray = null;
  }

  final PersistentAdapterStore wrappedStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);
  final DataIndexRetrieval retrieval = DataIndexUtils.getDataIndexRetrieval(operations, wrappedStore, aimStore, internalAdapterStore, splitInfo.getIndex(), sanitizedQueryOptions.getFieldIdsAdapterPair(), sanitizedQueryOptions.getAggregation(), sanitizedQueryOptions.getAuthorizations(), dataIndexBatchSize);

  final List<Pair<RangeLocationPair, RowReader<GeoWaveRow>>> readersByRange = new ArrayList<>(splitInfo.getRangeLocationPairs().size());
  BigDecimal runningSum = sum;
  for (final RangeLocationPair rangePair : splitInfo.getRangeLocationPairs()) {
    final RecordReaderParams readerParams = new RecordReaderParams(splitInfo.getIndex(), wrappedStore, aimStore, internalAdapterStore, sanitizedQueryOptions.getAdapterIds(internalAdapterStore), sanitizedQueryOptions.getMaxResolutionSubsamplingPerDimension(), sanitizedQueryOptions.getAggregation(), sanitizedQueryOptions.getFieldIdsAdapterPair(), splitInfo.isMixedVisibility(), splitInfo.isAuthorizationsLimiting(), splitInfo.isClientsideRowMerging(), rangePair.getRange(), sanitizedQueryOptions.getLimit(), sanitizedQueryOptions.getMaxRangeDecomposition(), sanitizedQueryOptions.getAuthorizations());
    final RowReader<GeoWaveRow> reader = operations.createReader(readerParams);
    readersByRange.add(Pair.of(rangePair, reader));

    // record the total accumulated before this range, then advance it by this range's cardinality
    incrementalRangeSums.put(rangePair, runningSum);
    runningSum = runningSum.add(BigDecimal.valueOf(rangePair.getCardinality()));
  }

  allIterators.add(concatenateWithCallback(readersByRange, callback, splitInfo.getIndex(), filterArray, retrieval));
}
Aggregations