use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.
the class AccumuloOperations method getScanner.
/**
 * Creates and configures an Accumulo scanner for a record reader over the table named after the
 * index in {@code params}.
 *
 * <p>The scanner is restricted to the requested row range (or an unbounded range when none is
 * supplied), may have its batch size reduced when a small positive limit is set, may receive a
 * cardinality-skipping iterator when subsampling is requested, and fetches only the column
 * families of the requested adapter ids (all column families when none are given).
 *
 * @param params record reader parameters: index, row range, limit, subsampling resolution,
 *        adapter ids and additional authorizations
 * @return the configured scanner, or {@code null} when the table does not exist
 */
protected <T> Scanner getScanner(final RecordReaderParams params) {
  final String tableName = params.getIndex().getName();
  final GeoWaveRowRange rowRange = params.getRowRange();
  final Scanner scanner;
  try {
    scanner = createScanner(tableName, params.getAdditionalAuthorizations());

    // A missing row range means the whole table is scanned.
    if (rowRange != null) {
      final int partitionKeyLength = params.getIndex().getIndexStrategy().getPartitionKeyLength();
      scanner.setRange(AccumuloSplitsProvider.toAccumuloRange(rowRange, partitionKeyLength));
    } else {
      scanner.setRange(new Range());
    }

    // Only shrink the batch size: a positive limit below the current batch size replaces it,
    // and the value is additionally capped at 1024.
    final Integer limit = params.getLimit();
    if ((limit != null) && (limit > 0) && (limit < scanner.getBatchSize())) {
      scanner.setBatchSize(Math.min(1024, limit));
    }

    if (params.getMaxResolutionSubsamplingPerDimension() != null) {
      final int subsampleDimensions = params.getMaxResolutionSubsamplingPerDimension().length;
      final int indexedDimensions =
          params.getIndex().getIndexStrategy().getOrderedDimensionDefinitions().length;
      if (subsampleDimensions == indexedDimensions) {
        // The skip interval is the number of dimensional bits in use plus the bits of the
        // partition key prefix (8 per byte).
        final int cardinalityToSubsample =
            (int) Math.round(
                IndexUtils.getDimensionalBitsUsed(
                    params.getIndex().getIndexStrategy(),
                    params.getMaxResolutionSubsamplingPerDimension())
                    + (8 * params.getIndex().getIndexStrategy().getPartitionKeyLength()));
        final IteratorSetting skippingIterator =
            new IteratorSetting(
                FixedCardinalitySkippingIterator.CARDINALITY_SKIPPING_ITERATOR_PRIORITY,
                FixedCardinalitySkippingIterator.CARDINALITY_SKIPPING_ITERATOR_NAME,
                FixedCardinalitySkippingIterator.class);
        skippingIterator.addOption(
            FixedCardinalitySkippingIterator.CARDINALITY_SKIP_INTERVAL,
            Integer.toString(cardinalityToSubsample));
        scanner.addScanIterator(skippingIterator);
      } else {
        // Subsampling needs one resolution per indexed dimension; otherwise it is skipped.
        LOGGER.warn("Unable to subsample for table '" + tableName + "'. Subsample dimensions = " + subsampleDimensions + " when indexed dimensions = " + indexedDimensions);
      }
    }
  } catch (final TableNotFoundException e) {
    LOGGER.warn("Unable to query table '" + tableName + "'. Table does not exist.", e);
    return null;
  }

  // Restrict the scan to the requested adapters; an absent or empty id array fetches everything.
  final short[] adapterIds = params.getAdapterIds();
  if (adapterIds != null) {
    for (final short adapterId : adapterIds) {
      scanner.fetchColumnFamily(new Text(ByteArrayUtils.shortToString(adapterId)));
    }
  }
  return scanner;
}
use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.
the class DynamoDBReader method initRecordScanner.
/**
 * Starts the read for a record reader: builds one query request per requested adapter id over
 * the reader's row range and passes the requests to {@code startRead}.
 *
 * <p>When no adapter ids are supplied, {@code startRead} is invoked with an empty request list
 * and the row range is never dereferenced.
 */
protected void initRecordScanner() {
  final String qualifiedTableName =
      operations.getQualifiedTableName(recordReaderParams.getIndex().getName());
  final short[] requestedAdapterIds = recordReaderParams.getAdapterIds();
  final GeoWaveRowRange rowRange = recordReaderParams.getRowRange();
  final List<QueryRequest> queries = new ArrayList<>();

  if (requestedAdapterIds != null) {
    for (final short adapterId : requestedAdapterIds) {
      // An infinite bound on either side is expressed as a null sort key.
      byte[] sortKeyStart = null;
      if (!rowRange.isInfiniteStartSortKey()) {
        sortKeyStart = rowRange.getStartSortKey();
      }
      byte[] sortKeyStop = null;
      if (!rowRange.isInfiniteStopSortKey()) {
        sortKeyStop = rowRange.getEndSortKey();
      }
      queries.add(
          getQuery(
              qualifiedTableName,
              rowRange.getPartitionKey(),
              new ByteArrayRange(sortKeyStart, sortKeyStop),
              adapterId));
    }
  }

  startRead(queries, qualifiedTableName, recordReaderParams.isClientsideRowMerging(), false);
}
use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.
the class FileSystemReader method createIteratorForRecordReader.
/**
 * Creates the iterator backing a record reader by querying the single partition and sort-key
 * range described by the reader's row range.
 *
 * @param client the file system client to read from
 * @param recordReaderParams supplies the row range and the additional authorizations
 * @return an iterator over the rows in the requested range, produced with the no-op row
 *         transformer
 */
private CloseableIterator<T> createIteratorForRecordReader(final FileSystemClient client, final RecordReaderParams recordReaderParams) {
  final GeoWaveRowRange rowRange = recordReaderParams.getRowRange();

  // An infinite bound on either side is expressed as a null sort key.
  byte[] sortKeyStart = null;
  if (!rowRange.isInfiniteStartSortKey()) {
    sortKeyStart = rowRange.getStartSortKey();
  }
  byte[] sortKeyStop = null;
  if (!rowRange.isInfiniteStopSortKey()) {
    sortKeyStop = rowRange.getEndSortKey();
  }

  final SinglePartitionQueryRanges partitionRanges =
      new SinglePartitionQueryRanges(
          rowRange.getPartitionKey(),
          Collections.singleton(new ByteArrayRange(sortKeyStart, sortKeyStop)));
  final Set<String> authorizations =
      Sets.newHashSet(recordReaderParams.getAdditionalAuthorizations());

  final RangeReaderParams<T> readerParams = (RangeReaderParams<T>) recordReaderParams;
  final GeoWaveRowIteratorTransformer<T> noOpTransformer =
      (GeoWaveRowIteratorTransformer<T>) GeoWaveRowIteratorTransformer.NO_OP_TRANSFORMER;

  // NOTE(review): the original comment on the trailing flag only said "input splits for record
  // reader use cases"; the flag's meaning is defined by createIterator - confirm there.
  return createIterator(
      client,
      readerParams,
      noOpTransformer,
      Collections.singleton(partitionRanges),
      authorizations,
      false);
}
use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.
the class HBaseSplitsProvider method populateIntermediateSplits.
/**
 * Adds intermediate splits for {@code index} to {@code splits}: row ranges are binned by HBase
 * region, and every region that received at least one range contributes one
 * {@code IntermediateSplitInfo}.
 *
 * <p>The row ranges come from the query's index constraints when {@code query} is non-null.
 * When no ranges result from that (or there is no query), the partition keys recorded in the
 * partition statistics are turned into consecutive ranges; when those statistics are missing
 * too, {@code binFullRange} is used instead.
 *
 * @param splits the set new splits are added to; the same instance is returned
 * @param operations must be an {@code HBaseOperations}; otherwise an error is logged and
 *        {@code splits} is returned untouched
 * @param index the index being split; its name resolves the table and its strategy supplies the
 *        partition key length
 * @param adapterIds forwarded to the partition and histogram statistics lookups
 * @param statsCache forwarded to {@code getHistStats}
 * @param adapterStore wrapped together with {@code internalAdapterStore} in an
 *        {@code AdapterStoreWrapper}
 * @param internalAdapterStore see {@code adapterStore}
 * @param statsStore forwarded to the partition and histogram statistics lookups
 * @param maxSplits forwarded to {@code constraintsToQueryRanges} when non-null and positive;
 *        otherwise {@code -1} is passed
 * @param query source of the index constraints; may be {@code null}
 * @param targetResolutionPerDimensionForHierarchicalIndex forwarded to
 *        {@code constraintsToQueryRanges}
 * @param indexMetadata forwarded to {@code constraintsToQueryRanges}
 * @param authorizations forwarded to the partition and histogram statistics lookups
 * @return {@code splits}, possibly with additional entries
 * @throws IOException declared by the signature; presumably raised by the region lookups -
 *         confirm against the helpers
 */
@Override
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(final TreeSet<IntermediateSplitInfo> splits, final DataStoreOperations operations, final Index index, final List<Short> adapterIds, final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final QueryConstraints query, final double[] targetResolutionPerDimensionForHierarchicalIndex, final IndexMetaData[] indexMetadata, final String[] authorizations) throws IOException {
HBaseOperations hbaseOperations = null;
if (operations instanceof HBaseOperations) {
hbaseOperations = (HBaseOperations) operations;
} else {
// NOTE(review): the message names "BasicHBaseOperations" while the check above is against
// HBaseOperations - looks like a stale class name; confirm before changing the text.
LOGGER.error("HBaseSplitsProvider requires BasicHBaseOperations object.");
return splits;
}
final String tableName = hbaseOperations.getQualifiedTableName(index.getName());
// Ranges grouped first by region location, then by region.
final Map<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> binnedRanges = new HashMap<>();
// NOTE(review): the locator is never closed in this method - confirm who owns its lifecycle.
final RegionLocator regionLocator = hbaseOperations.getRegionLocator(tableName);
if (regionLocator == null) {
LOGGER.error("Unable to retrieve RegionLocator for " + tableName);
return splits;
}
// Build list of row ranges from query
List<ByteArrayRange> ranges = null;
if (query != null) {
final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(index);
// A missing or non-positive maxSplits is passed on as -1.
if ((maxSplits != null) && (maxSplits > 0)) {
ranges = DataStoreUtils.constraintsToQueryRanges(indexConstraints, index, targetResolutionPerDimensionForHierarchicalIndex, maxSplits, indexMetadata).getCompositeQueryRanges();
} else {
ranges = DataStoreUtils.constraintsToQueryRanges(indexConstraints, index, targetResolutionPerDimensionForHierarchicalIndex, -1, indexMetadata).getCompositeQueryRanges();
}
}
PersistentAdapterStore persistentAdapterStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);
if (ranges == null) {
// get partition ranges from stats
final PartitionsValue statistics = InternalStatisticsHelper.getPartitions(index, adapterIds, persistentAdapterStore, statsStore, authorizations);
if (statistics != null) {
ranges = Lists.newArrayList();
// Walk the partition keys in sorted order and emit one range between each pair of
// neighbours, starting from the empty key.
byte[] prevKey = HConstants.EMPTY_BYTE_ARRAY;
final TreeSet<ByteArray> sortedPartitions = new TreeSet<>(statistics.getValue());
for (final ByteArray partitionKey : sortedPartitions) {
final ByteArrayRange range = new ByteArrayRange(prevKey, partitionKey.getBytes());
ranges.add(range);
prevKey = partitionKey.getBytes();
}
// Final range from the last partition key to the empty key.
ranges.add(new ByteArrayRange(prevKey, HConstants.EMPTY_BYTE_ARRAY));
// NOTE(review): binRanges is called once here and its return value is ignored, whereas the
// query branch below repeats it until the returned list is empty - confirm a single pass is
// sufficient for partition-derived ranges.
binRanges(ranges, binnedRanges, regionLocator);
} else {
// No partition statistics available.
binFullRange(binnedRanges, regionLocator);
}
} else {
// Keep binning whatever binRanges hands back until nothing is left.
while (!ranges.isEmpty()) {
ranges = binRanges(ranges, binnedRanges, regionLocator);
}
}
for (final Entry<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> locationEntry : binnedRanges.entrySet()) {
final String hostname = locationEntry.getKey().getHostname();
for (final Entry<HRegionInfo, List<ByteArrayRange>> regionEntry : locationEntry.getValue().entrySet()) {
// One split-info map (keyed by index name) and one range list per region.
final Map<String, SplitInfo> splitInfo = new HashMap<>();
final List<RangeLocationPair> rangeList = new ArrayList<>();
for (final ByteArrayRange range : regionEntry.getValue()) {
final GeoWaveRowRange gwRange = toRowRange(range, index.getIndexStrategy().getPartitionKeyLength());
final double cardinality = getCardinality(getHistStats(index, adapterIds, persistentAdapterStore, statsStore, statsCache, new ByteArray(gwRange.getPartitionKey()), authorizations), gwRange);
// Cardinalities below 1 are raised to 1.0.
rangeList.add(new RangeLocationPair(gwRange, hostname, cardinality < 1 ? 1.0 : cardinality));
}
// Regions without any range produce no split.
if (!rangeList.isEmpty()) {
splitInfo.put(index.getName(), new SplitInfo(index, rangeList));
splits.add(new IntermediateSplitInfo(splitInfo, this));
}
}
}
return splits;
}
use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.
the class KuduReader method initRecordScanner.
/**
 * Initializes this reader's iterator with a range read over the single partition and sort-key
 * range described by the record reader's row range.
 *
 * <p>If the read cannot be set up because of a {@code KuduException}, the error is logged and
 * the iterator is not assigned by this method.
 */
protected void initRecordScanner() {
  // A missing adapter id array is treated as an empty one.
  short[] adapterIds = recordReaderParams.getAdapterIds();
  if (adapterIds == null) {
    adapterIds = new short[0];
  }

  final GeoWaveRowRange rowRange = recordReaderParams.getRowRange();

  // An infinite bound on either side is expressed as a null sort key.
  byte[] sortKeyStart = null;
  if (!rowRange.isInfiniteStartSortKey()) {
    sortKeyStart = rowRange.getStartSortKey();
  }
  byte[] sortKeyStop = null;
  if (!rowRange.isInfiniteStopSortKey()) {
    sortKeyStop = rowRange.getEndSortKey();
  }

  final SinglePartitionQueryRanges partitionRanges =
      new SinglePartitionQueryRanges(
          rowRange.getPartitionKey(),
          Collections.singleton(new ByteArrayRange(sortKeyStart, sortKeyStop)));

  try {
    final String indexName = recordReaderParams.getIndex().getName();
    final boolean mergingRequired =
        DataStoreUtils.isMergingIteratorRequired(recordReaderParams, visibilityEnabled);
    final ClientVisibilityFilter visibilityFilter =
        new ClientVisibilityFilter(
            Sets.newHashSet(recordReaderParams.getAdditionalAuthorizations()));
    this.iterator =
        operations.getKuduRangeRead(
            indexName,
            adapterIds,
            Collections.singleton(partitionRanges),
            mergingRequired,
            rowTransformer,
            visibilityFilter,
            visibilityEnabled).results();
  } catch (final KuduException e) {
    LOGGER.error("Error in initializing reader", e);
  }
}
Aggregations