Search in sources :

Example 1 with GeoWaveRowRange

use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.

the class AccumuloOperations method getScanner.

/**
 * Creates and configures an Accumulo scanner for a record-reader request.
 *
 * <p>The scanner is opened on the table named after the index in {@code params}, restricted to
 * the requested row range (or an unbounded range when none is given), optionally given a smaller
 * batch size, optionally fitted with a cardinality-skipping iterator for subsampling, and finally
 * limited to the requested adapter IDs as column families.
 *
 * @param params the record-reader parameters describing index, range, limit, subsampling,
 *        authorizations and adapter IDs
 * @return the configured scanner, or {@code null} when the table does not exist
 */
protected <T> Scanner getScanner(final RecordReaderParams params) {
    final GeoWaveRowRange range = params.getRowRange();
    final String tableName = params.getIndex().getName();
    Scanner scanner;
    try {
        scanner = createScanner(tableName, params.getAdditionalAuthorizations());

        // No row range means "scan everything".
        final Range accumuloRange;
        if (range != null) {
            accumuloRange = AccumuloSplitsProvider.toAccumuloRange(
                    range,
                    params.getIndex().getIndexStrategy().getPartitionKeyLength());
        } else {
            accumuloRange = new Range();
        }
        scanner.setRange(accumuloRange);

        // Only ever shrink the batch size: when a positive limit is smaller than the current
        // batch size, use the limit, capped at 1024.
        final Integer limit = params.getLimit();
        if ((limit != null) && (limit > 0) && (limit < scanner.getBatchSize())) {
            scanner.setBatchSize(Math.min(1024, limit));
        }

        if (params.getMaxResolutionSubsamplingPerDimension() != null) {
            final int subsampleDimensions = params.getMaxResolutionSubsamplingPerDimension().length;
            final int indexedDimensions = params.getIndex().getIndexStrategy().getOrderedDimensionDefinitions().length;
            if (subsampleDimensions == indexedDimensions) {
                // Skip interval = bits used by the subsampled dimensions plus the bits of the
                // partition key (8 per byte).
                final int cardinalityToSubsample = (int) Math.round(
                        IndexUtils.getDimensionalBitsUsed(
                                params.getIndex().getIndexStrategy(),
                                params.getMaxResolutionSubsamplingPerDimension())
                                + (8 * params.getIndex().getIndexStrategy().getPartitionKeyLength()));
                final IteratorSetting skippingIterator = new IteratorSetting(
                        FixedCardinalitySkippingIterator.CARDINALITY_SKIPPING_ITERATOR_PRIORITY,
                        FixedCardinalitySkippingIterator.CARDINALITY_SKIPPING_ITERATOR_NAME,
                        FixedCardinalitySkippingIterator.class);
                skippingIterator.addOption(
                        FixedCardinalitySkippingIterator.CARDINALITY_SKIP_INTERVAL,
                        Integer.toString(cardinalityToSubsample));
                scanner.addScanIterator(skippingIterator);
            } else {
                // Dimension counts disagree, so subsampling is skipped and the scan proceeds
                // without the skipping iterator.
                LOGGER.warn("Unable to subsample for table '" + tableName + "'. Subsample dimensions = " + subsampleDimensions + " when indexed dimensions = " + indexedDimensions);
            }
        }
    } catch (final TableNotFoundException e) {
        LOGGER.warn("Unable to query table '" + tableName + "'.  Table does not exist.", e);
        return null;
    }

    // Restrict the scan to the requested adapters, one column family per adapter ID.
    final short[] adapterIds = params.getAdapterIds();
    if ((adapterIds != null) && (adapterIds.length > 0)) {
        for (final Short adapterId : adapterIds) {
            scanner.fetchColumnFamily(new Text(ByteArrayUtils.shortToString(adapterId)));
        }
    }
    return scanner;
}
Also used : BatchScanner(org.apache.accumulo.core.client.BatchScanner) Scanner(org.apache.accumulo.core.client.Scanner) ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Text(org.apache.hadoop.io.Text) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) Range(org.apache.accumulo.core.data.Range)

Example 2 with GeoWaveRowRange

use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.

the class DynamoDBReader method initRecordScanner.

/**
 * Starts the read for this record reader by issuing one query request per requested adapter ID
 * over the reader's row range.
 *
 * <p>When no adapter IDs are supplied, the request list is empty and {@code startRead} is still
 * invoked with it.
 */
protected void initRecordScanner() {
    final String tableName = operations.getQualifiedTableName(recordReaderParams.getIndex().getName());
    final short[] requestedAdapterIds = recordReaderParams.getAdapterIds();
    final GeoWaveRowRange range = recordReaderParams.getRowRange();
    final List<QueryRequest> requests = new ArrayList<>();

    if (requestedAdapterIds != null) {
        for (final Short adapterId : requestedAdapterIds) {
            // An infinite bound is represented by a null sort key.
            final byte[] startKey;
            if (range.isInfiniteStartSortKey()) {
                startKey = null;
            } else {
                startKey = range.getStartSortKey();
            }
            final byte[] stopKey;
            if (range.isInfiniteStopSortKey()) {
                stopKey = null;
            } else {
                stopKey = range.getEndSortKey();
            }
            final ByteArrayRange sortKeyRange = new ByteArrayRange(startKey, stopKey);
            requests.add(getQuery(tableName, range.getPartitionKey(), sortKeyRange, adapterId));
        }
    }

    startRead(requests, tableName, recordReaderParams.isClientsideRowMerging(), false);
}
Also used : QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) ArrayList(java.util.ArrayList) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange)

Example 3 with GeoWaveRowRange

use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.

the class FileSystemReader method createIteratorForRecordReader.

/**
 * Builds the iterator used by a record reader: a single partition with a single sort-key range
 * taken from the reader's row range, read with the reader's additional authorizations.
 *
 * @param client the file system client to read from
 * @param recordReaderParams the record-reader parameters supplying the row range and
 *        authorizations
 * @return the iterator produced by {@code createIterator} for that single partition range
 */
private CloseableIterator<T> createIteratorForRecordReader(final FileSystemClient client, final RecordReaderParams recordReaderParams) {
    final GeoWaveRowRange range = recordReaderParams.getRowRange();

    // An infinite bound is represented by a null sort key.
    final byte[] startKey;
    if (range.isInfiniteStartSortKey()) {
        startKey = null;
    } else {
        startKey = range.getStartSortKey();
    }
    final byte[] stopKey;
    if (range.isInfiniteStopSortKey()) {
        stopKey = null;
    } else {
        stopKey = range.getEndSortKey();
    }

    final ByteArrayRange sortKeyRange = new ByteArrayRange(startKey, stopKey);
    final SinglePartitionQueryRanges partitionRange =
            new SinglePartitionQueryRanges(range.getPartitionKey(), Collections.singleton(sortKeyRange));
    final Set<String> authorizations = Sets.newHashSet(recordReaderParams.getAdditionalAuthorizations());

    // The trailing flag is passed as false; the original note beside it refers to input splits
    // for record reader use cases. NOTE(review): the flag's meaning is not visible here - confirm
    // against createIterator.
    return createIterator(
            client,
            (RangeReaderParams<T>) recordReaderParams,
            (GeoWaveRowIteratorTransformer<T>) GeoWaveRowIteratorTransformer.NO_OP_TRANSFORMER,
            Collections.singleton(partitionRange),
            authorizations,
            false);
}
Also used : SinglePartitionQueryRanges(org.locationtech.geowave.core.index.SinglePartitionQueryRanges) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange)

Example 4 with GeoWaveRowRange

use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.

the class HBaseSplitsProvider method populateIntermediateSplits.

/**
 * Adds one intermediate split per HBase region that holds ranges of the given index.
 *
 * <p>Row ranges come from the query constraints when a query is supplied; otherwise they are
 * derived from the partition statistics, or the full range is binned when no statistics exist.
 * The ranges are binned by region location and region, and each non-empty region bin becomes an
 * {@link IntermediateSplitInfo} whose ranges carry the host name and a cardinality of at least 1.
 *
 * @return the same {@code splits} set that was passed in, with any new splits added; it is
 *         returned unchanged when {@code operations} is not an {@code HBaseOperations} or no
 *         region locator can be obtained
 * @throws IOException declared by the overridden method
 */
@Override
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(final TreeSet<IntermediateSplitInfo> splits, final DataStoreOperations operations, final Index index, final List<Short> adapterIds, final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final QueryConstraints query, final double[] targetResolutionPerDimensionForHierarchicalIndex, final IndexMetaData[] indexMetadata, final String[] authorizations) throws IOException {
    if (!(operations instanceof HBaseOperations)) {
        LOGGER.error("HBaseSplitsProvider requires BasicHBaseOperations object.");
        return splits;
    }
    final HBaseOperations hbaseOperations = (HBaseOperations) operations;
    final String tableName = hbaseOperations.getQualifiedTableName(index.getName());
    final Map<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> binnedRanges = new HashMap<>();
    final RegionLocator regionLocator = hbaseOperations.getRegionLocator(tableName);
    if (regionLocator == null) {
        LOGGER.error("Unable to retrieve RegionLocator for " + tableName);
        return splits;
    }

    // Build the list of row ranges from the query, if there is one.
    List<ByteArrayRange> ranges = null;
    if (query != null) {
        final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(index);
        // A missing or non-positive maxSplits is passed on as -1.
        final int maxRanges = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
        ranges = DataStoreUtils.constraintsToQueryRanges(
                indexConstraints,
                index,
                targetResolutionPerDimensionForHierarchicalIndex,
                maxRanges,
                indexMetadata).getCompositeQueryRanges();
    }

    final PersistentAdapterStore persistentAdapterStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);

    if (ranges != null) {
        // Keep binning whatever binRanges hands back until nothing is left.
        while (!ranges.isEmpty()) {
            ranges = binRanges(ranges, binnedRanges, regionLocator);
        }
    } else {
        // No query ranges: fall back to the partition keys recorded in the statistics.
        final PartitionsValue statistics = InternalStatisticsHelper.getPartitions(index, adapterIds, persistentAdapterStore, statsStore, authorizations);
        if (statistics == null) {
            binFullRange(binnedRanges, regionLocator);
        } else {
            final List<ByteArrayRange> partitionRanges = Lists.newArrayList();
            final TreeSet<ByteArray> sortedPartitions = new TreeSet<>(statistics.getValue());
            // Chain consecutive partition keys into ranges, starting from and ending with the
            // empty byte array.
            byte[] previousKey = HConstants.EMPTY_BYTE_ARRAY;
            for (final ByteArray partitionKey : sortedPartitions) {
                partitionRanges.add(new ByteArrayRange(previousKey, partitionKey.getBytes()));
                previousKey = partitionKey.getBytes();
            }
            partitionRanges.add(new ByteArrayRange(previousKey, HConstants.EMPTY_BYTE_ARRAY));
            // NOTE(review): the value returned by binRanges is ignored here, whereas the query
            // path above loops on it - confirm this single pass is intended.
            binRanges(partitionRanges, binnedRanges, regionLocator);
        }
    }

    // Turn every non-empty region bin into one intermediate split.
    for (final Entry<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> locationEntry : binnedRanges.entrySet()) {
        final String hostname = locationEntry.getKey().getHostname();
        for (final Entry<HRegionInfo, List<ByteArrayRange>> regionEntry : locationEntry.getValue().entrySet()) {
            final List<RangeLocationPair> rangeList = new ArrayList<>();
            for (final ByteArrayRange regionRange : regionEntry.getValue()) {
                final GeoWaveRowRange gwRange = toRowRange(regionRange, index.getIndexStrategy().getPartitionKeyLength());
                final double cardinality = getCardinality(
                        getHistStats(index, adapterIds, persistentAdapterStore, statsStore, statsCache, new ByteArray(gwRange.getPartitionKey()), authorizations),
                        gwRange);
                // Cardinalities below 1 are raised to 1.
                double weight = cardinality;
                if (cardinality < 1) {
                    weight = 1.0;
                }
                rangeList.add(new RangeLocationPair(gwRange, hostname, weight));
            }
            if (rangeList.isEmpty()) {
                continue;
            }
            final Map<String, SplitInfo> splitInfo = new HashMap<>();
            splitInfo.put(index.getName(), new SplitInfo(index, rangeList));
            splits.add(new IntermediateSplitInfo(splitInfo, this));
        }
    }
    return splits;
}
Also used : IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) HashMap(java.util.HashMap) PartitionsValue(org.locationtech.geowave.core.store.statistics.index.PartitionsStatistic.PartitionsValue) ArrayList(java.util.ArrayList) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) SplitInfo(org.locationtech.geowave.mapreduce.splits.SplitInfo) IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) TreeSet(java.util.TreeSet) ByteArray(org.locationtech.geowave.core.index.ByteArray) ArrayList(java.util.ArrayList) List(java.util.List) RangeLocationPair(org.locationtech.geowave.mapreduce.splits.RangeLocationPair) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) HBaseOperations(org.locationtech.geowave.datastore.hbase.operations.HBaseOperations) AdapterStoreWrapper(org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with GeoWaveRowRange

use of org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange in project geowave by locationtech.

the class KuduReader method initRecordScanner.

/**
 * Initializes {@code this.iterator} with the results of a Kudu range read over the single
 * partition and sort-key range described by the reader's row range.
 *
 * <p>A missing adapter ID array is treated as an empty one. If the read fails with a
 * {@link KuduException}, the error is logged and this method does not assign the iterator.
 */
protected void initRecordScanner() {
    short[] adapterIds = recordReaderParams.getAdapterIds();
    if (adapterIds == null) {
        adapterIds = new short[0];
    }
    final GeoWaveRowRange range = recordReaderParams.getRowRange();

    // An infinite bound is represented by a null sort key.
    final byte[] startKey;
    if (range.isInfiniteStartSortKey()) {
        startKey = null;
    } else {
        startKey = range.getStartSortKey();
    }
    final byte[] stopKey;
    if (range.isInfiniteStopSortKey()) {
        stopKey = null;
    } else {
        stopKey = range.getEndSortKey();
    }

    final ByteArrayRange sortKeyRange = new ByteArrayRange(startKey, stopKey);
    final SinglePartitionQueryRanges partitionRange =
            new SinglePartitionQueryRanges(range.getPartitionKey(), Collections.singleton(sortKeyRange));
    try {
        final String indexName = recordReaderParams.getIndex().getName();
        final boolean mergingRequired = DataStoreUtils.isMergingIteratorRequired(recordReaderParams, visibilityEnabled);
        final ClientVisibilityFilter visibilityFilter =
                new ClientVisibilityFilter(Sets.newHashSet(recordReaderParams.getAdditionalAuthorizations()));
        this.iterator = operations.getKuduRangeRead(
                indexName,
                adapterIds,
                Collections.singleton(partitionRange),
                mergingRequired,
                rowTransformer,
                visibilityFilter,
                visibilityEnabled).results();
    } catch (final KuduException e) {
        LOGGER.error("Error in initializing reader", e);
    }
}
Also used : SinglePartitionQueryRanges(org.locationtech.geowave.core.index.SinglePartitionQueryRanges) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) ClientVisibilityFilter(org.locationtech.geowave.core.store.query.filter.ClientVisibilityFilter) KuduException(org.apache.kudu.client.KuduException)

Aggregations

GeoWaveRowRange (org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange): 10, ByteArrayRange (org.locationtech.geowave.core.index.ByteArrayRange): 9, SinglePartitionQueryRanges (org.locationtech.geowave.core.index.SinglePartitionQueryRanges): 5, ArrayList (java.util.ArrayList): 3, HashMap (java.util.HashMap): 2, List (java.util.List): 2, TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 2, Range (org.apache.accumulo.core.data.Range): 2, ByteArray (org.locationtech.geowave.core.index.ByteArray): 2, MultiDimensionalNumericData (org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData): 2, AdapterStoreWrapper (org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper): 2, ClientVisibilityFilter (org.locationtech.geowave.core.store.query.filter.ClientVisibilityFilter): 2, IntermediateSplitInfo (org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo): 2, RangeLocationPair (org.locationtech.geowave.mapreduce.splits.RangeLocationPair): 2, SplitInfo (org.locationtech.geowave.mapreduce.splits.SplitInfo): 2, QueryRequest (com.amazonaws.services.dynamodbv2.model.QueryRequest): 1, IOException (java.io.IOException): 1, InetAddress (java.net.InetAddress): 1, Map (java.util.Map): 1, TreeSet (java.util.TreeSet): 1