Search in sources :

Example 1 with HBaseOperations

Use of org.locationtech.geowave.datastore.hbase.operations.HBaseOperations in the geowave project by locationtech.

The populateIntermediateSplits method of the class HBaseSplitsProvider.

/**
 * Bins the row ranges to be scanned for {@code index} by HBase region and adds one
 * {@link IntermediateSplitInfo} to {@code splits} for every region that received at least one
 * range.
 *
 * <p>The ranges are chosen as follows:
 * <ul>
 *   <li>if {@code query} is non-null, the composite query ranges computed from its index
 *       constraints are used (limited to {@code maxSplits} when that is a positive number,
 *       otherwise {@code -1} is passed as the limit);</li>
 *   <li>if that yields no range list (or {@code query} is null), consecutive ranges are built
 *       between the sorted partition keys found in the partition statistics;</li>
 *   <li>if there are no partition statistics either, {@code binFullRange} is used.</li>
 * </ul>
 *
 * <p>The {@link RegionLocator} obtained for the index table is closed before this method
 * returns.
 *
 * @param splits the set that new splits are added to; the same instance is returned
 * @param operations must be an {@link HBaseOperations}; otherwise an error is logged and
 *        {@code splits} is returned unchanged
 * @param index the index whose table is being split
 * @param adapterIds adapter ids forwarded to the statistics lookups
 * @param statsCache cache forwarded to the histogram statistics lookup
 * @param adapterStore wrapped together with {@code internalAdapterStore} for statistics lookups
 * @param internalAdapterStore wrapped together with {@code adapterStore} for statistics lookups
 * @param statsStore statistics store forwarded to the statistics lookups
 * @param maxSplits optional limit on the number of query ranges; ignored when null or not
 *        positive
 * @param query optional query constraints; may be null
 * @param targetResolutionPerDimensionForHierarchicalIndex forwarded to the query range
 *        computation
 * @param indexMetadata forwarded to the query range computation
 * @param authorizations forwarded to the statistics lookups
 * @return {@code splits}, possibly with additional entries
 * @throws IOException propagated from the region lookups or from closing the region locator
 */
@Override
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(final TreeSet<IntermediateSplitInfo> splits, final DataStoreOperations operations, final Index index, final List<Short> adapterIds, final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final QueryConstraints query, final double[] targetResolutionPerDimensionForHierarchicalIndex, final IndexMetaData[] indexMetadata, final String[] authorizations) throws IOException {
    if (!(operations instanceof HBaseOperations)) {
        LOGGER.error("HBaseSplitsProvider requires HBaseOperations object.");
        return splits;
    }
    final HBaseOperations hbaseOperations = (HBaseOperations) operations;
    final String tableName = hbaseOperations.getQualifiedTableName(index.getName());
    final Map<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> binnedRanges = new HashMap<>();
    final PersistentAdapterStore persistentAdapterStore;

    // The locator is only needed while binning; try-with-resources guarantees it is closed on
    // every exit path (a null resource is simply skipped on close).
    try (RegionLocator regionLocator = hbaseOperations.getRegionLocator(tableName)) {
        if (regionLocator == null) {
            LOGGER.error("Unable to retrieve RegionLocator for " + tableName);
            return splits;
        }

        // Build list of row ranges from query
        List<ByteArrayRange> ranges = null;
        if (query != null) {
            final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(index);
            // A missing or non-positive maxSplits is passed on as -1.
            final int rangeLimit = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
            ranges = DataStoreUtils.constraintsToQueryRanges(indexConstraints, index, targetResolutionPerDimensionForHierarchicalIndex, rangeLimit, indexMetadata).getCompositeQueryRanges();
        }

        persistentAdapterStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);

        if (ranges != null) {
            // binRanges hands back a list that is fed in again until it comes back empty.
            while (!ranges.isEmpty()) {
                ranges = binRanges(ranges, binnedRanges, regionLocator);
            }
        } else {
            // get partition ranges from stats
            final PartitionsValue statistics = InternalStatisticsHelper.getPartitions(index, adapterIds, persistentAdapterStore, statsStore, authorizations);
            if (statistics == null) {
                binFullRange(binnedRanges, regionLocator);
            } else {
                ranges = Lists.newArrayList();
                // Chain the sorted partition keys into consecutive ranges, starting and ending
                // with the empty byte array.
                byte[] prevKey = HConstants.EMPTY_BYTE_ARRAY;
                final TreeSet<ByteArray> sortedPartitions = new TreeSet<>(statistics.getValue());
                for (final ByteArray partitionKey : sortedPartitions) {
                    ranges.add(new ByteArrayRange(prevKey, partitionKey.getBytes()));
                    prevKey = partitionKey.getBytes();
                }
                ranges.add(new ByteArrayRange(prevKey, HConstants.EMPTY_BYTE_ARRAY));
                // NOTE(review): unlike the query-range path above, binRanges is called only
                // once here and its return value is ignored - confirm nothing is left unbinned.
                binRanges(ranges, binnedRanges, regionLocator);
            }
        }
    }

    // Turn every region's binned ranges into one split located on the region's host.
    for (final Entry<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> locationEntry : binnedRanges.entrySet()) {
        final String hostname = locationEntry.getKey().getHostname();
        for (final Entry<HRegionInfo, List<ByteArrayRange>> regionEntry : locationEntry.getValue().entrySet()) {
            final List<RangeLocationPair> rangeList = new ArrayList<>();
            for (final ByteArrayRange range : regionEntry.getValue()) {
                final GeoWaveRowRange gwRange = toRowRange(range, index.getIndexStrategy().getPartitionKeyLength());
                final double cardinality = getCardinality(getHistStats(index, adapterIds, persistentAdapterStore, statsStore, statsCache, new ByteArray(gwRange.getPartitionKey()), authorizations), gwRange);
                // Cardinalities below 1 are raised to 1.0.
                rangeList.add(new RangeLocationPair(gwRange, hostname, cardinality < 1 ? 1.0 : cardinality));
            }
            if (!rangeList.isEmpty()) {
                final Map<String, SplitInfo> splitInfo = new HashMap<>();
                splitInfo.put(index.getName(), new SplitInfo(index, rangeList));
                splits.add(new IntermediateSplitInfo(splitInfo, this));
            }
        }
    }
    return splits;
}
Also used : IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) HashMap(java.util.HashMap) PartitionsValue(org.locationtech.geowave.core.store.statistics.index.PartitionsStatistic.PartitionsValue) ArrayList(java.util.ArrayList) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) SplitInfo(org.locationtech.geowave.mapreduce.splits.SplitInfo) IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) TreeSet(java.util.TreeSet) ByteArray(org.locationtech.geowave.core.index.ByteArray) ArrayList(java.util.ArrayList) List(java.util.List) RangeLocationPair(org.locationtech.geowave.mapreduce.splits.RangeLocationPair) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) HBaseOperations(org.locationtech.geowave.datastore.hbase.operations.HBaseOperations) AdapterStoreWrapper(org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 TreeSet (java.util.TreeSet)1 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)1 HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)1 RegionLocator (org.apache.hadoop.hbase.client.RegionLocator)1 ByteArray (org.locationtech.geowave.core.index.ByteArray)1 ByteArrayRange (org.locationtech.geowave.core.index.ByteArrayRange)1 MultiDimensionalNumericData (org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData)1 AdapterStoreWrapper (org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper)1 PersistentAdapterStore (org.locationtech.geowave.core.store.adapter.PersistentAdapterStore)1 PartitionsValue (org.locationtech.geowave.core.store.statistics.index.PartitionsStatistic.PartitionsValue)1 HBaseOperations (org.locationtech.geowave.datastore.hbase.operations.HBaseOperations)1 GeoWaveRowRange (org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange)1 IntermediateSplitInfo (org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo)1 RangeLocationPair (org.locationtech.geowave.mapreduce.splits.RangeLocationPair)1 SplitInfo (org.locationtech.geowave.mapreduce.splits.SplitInfo)1