Search in sources:

Example 1 with SplitInfo

Use of org.locationtech.geowave.mapreduce.splits.SplitInfo in the GeoWave project by LocationTech.

Method populateIntermediateSplits of class HBaseSplitsProvider.

/**
 * Adds intermediate splits for the given index to {@code splits}, grouping row ranges by the
 * HBase region (and region location) that {@code binRanges}/{@code binFullRange} assign them to.
 *
 * <p>The row ranges are chosen as follows:
 * <ul>
 *   <li>if {@code query} is non-null, the composite query ranges derived from its index
 *       constraints are used ({@code maxSplits} is forwarded only when it is positive, otherwise
 *       {@code -1} is passed);</li>
 *   <li>otherwise, if partition statistics are available, consecutive ranges between the sorted
 *       partition keys are used, bounded on both ends by {@code HConstants.EMPTY_BYTE_ARRAY};</li>
 *   <li>otherwise the full range is binned.</li>
 * </ul>
 *
 * <p>One {@link IntermediateSplitInfo} is added per region that received at least one range. Each
 * range carries the host name of its region location and a cardinality estimate that is never
 * reported below {@code 1.0}.
 *
 * @param splits the set to add to; also the return value
 * @param operations must be an {@code HBaseOperations}; otherwise an error is logged and
 *        {@code splits} is returned unchanged
 * @param index the index whose table is being split
 * @param adapterIds adapter ids forwarded to the statistics lookups
 * @param statsCache histogram cache forwarded to {@code getHistStats}
 * @param adapterStore wrapped together with {@code internalAdapterStore} for statistics lookups
 * @param internalAdapterStore see {@code adapterStore}
 * @param statsStore statistics store forwarded to the statistics lookups
 * @param maxSplits optional upper bound forwarded to range decomposition; ignored unless positive
 * @param query optional query constraints; may be {@code null}
 * @param targetResolutionPerDimensionForHierarchicalIndex forwarded to range decomposition
 * @param indexMetadata forwarded to range decomposition
 * @param authorizations forwarded to the statistics lookups
 * @return {@code splits}, possibly with additional entries
 * @throws IOException propagated from region lookups or from closing the region locator
 */
@Override
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(final TreeSet<IntermediateSplitInfo> splits, final DataStoreOperations operations, final Index index, final List<Short> adapterIds, final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final QueryConstraints query, final double[] targetResolutionPerDimensionForHierarchicalIndex, final IndexMetaData[] indexMetadata, final String[] authorizations) throws IOException {
    if (!(operations instanceof HBaseOperations)) {
        LOGGER.error("HBaseSplitsProvider requires HBaseOperations object.");
        return splits;
    }
    final HBaseOperations hbaseOperations = (HBaseOperations) operations;
    final String tableName = hbaseOperations.getQualifiedTableName(index.getName());
    final Map<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> binnedRanges = new HashMap<>();
    final PersistentAdapterStore persistentAdapterStore;
    // RegionLocator is Closeable and is only needed while binning, so release it as soon as the
    // ranges have been assigned to regions. try-with-resources skips close() for a null resource.
    // NOTE(review): assumes getRegionLocator hands out a caller-owned locator — confirm it is not
    // a shared/cached instance.
    try (RegionLocator regionLocator = hbaseOperations.getRegionLocator(tableName)) {
        if (regionLocator == null) {
            LOGGER.error("Unable to retrieve RegionLocator for " + tableName);
            return splits;
        }
        // Build list of row ranges from query
        List<ByteArrayRange> ranges = null;
        if (query != null) {
            final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(index);
            // a missing or non-positive maxSplits is passed on as -1
            final int maxRanges = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
            ranges = DataStoreUtils.constraintsToQueryRanges(indexConstraints, index, targetResolutionPerDimensionForHierarchicalIndex, maxRanges, indexMetadata).getCompositeQueryRanges();
        }
        persistentAdapterStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);
        if (ranges == null) {
            // get partition ranges from stats
            final PartitionsValue statistics = InternalStatisticsHelper.getPartitions(index, adapterIds, persistentAdapterStore, statsStore, authorizations);
            if (statistics != null) {
                ranges = Lists.newArrayList();
                byte[] prevKey = HConstants.EMPTY_BYTE_ARRAY;
                final TreeSet<ByteArray> sortedPartitions = new TreeSet<>(statistics.getValue());
                for (final ByteArray partitionKey : sortedPartitions) {
                    ranges.add(new ByteArrayRange(prevKey, partitionKey.getBytes()));
                    prevKey = partitionKey.getBytes();
                }
                ranges.add(new ByteArrayRange(prevKey, HConstants.EMPTY_BYTE_ARRAY));
                // NOTE(review): unlike the query-range path below, binRanges is invoked only once
                // here and its return value is ignored — confirm partition ranges never leave a
                // remainder that needs another pass.
                binRanges(ranges, binnedRanges, regionLocator);
            } else {
                binFullRange(binnedRanges, regionLocator);
            }
        } else {
            // binRanges returns the ranges still to be binned; repeat until none are left
            while (!ranges.isEmpty()) {
                ranges = binRanges(ranges, binnedRanges, regionLocator);
            }
        }
    }
    final int partitionKeyLength = index.getIndexStrategy().getPartitionKeyLength();
    for (final Entry<HRegionLocation, Map<HRegionInfo, List<ByteArrayRange>>> locationEntry : binnedRanges.entrySet()) {
        final String hostname = locationEntry.getKey().getHostname();
        for (final Entry<HRegionInfo, List<ByteArrayRange>> regionEntry : locationEntry.getValue().entrySet()) {
            final List<RangeLocationPair> rangeList = new ArrayList<>();
            for (final ByteArrayRange range : regionEntry.getValue()) {
                final GeoWaveRowRange gwRange = toRowRange(range, partitionKeyLength);
                final double cardinality = getCardinality(getHistStats(index, adapterIds, persistentAdapterStore, statsStore, statsCache, new ByteArray(gwRange.getPartitionKey()), authorizations), gwRange);
                // never report a cardinality below 1
                rangeList.add(new RangeLocationPair(gwRange, hostname, cardinality < 1 ? 1.0 : cardinality));
            }
            if (!rangeList.isEmpty()) {
                final Map<String, SplitInfo> splitInfo = new HashMap<>();
                splitInfo.put(index.getName(), new SplitInfo(index, rangeList));
                splits.add(new IntermediateSplitInfo(splitInfo, this));
            }
        }
    }
    return splits;
}
Also used : IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) HashMap(java.util.HashMap) PartitionsValue(org.locationtech.geowave.core.store.statistics.index.PartitionsStatistic.PartitionsValue) ArrayList(java.util.ArrayList) ByteArrayRange(org.locationtech.geowave.core.index.ByteArrayRange) SplitInfo(org.locationtech.geowave.mapreduce.splits.SplitInfo) IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) TreeSet(java.util.TreeSet) ByteArray(org.locationtech.geowave.core.index.ByteArray) ArrayList(java.util.ArrayList) List(java.util.List) RangeLocationPair(org.locationtech.geowave.mapreduce.splits.RangeLocationPair) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) HBaseOperations(org.locationtech.geowave.datastore.hbase.operations.HBaseOperations) AdapterStoreWrapper(org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with SplitInfo

Use of org.locationtech.geowave.mapreduce.splits.SplitInfo in the GeoWave project by LocationTech.

Method populateIntermediateSplits of class AccumuloSplitsProvider.

/**
 * Adds intermediate splits for the given index to {@code splits}, one per Accumulo tablet that
 * holds at least one of the requested ranges.
 *
 * <p>When {@code constraints} is non-null the ranges come from its index constraints
 * ({@code maxSplits} is forwarded only when it is positive, otherwise {@code -1} is passed); a
 * single resulting range with an infinite start or stop key is clipped to the full data range.
 * When {@code constraints} is null the full data range is used. The ranges are then located on
 * tablets, clipped to each tablet's extent, and every clipped range that is not outside the full
 * range is recorded with the tablet's host name and a cardinality estimate that is never reported
 * below {@code 1.0}.
 *
 * @param splits the set to add to; also the return value
 * @param operations must be an {@code AccumuloOperations}; otherwise an error is logged and
 *        {@code splits} is returned unchanged
 * @param index the index whose table is being split
 * @param adapterIds adapter ids forwarded to the statistics lookup
 * @param statsCache histogram cache forwarded to {@code getHistStats}
 * @param adapterStore wrapped together with {@code internalAdapterStore} for statistics lookups
 * @param internalAdapterStore see {@code adapterStore}
 * @param statsStore statistics store forwarded to the statistics lookup
 * @param maxSplits optional upper bound forwarded to range decomposition; ignored unless positive
 * @param constraints optional query constraints; may be {@code null}
 * @param targetResolutionPerDimensionForHierarchicalIndex forwarded to range decomposition
 * @param indexMetadata forwarded to range decomposition
 * @param authorizations forwarded to the statistics lookup
 * @return {@code splits}, possibly with additional entries
 * @throws IOException if tablet locations cannot be retrieved or a host name cannot be resolved
 */
@Override
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(final TreeSet<IntermediateSplitInfo> splits, final DataStoreOperations operations, final Index index, final List<Short> adapterIds, final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache, final TransientAdapterStore adapterStore, final InternalAdapterStore internalAdapterStore, final DataStatisticsStore statsStore, final Integer maxSplits, final QueryConstraints constraints, final double[] targetResolutionPerDimensionForHierarchicalIndex, final IndexMetaData[] indexMetadata, final String[] authorizations) throws IOException {
    if (!(operations instanceof AccumuloOperations)) {
        LOGGER.error("AccumuloSplitsProvider requires AccumuloOperations object.");
        return splits;
    }
    final AccumuloOperations accumuloOperations = (AccumuloOperations) operations;
    final int partitionKeyLength = index.getIndexStrategy().getPartitionKeyLength();
    Range fullrange;
    try {
        fullrange = toAccumuloRange(new GeoWaveRowRange(null, null, null, true, true), partitionKeyLength);
    } catch (final Exception e) {
        // fall back to an unbounded range rather than failing the whole split computation
        fullrange = new Range();
        LOGGER.warn("Cannot ascertain the full range of the data", e);
    }
    final String tableName = AccumuloUtils.getQualifiedTableName(accumuloOperations.getTableNameSpace(), index.getName());
    final TreeSet<Range> ranges;
    if (constraints != null) {
        final List<MultiDimensionalNumericData> indexConstraints = constraints.getIndexConstraints(index);
        // a missing or non-positive maxSplits is passed on as -1
        final int maxRanges = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
        ranges = AccumuloUtils.byteArrayRangesToAccumuloRanges(DataStoreUtils.constraintsToQueryRanges(indexConstraints, index, targetResolutionPerDimensionForHierarchicalIndex, maxRanges, indexMetadata).getCompositeQueryRanges());
        if (ranges.size() == 1) {
            final Range range = ranges.first();
            if (range.isInfiniteStartKey() || range.isInfiniteStopKey()) {
                // replace a lone open-ended range with its intersection with the full range
                ranges.remove(range);
                ranges.add(fullrange.clip(range));
            }
        }
    } else {
        ranges = new TreeSet<>();
        ranges.add(fullrange);
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Protected range: " + fullrange);
        }
    }
    // get the metadata information for these ranges
    final HashMap<String, String> hostNameCache = getHostNameCache();
    final Connector conn = accumuloOperations.getConnector();
    final Locations locations;
    try {
        locations = conn.tableOperations().locate(tableName, ranges);
    } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
        throw new IOException("Unable to get Tablet Locations", e);
    }
    // one wrapper serves every statistics lookup below; no need to allocate it per range
    final AdapterStoreWrapper persistentAdapterStore = new AdapterStoreWrapper(adapterStore, internalAdapterStore);
    for (final Entry<TabletId, List<Range>> tabletIdRanges : locations.groupByTablet().entrySet()) {
        final TabletId tabletId = tabletIdRanges.getKey();
        final String tabletServer = locations.getTabletLocation(tabletId);
        // keep only the part before the first ':' of the tablet server location
        final String ipAddress = tabletServer.split(":", 2)[0];
        String location = hostNameCache.get(ipAddress);
        if (location == null) {
            // cache miss: resolve the host name once and remember it for later tablets
            final InetAddress inetAddress = InetAddress.getByName(ipAddress);
            location = inetAddress.getHostName();
            hostNameCache.put(ipAddress, location);
        }
        final Range tabletRange = tabletId.toRange();
        final List<RangeLocationPair> rangeList = new ArrayList<>();
        for (final Range range : tabletIdRanges.getValue()) {
            final Range clippedRange = tabletRange.clip(range);
            if (!(fullrange.beforeStartKey(clippedRange.getEndKey()) || fullrange.afterEndKey(clippedRange.getStartKey()))) {
                final GeoWaveRowRange rowRange = fromAccumuloRange(clippedRange, partitionKeyLength);
                final double cardinality = getCardinality(getHistStats(index, adapterIds, persistentAdapterStore, statsStore, statsCache, new ByteArray(rowRange.getPartitionKey()), authorizations), rowRange);
                // never report a cardinality below 1
                rangeList.add(new RangeLocationPair(rowRange, location, cardinality < 1 ? 1.0 : cardinality));
                // Log only when a range was actually added: reading the last list element
                // unconditionally would throw on an empty list or repeat a stale entry.
                if (LOGGER.isTraceEnabled()) {
                    LOGGER.trace("Clipped range: " + rowRange);
                }
            } else {
                LOGGER.info("Query split outside of range");
            }
        }
        if (!rangeList.isEmpty()) {
            final Map<String, SplitInfo> splitInfo = new HashMap<>();
            splitInfo.put(index.getName(), new SplitInfo(index, rangeList));
            splits.add(new IntermediateSplitInfo(splitInfo, this));
        }
    }
    return splits;
}
Also used : Connector(org.apache.accumulo.core.client.Connector) AccumuloOperations(org.locationtech.geowave.datastore.accumulo.operations.AccumuloOperations) IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SplitInfo(org.locationtech.geowave.mapreduce.splits.SplitInfo) IntermediateSplitInfo(org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ByteArray(org.locationtech.geowave.core.index.ByteArray) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) ArrayList(java.util.ArrayList) List(java.util.List) RangeLocationPair(org.locationtech.geowave.mapreduce.splits.RangeLocationPair) AccumuloException(org.apache.accumulo.core.client.AccumuloException) MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) AdapterStoreWrapper(org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper) Locations(org.apache.accumulo.core.client.admin.Locations) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) GeoWaveRowRange(org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TabletId(org.apache.accumulo.core.data.TabletId) InetAddress(java.net.InetAddress)

Aggregations

ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 ByteArray (org.locationtech.geowave.core.index.ByteArray)2 MultiDimensionalNumericData (org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData)2 AdapterStoreWrapper (org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper)2 GeoWaveRowRange (org.locationtech.geowave.mapreduce.splits.GeoWaveRowRange)2 IntermediateSplitInfo (org.locationtech.geowave.mapreduce.splits.IntermediateSplitInfo)2 RangeLocationPair (org.locationtech.geowave.mapreduce.splits.RangeLocationPair)2 SplitInfo (org.locationtech.geowave.mapreduce.splits.SplitInfo)2 IOException (java.io.IOException)1 InetAddress (java.net.InetAddress)1 Map (java.util.Map)1 TreeSet (java.util.TreeSet)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 Connector (org.apache.accumulo.core.client.Connector)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 Locations (org.apache.accumulo.core.client.admin.Locations)1 Range (org.apache.accumulo.core.data.Range)1