Search in sources :

Example 1 with IndexMetaData

Use of org.locationtech.geowave.core.index.IndexMetaData in project geowave by locationtech.

The method getSplits of the class SplitsProvider.

/**
 * Computes the input splits for a query.
 *
 * <p>Intermediate splits are first gathered for every index/adapter-id pairing returned by
 * {@code BaseDataStoreUtils.getAdaptersWithMinimalSetOfIndices}. The resulting set is then either
 * grown (by repeatedly splitting the last split in the ordered set) until {@code minSplits} is
 * reached, or shrunk (by repeatedly merging the first two splits in the ordered set) until it
 * fits within {@code maxSplits}. Finally every intermediate split is converted to its final form.
 *
 * @param minSplits desired minimum number of splits; ignored when {@code null}, when no
 *        statistics were cached, or when no splits were produced
 * @param maxSplits desired maximum number of splits; ignored when {@code null} or not positive,
 *        and only considered when the minimum-split step did not apply
 * @return the final splits, in the iteration order of the intermediate split set
 * @throws IOException declared by this method; propagated from the underlying calls
 * @throws InterruptedException declared by this method; propagated from the underlying calls
 */
public List<InputSplit> getSplits(
        final DataStoreOperations operations,
        final CommonQueryOptions commonOptions,
        final DataTypeQueryOptions<?> typeOptions,
        final IndexQueryOptions indexOptions,
        final QueryConstraints constraints,
        final TransientAdapterStore adapterStore,
        final DataStatisticsStore statsStore,
        final InternalAdapterStore internalAdapterStore,
        final IndexStore indexStore,
        final AdapterIndexMappingStore adapterIndexMappingStore,
        final JobContext context,
        final Integer minSplits,
        final Integer maxSplits) throws IOException, InterruptedException {
    final Map<Pair<Index, ByteArray>, RowRangeHistogramValue> statsCache = new HashMap<>();
    final TreeSet<IntermediateSplitInfo> splits = new TreeSet<>();
    final Map<String, List<Short>> indexIdToAdaptersMap = new HashMap<>();

    for (final Pair<Index, List<Short>> indexAndAdapterIds : BaseDataStoreUtils.getAdaptersWithMinimalSetOfIndices(
            typeOptions.getTypeNames(),
            indexOptions.getIndexName(),
            adapterStore,
            internalAdapterStore,
            adapterIndexMappingStore,
            indexStore,
            constraints)) {
        final Index index = indexAndAdapterIds.getLeft();
        final List<Short> adapterIds = indexAndAdapterIds.getRight();

        final QueryConstraints indexAdapterConstraints;
        if (!(constraints instanceof AdapterAndIndexBasedQueryConstraints)) {
            // plain constraints apply unchanged to every index/adapter pairing
            indexAdapterConstraints = constraints;
        } else {
            // adapter-dependent constraints can only be specialized here when
            // exactly one adapter id is associated with this index
            DataTypeAdapter<?> adapter = null;
            if (adapterIds.size() == 1) {
                final String typeName = internalAdapterStore.getTypeName(adapterIds.get(0));
                if (typeName != null) {
                    adapter = adapterStore.getAdapter(typeName);
                }
            }
            if (adapter != null) {
                final Short adapterId = adapterIds.get(0);
                indexAdapterConstraints = ((AdapterAndIndexBasedQueryConstraints) constraints).createQueryConstraints(
                        adapter.asInternalAdapter(adapterId),
                        index,
                        adapterIndexMappingStore.getMapping(adapterId, index.getName()));
                if (indexAdapterConstraints == null) {
                    // nothing to query for this pairing
                    continue;
                }
                // Hand the specialized constraints on to the record reader
                // through the job configuration. For spark on YARN (not
                // locally though) the job configuration is immutable, so
                // although reading the constraint from the configuration is
                // more efficient, the Record Reader itself also checks for
                // AdapterAndIndexBasedQueryConstraints.
                GeoWaveInputFormat.setQueryConstraints(context.getConfiguration(), indexAdapterConstraints);
            } else {
                // fall back to the original constraints when no single adapter
                // could be resolved
                indexAdapterConstraints = constraints;
                LOGGER.info("Unable to find type matching an adapter dependent query");
            }
        }

        indexIdToAdaptersMap.put(index.getName(), adapterIds);

        // index metadata is only looked up when there are constraints to apply
        IndexMetaData[] indexMetadata = null;
        if (indexAdapterConstraints != null) {
            final IndexMetaDataSetValue metadataValue = InternalStatisticsHelper.getIndexMetadata(
                    index,
                    adapterIds,
                    new AdapterStoreWrapper(adapterStore, internalAdapterStore),
                    statsStore,
                    commonOptions.getAuthorizations());
            if (metadataValue != null) {
                indexMetadata = metadataValue.toArray();
            }
        }

        final double[] targetResolutionPerDimension = (double[]) commonOptions.getHints().get(DataStoreUtils.TARGET_RESOLUTION_PER_DIMENSION_FOR_HIERARCHICAL_INDEX);
        populateIntermediateSplits(
                splits,
                operations,
                index,
                adapterIds,
                statsCache,
                adapterStore,
                internalAdapterStore,
                statsStore,
                maxSplits,
                indexAdapterConstraints,
                targetResolutionPerDimension,
                indexMetadata,
                commonOptions.getAuthorizations());
    }

    // This is an incremental algorithm; it may be better to let the target
    // split count drive it (i.e. to get 3 splits this will split 1 large
    // range into two down the middle and then split one of those halves
    // again, rather than splitting one range into thirds).
    final List<IntermediateSplitInfo> unsplittable = new ArrayList<>();
    final boolean belowMinimum = !statsCache.isEmpty()
            && !splits.isEmpty()
            && (minSplits != null)
            && (splits.size() < minSplits);
    if (belowMinimum) {
        // grow the set until at least minSplits splits exist
        do {
            // take the highest range and try to split it in two, which
            // increases the overall count by 1
            final IntermediateSplitInfo largest = splits.pollLast();
            final IntermediateSplitInfo remainder = largest.split(statsCache);
            if (remainder != null) {
                splits.add(largest);
                splits.add(remainder);
            } else {
                // this one cannot be split; set it aside and try the next
                // highest on the following pass
                unsplittable.add(largest);
            }
        } while (!splits.isEmpty() && ((splits.size() + unsplittable.size()) < minSplits));
        // put the splits that were set aside back into the set
        splits.addAll(unsplittable);
        if (splits.size() < minSplits) {
            LOGGER.warn("Truly unable to meet split count. Actual Count: " + splits.size());
        }
    } else if ((maxSplits != null) && (maxSplits > 0)) {
        // shrink the set until it fits within maxSplits
        while (splits.size() > maxSplits) {
            // Naive approach: merge the two lowest ranges, which decreases
            // the overall count by 1.
            // TODO Ideally merge takes into account locations (as well
            // as possibly the index as a secondary criteria) to limit
            // the number of locations/indices
            final IntermediateSplitInfo smallest = splits.pollFirst();
            final IntermediateSplitInfo nextSmallest = splits.pollFirst();
            smallest.merge(nextSmallest);
            splits.add(smallest);
        }
    }

    final List<InputSplit> retVal = new ArrayList<>();
    for (final IntermediateSplitInfo intermediate : splits) {
        retVal.add(intermediate.toFinalSplit(statsStore, adapterStore, internalAdapterStore, indexIdToAdaptersMap, commonOptions.getAuthorizations()));
    }
    return retVal;
}
Also used : HashMap(java.util.HashMap) AdapterAndIndexBasedQueryConstraints(org.locationtech.geowave.core.store.query.constraints.AdapterAndIndexBasedQueryConstraints) ArrayList(java.util.ArrayList) IndexMetaDataSetValue(org.locationtech.geowave.core.store.statistics.index.IndexMetaDataSetStatistic.IndexMetaDataSetValue) Index(org.locationtech.geowave.core.store.api.Index) QueryConstraints(org.locationtech.geowave.core.store.query.constraints.QueryConstraints) AdapterAndIndexBasedQueryConstraints(org.locationtech.geowave.core.store.query.constraints.AdapterAndIndexBasedQueryConstraints) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) List(java.util.List) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Pair(org.apache.commons.lang3.tuple.Pair) RowRangeHistogramValue(org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic.RowRangeHistogramValue) AdapterStoreWrapper(org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper) IndexMetaData(org.locationtech.geowave.core.index.IndexMetaData)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 TreeSet (java.util.TreeSet)1 Pair (org.apache.commons.lang3.tuple.Pair)1 InputSplit (org.apache.hadoop.mapreduce.InputSplit)1 IndexMetaData (org.locationtech.geowave.core.index.IndexMetaData)1 AdapterStoreWrapper (org.locationtech.geowave.core.store.adapter.AdapterStoreWrapper)1 Index (org.locationtech.geowave.core.store.api.Index)1 AdapterAndIndexBasedQueryConstraints (org.locationtech.geowave.core.store.query.constraints.AdapterAndIndexBasedQueryConstraints)1 QueryConstraints (org.locationtech.geowave.core.store.query.constraints.QueryConstraints)1 IndexMetaDataSetValue (org.locationtech.geowave.core.store.statistics.index.IndexMetaDataSetStatistic.IndexMetaDataSetValue)1 RowRangeHistogramValue (org.locationtech.geowave.core.store.statistics.index.RowRangeHistogramStatistic.RowRangeHistogramValue)1