Search in sources :

Example 1 with SubStrategy

use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.

the class DataStoreUtils method constraintsToQueryRanges.

/**
 * Translates a list of numeric constraints into the query ranges of the given index.
 *
 * <p>A {@link CustomIndex} that is handed exactly one {@link InternalCustomConstraints} element
 * produces its ranges itself. Otherwise, when a target resolution is supplied whose length equals
 * the number of ordered dimensions of the index strategy and the strategy wraps a hierarchical
 * strategy, a single sub-strategy (tier) is selected and used in place of the full strategy.
 *
 * @param constraints the constraints to translate; may be {@code null} or empty
 * @param index the index whose strategy produces the ranges
 * @param targetResolutionPerDimensionForHierarchicalIndex per-dimension resolution used to pick a
 *        tier; ignored when {@code null} or when its length does not match the dimension count
 * @param maxRanges passed through to the index strategy's {@code getQueryRanges}
 * @param hints passed through to the index strategy's {@code getQueryRanges}
 * @return the ranges for the constraints; with no constraints, either the prefix of the chosen
 *         tier or an empty {@link QueryRanges}
 */
public static QueryRanges constraintsToQueryRanges(final List<MultiDimensionalNumericData> constraints, final Index index, final double[] targetResolutionPerDimensionForHierarchicalIndex, final int maxRanges, final IndexMetaData... hints) {
    // a custom index with a single custom constraint computes its own ranges
    if ((index instanceof CustomIndex) && (constraints != null) && (constraints.size() == 1)) {
        final MultiDimensionalNumericData onlyConstraint = constraints.get(0);
        if (onlyConstraint instanceof InternalCustomConstraints) {
            return ((CustomIndex) index).getQueryRanges(((InternalCustomConstraints) onlyConstraint).getCustomConstraints());
        }
    }
    NumericIndexStrategy effectiveStrategy = index.getIndexStrategy();
    SubStrategy chosenTier = null;
    final boolean resolutionUsable = (targetResolutionPerDimensionForHierarchicalIndex != null) && (targetResolutionPerDimensionForHierarchicalIndex.length == effectiveStrategy.getOrderedDimensionDefinitions().length);
    if (resolutionUsable) {
        final HierarchicalNumericIndexStrategy hierarchy = CompoundHierarchicalIndexStrategyWrapper.findHierarchicalStrategy(effectiveStrategy);
        if (hierarchy != null) {
            // key each tier by the sum of its id range over all dimensions
            final TreeMap<Double, SubStrategy> tiersByRangeSum = new TreeMap<>();
            for (final SubStrategy tier : hierarchy.getSubStrategies()) {
                double total = 0;
                for (final double range : tier.getIndexStrategy().getHighestPrecisionIdRangePerDimension()) {
                    total += range;
                }
                tiersByRangeSum.put(total, tier);
            }
            // walk from the largest range sum downwards and take the first tier whose id range
            // does not exceed the target resolution in any dimension
            for (final SubStrategy candidate : tiersByRangeSum.descendingMap().values()) {
                final double[] idRange = candidate.getIndexStrategy().getHighestPrecisionIdRangePerDimension();
                int d = 0;
                while ((d < idRange.length) && !(idRange[d] > targetResolutionPerDimensionForHierarchicalIndex[d])) {
                    d++;
                }
                if (d == idRange.length) {
                    chosenTier = candidate;
                    break;
                }
            }
            if (chosenTier == null) {
                // nothing fits the target resolution: fall back to the tier with the smallest
                // range sum (the highest precision)
                chosenTier = tiersByRangeSum.firstEntry().getValue();
            }
            effectiveStrategy = chosenTier.getIndexStrategy();
        }
    }
    if ((constraints != null) && !constraints.isEmpty()) {
        final List<QueryRanges> perConstraint = new ArrayList<>(constraints.size());
        for (final MultiDimensionalNumericData constraint : constraints) {
            perConstraint.add(effectiveStrategy.getQueryRanges(constraint, maxRanges, hints));
        }
        if (perConstraint.size() > 1) {
            return new QueryRanges(perConstraint);
        }
        return perConstraint.get(0);
    }
    if (chosenTier != null) {
        // no constraints, but a tier was chosen: restrict to that tier's prefix
        return new QueryRanges(new byte[][] { chosenTier.getPrefix() });
    }
    // no constraints and no tier: unbounded (negative to positive infinity)
    return new QueryRanges();
}
Also used : SubStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy) SinglePartitionQueryRanges(org.locationtech.geowave.core.index.SinglePartitionQueryRanges) QueryRanges(org.locationtech.geowave.core.index.QueryRanges) MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) HierarchicalNumericIndexStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) CustomIndex(org.locationtech.geowave.core.store.index.CustomIndex) InternalCustomConstraints(org.locationtech.geowave.core.store.query.constraints.CustomQueryConstraints.InternalCustomConstraints) HierarchicalNumericIndexStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy) NumericIndexStrategy(org.locationtech.geowave.core.index.NumericIndexStrategy)

Example 2 with SubStrategy

use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.

the class RasterDataAdapter method convertToIndex.

/**
 * Fits a grid coverage to the sub-strategies (pyramid levels) of a hierarchical index.
 *
 * <p>The coverage envelope is transformed to the index CRS when the two differ, clamped to the
 * index bounds, and used to compute the extent of one tile per dimension. Each sub-strategy is
 * then keyed by the largest per-dimension ratio of its id range to that tile extent, and the
 * levels to write are chosen from that ordering.
 *
 * @param index the index to fit the coverage to
 * @param gridCoverage the coverage to convert
 * @return an iterator over one entry per selected pyramid level, or an empty iterator when the
 *         index strategy contains no hierarchical strategy
 */
@Override
public Iterator<GridCoverage> convertToIndex(final Index index, final GridCoverage gridCoverage) {
    final HierarchicalNumericIndexStrategy hierarchy = CompoundHierarchicalIndexStrategyWrapper.findHierarchicalStrategy(index.getIndexStrategy());
    if (hierarchy == null) {
        LOGGER.warn("Strategy is not an instance of HierarchicalNumericIndexStrategy : " + index.getIndexStrategy().getClass().getName());
        return Collections.<GridCoverage>emptyIterator();
    }
    final CoordinateReferenceSystem coverageCrs = gridCoverage.getCoordinateReferenceSystem();
    final Envelope coverageEnvelope = gridCoverage.getEnvelope();
    final org.locationtech.jts.geom.Envelope jtsEnvelope = new org.locationtech.jts.geom.Envelope(coverageEnvelope.getMinimum(0), coverageEnvelope.getMaximum(0), coverageEnvelope.getMinimum(1), coverageEnvelope.getMaximum(1));
    final ReferencedEnvelope nativeEnvelope = new ReferencedEnvelope(jtsEnvelope, gridCoverage.getCoordinateReferenceSystem());
    // start from the untransformed envelope; it is kept if the transform below fails
    ReferencedEnvelope projectedEnvelope = nativeEnvelope;
    final CoordinateReferenceSystem indexCrs = GeometryUtils.getIndexCrs(index);
    if (!indexCrs.equals(coverageCrs)) {
        try {
            projectedEnvelope = nativeEnvelope.transform(indexCrs, true);
        } catch (TransformException | FactoryException e) {
            LOGGER.warn("Unable to transform envelope of grid coverage to Index CRS", e);
        }
    }
    final MultiDimensionalNumericData bounds;
    if (!indexCrs.equals(GeometryUtils.getDefaultCRS())) {
        bounds = IndexUtils.clampAtIndexBounds(GeometryUtils.getBoundsFromEnvelope(projectedEnvelope), hierarchy);
    } else {
        bounds = IndexUtils.clampAtIndexBounds(GeometryUtils.basicConstraintSetFromEnvelope(projectedEnvelope).getIndexConstraints(hierarchy), hierarchy);
    }
    final GridEnvelope gridRange = gridCoverage.getGridGeometry().getGridRange();
    // extent of a single tile in each dimension of the clamped bounds
    final double[] tileExtent = new double[bounds.getDimensionCount()];
    final Double[] maxima = bounds.getMaxValuesPerDimension();
    final Double[] minima = bounds.getMinValuesPerDimension();
    for (int d = 0; d < tileExtent.length; d++) {
        final double boundsSpan = maxima[d] - minima[d];
        tileExtent[d] = (boundsSpan * tileSize) / gridRange.getSpan(d);
    }
    // key every level by its largest ratio of id range to tile extent across dimensions;
    // a ratio above 1 means the level is coarser than the sample in at least one dimension
    final TreeMap<Double, SubStrategy> levelsByRatio = new TreeMap<>();
    for (final SubStrategy level : hierarchy.getSubStrategies()) {
        final double[] levelRange = level.getIndexStrategy().getHighestPrecisionIdRangePerDimension();
        double largestRatio = -Double.MAX_VALUE;
        for (int d = 0; d < tileExtent.length; d++) {
            largestRatio = Math.max(largestRatio, levelRange[d] / tileExtent[d]);
        }
        levelsByRatio.put(largestRatio, level);
    }
    // the epsilon absorbs round-off when looking for the level closest to 1.0 without going over
    final double fullRes = 1.0 + MathUtils.EPSILON;
    final List<SubStrategy> selectedLevels = new ArrayList<>();
    final Entry<Double, SubStrategy> fullResLevel = levelsByRatio.floorEntry(fullRes);
    if (fullResLevel != null) {
        // the full resolution level
        selectedLevels.add(fullResLevel.getValue());
    }
    if (buildPyramid) {
        // every strictly coarser level is part of the pyramid
        final NavigableMap<Double, SubStrategy> coarserLevels = levelsByRatio.tailMap(fullRes, false);
        selectedLevels.addAll(coarserLevels.values());
    }
    if (selectedLevels.isEmpty()) {
        // not expected to happen, but make sure the level closest above 1.0 is used
        final Entry<Double, SubStrategy> closestLevel = levelsByRatio.higherEntry(1.0);
        selectedLevels.add(closestLevel.getValue());
    }
    return new IteratorWrapper<>(selectedLevels.iterator(), new MosaicPerPyramidLevelBuilder(bounds, gridCoverage, tileSize, backgroundValuesPerBand, RasterUtils.getFootprint(projectedEnvelope, gridCoverage), interpolation, projectedEnvelope.getCoordinateReferenceSystem()));
}
Also used : FactoryException(org.opengis.referencing.FactoryException) ArrayList(java.util.ArrayList) GeneralEnvelope(org.geotools.geometry.GeneralEnvelope) Envelope(org.opengis.geometry.Envelope) GridEnvelope(org.opengis.coverage.grid.GridEnvelope) ReferencedEnvelope(org.geotools.geometry.jts.ReferencedEnvelope) ReferencedEnvelope(org.geotools.geometry.jts.ReferencedEnvelope) CoordinateReferenceSystem(org.opengis.referencing.crs.CoordinateReferenceSystem) SubStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy) MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) HierarchicalNumericIndexStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy) GridEnvelope(org.opengis.coverage.grid.GridEnvelope) NoninvertibleTransformException(java.awt.geom.NoninvertibleTransformException) TransformException(org.opengis.referencing.operation.TransformException) TreeMap(java.util.TreeMap) Point(java.awt.Point) IteratorWrapper(org.locationtech.geowave.core.store.util.IteratorWrapper) GridCoverage(org.opengis.coverage.grid.GridCoverage) FitToIndexGridCoverage(org.locationtech.geowave.adapter.raster.FitToIndexGridCoverage)

Example 3 with SubStrategy

use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.

the class DeletePyramidLevelCommand method run.

/**
 * Deletes one pyramid level of a raster coverage from a data store.
 *
 * <p>The single expected argument is the store name. The coverage is the only raster type in the
 * store, or the one whose type name equals {@code coverageName} when that is set. The first index
 * of that coverage with a sub-strategy whose one-byte prefix equals {@code level} is used. When
 * data statistics are persisted, the resolution is removed from the raster overview statistic and
 * the affected partitions are removed from the partitions statistic before the data itself is
 * deleted by partition prefix. Failures are logged and end the command early.
 *
 * @param params the operation parameters, used to locate the config file and the console
 * @throws ParameterException if the number of arguments is not exactly one
 */
public void run(final OperationParams params) {
    // Ensure we have all the required arguments
    if (parameters.size() != 1) {
        throw new ParameterException("Requires argument: <store name>");
    }
    final String inputStoreName = parameters.get(0);
    // Attempt to load store.
    inputStoreOptions = CLIUtils.loadStore(inputStoreName, getGeoWaveConfigFile(params), params.getConsole());
    final DataStore store = inputStoreOptions.createDataStore();
    RasterDataAdapter adapter = null;
    for (final DataTypeAdapter<?> type : store.getTypes()) {
        // compare against the candidate type's name: 'adapter' is still null until a match is
        // assigned, so dereferencing it here would fail whenever a coverage name is given
        if (isRaster(type) && ((coverageName == null) || coverageName.equals(type.getTypeName()))) {
            if (adapter != null) {
                LOGGER.error("Store has multiple coverages.  Must explicitly choose one with --coverage option.");
                return;
            }
            adapter = (RasterDataAdapter) type;
        }
    }
    if (adapter == null) {
        LOGGER.error("Store has no coverages or coverage name not found.");
        return;
    }
    boolean found = false;
    Resolution res = null;
    Index i = null;
    for (final Index index : store.getIndices(adapter.getTypeName())) {
        final HierarchicalNumericIndexStrategy indexStrategy = CompoundHierarchicalIndexStrategyWrapper.findHierarchicalStrategy(index.getIndexStrategy());
        if (indexStrategy != null) {
            for (final SubStrategy s : indexStrategy.getSubStrategies()) {
                // a sub-strategy matches when its prefix is the single byte of the requested level
                if ((s.getPrefix().length == 1) && (s.getPrefix()[0] == level)) {
                    LOGGER.info("Deleting from index " + index.getName());
                    final double[] tileRes = s.getIndexStrategy().getHighestPrecisionIdRangePerDimension();
                    // per-pixel resolution is the tile range divided by the adapter's tile size
                    final double[] pixelRes = new double[tileRes.length];
                    for (int d = 0; d < tileRes.length; d++) {
                        pixelRes[d] = tileRes[d] / adapter.getTileSize();
                    }
                    found = true;
                    i = index;
                    res = new Resolution(pixelRes);
                    break;
                }
            }
        }
        if (found) {
            break;
        }
    }
    if (!found) {
        LOGGER.error("Store has no indices supporting pyramids.");
        return;
    }
    final byte[][] predefinedSplits = i.getIndexStrategy().getPredefinedSplits();
    // this should account for hash partitioning if used
    final List<ByteArray> partitions = new ArrayList<>();
    if ((predefinedSplits != null) && (predefinedSplits.length > 0)) {
        // each predefined split is followed by the level byte
        for (final byte[] split : predefinedSplits) {
            partitions.add(new ByteArray(ArrayUtils.add(split, level.byteValue())));
        }
    } else {
        partitions.add(new ByteArray(new byte[] { level.byteValue() }));
    }
    // delete the resolution from the overview, delete the partitions, and delete the data
    if (inputStoreOptions.getFactoryOptions().getStoreOptions().isPersistDataStatistics()) {
        final DataStatisticsStore statsStore = inputStoreOptions.createDataStatisticsStore();
        boolean overviewStatsFound = false;
        boolean partitionStatsFound = false;
        try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> it = statsStore.getDataTypeStatistics(adapter, RasterOverviewStatistic.STATS_TYPE, null)) {
            while (it.hasNext()) {
                final Statistic<? extends StatisticValue<?>> next = it.next();
                // only overview statistics without a binning strategy are updated
                if ((next instanceof RasterOverviewStatistic) && (next.getBinningStrategy() == null)) {
                    final RasterOverviewStatistic statistic = (RasterOverviewStatistic) next;
                    final RasterOverviewValue value = statsStore.getStatisticValue(statistic);
                    if (!value.removeResolution(res)) {
                        LOGGER.error("Unable to remove resolution for pyramid level " + level);
                        return;
                    }
                    statsStore.setStatisticValue(statistic, value);
                    overviewStatsFound = true;
                }
            }
        }
        if (!overviewStatsFound) {
            LOGGER.error("Unable to find overview stats for coverage " + adapter.getTypeName());
            return;
        }
        try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> it = statsStore.getIndexStatistics(i, PartitionsStatistic.STATS_TYPE, null)) {
            while (it.hasNext()) {
                final Statistic<? extends StatisticValue<?>> next = it.next();
                if (next instanceof PartitionsStatistic) {
                    // only partition statistics binned by data type are updated
                    if ((next.getBinningStrategy() != null) && (next.getBinningStrategy() instanceof DataTypeBinningStrategy)) {
                        final PartitionsStatistic statistic = (PartitionsStatistic) next;
                        final PartitionsValue value = statsStore.getStatisticValue(statistic, DataTypeBinningStrategy.getBin(adapter));
                        for (final ByteArray p : partitions) {
                            if (!value.getValue().remove(p)) {
                                LOGGER.error("Unable to remove partition " + p.getHexString() + " for pyramid level " + level);
                                return;
                            }
                        }
                        statsStore.setStatisticValue(statistic, value, DataTypeBinningStrategy.getBin(adapter));
                        partitionStatsFound = true;
                    }
                }
            }
        }
        if (!partitionStatsFound) {
            LOGGER.error("Unable to find partition stats for coverage " + adapter.getTypeName() + " and index " + i.getName());
            return;
        }
    }
    // finally delete the data itself, one prefix query per partition
    for (final ByteArray p : partitions) {
        store.delete(QueryBuilder.newBuilder().constraints(QueryBuilder.newBuilder().constraintsFactory().prefix(p.getBytes(), null)).addTypeName(adapter.getTypeName()).indexName(i.getName()).build());
    }
}
Also used : PartitionsValue(org.locationtech.geowave.core.store.statistics.index.PartitionsStatistic.PartitionsValue) ArrayList(java.util.ArrayList) Index(org.locationtech.geowave.core.store.api.Index) PartitionsStatistic(org.locationtech.geowave.core.store.statistics.index.PartitionsStatistic) DataStatisticsStore(org.locationtech.geowave.core.store.statistics.DataStatisticsStore) DataStore(org.locationtech.geowave.core.store.api.DataStore) RasterOverviewValue(org.locationtech.geowave.adapter.raster.stats.RasterOverviewStatistic.RasterOverviewValue) ByteArray(org.locationtech.geowave.core.index.ByteArray) ParameterException(com.beust.jcommander.ParameterException) SubStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy) HierarchicalNumericIndexStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy) DataTypeBinningStrategy(org.locationtech.geowave.core.store.statistics.binning.DataTypeBinningStrategy) RasterDataAdapter(org.locationtech.geowave.adapter.raster.adapter.RasterDataAdapter) RasterOverviewStatistic(org.locationtech.geowave.adapter.raster.stats.RasterOverviewStatistic) Resolution(org.locationtech.geowave.adapter.raster.Resolution)

Example 4 with SubStrategy

use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.

the class TieredSpatialJoin method join.

/**
 * Joins two indexed RDDs tier by tier using a {@link TieredSFCIndexStrategy} and publishes the
 * outcome through {@code setLeftResults} and {@code setRightResults}.
 *
 * <p>A tiered strategy is taken from whichever side supports the join, or a default one is created
 * from either side's strategy; sides that do not use the chosen strategy are reindexed. The first
 * byte of each index key is treated as the tier id. Tiers present on both sides are joined
 * directly, while the other tiers are reprojected to the tier they are compared against. Matches
 * from all tiers are unioned and reduced by key before the per-side results are built.
 *
 * @param spark the session providing the Spark context
 * @param leftRDD the left indexed dataset; may be reindexed by this call
 * @param rightRDD the right indexed dataset; may be reindexed by this call
 * @param predicate the geometry predicate; also supplies the buffer amount
 * @throws InterruptedException presumably when waiting on the asynchronous tier collection is
 *         interrupted (the only visible blocking calls are the {@code get()} calls on the futures)
 * @throws ExecutionException presumably when one of the asynchronous tier collections fails
 */
@Override
public void join(final SparkSession spark, final GeoWaveIndexedRDD leftRDD, final GeoWaveIndexedRDD rightRDD, final GeomFunction predicate) throws InterruptedException, ExecutionException {
    // Get SparkContext from session
    final SparkContext sc = spark.sparkContext();
    final JavaSparkContext javaSC = JavaSparkContext.fromSparkContext(sc);
    final NumericIndexStrategy leftStrategy = leftRDD.getIndexStrategy().getValue();
    final NumericIndexStrategy rightStrategy = rightRDD.getIndexStrategy().getValue();
    // Check if either dataset supports the join
    TieredSFCIndexStrategy tieredStrategy = null;
    // Determine if either strategy needs to be reindexed to support join algorithm
    boolean reindexLeft = false;
    boolean reindexRight = false;
    final boolean leftSupport = supportsJoin(leftStrategy);
    final boolean rightSupport = supportsJoin(rightStrategy);
    if (leftSupport && rightSupport) {
        if (leftStrategy.equals(rightStrategy)) {
            // Both strategies match we don't have to reindex
            tieredStrategy = (TieredSFCIndexStrategy) leftStrategy;
        } else {
            // support but don't match: keep the build side's strategy and reindex the other side
            if (getJoinOptions().getJoinBuildSide() == JoinOptions.BuildSide.LEFT) {
                reindexRight = true;
                tieredStrategy = (TieredSFCIndexStrategy) leftStrategy;
            } else {
                reindexLeft = true;
                tieredStrategy = (TieredSFCIndexStrategy) rightStrategy;
            }
        }
    } else if (leftSupport) {
        reindexRight = true;
        tieredStrategy = (TieredSFCIndexStrategy) leftStrategy;
    } else if (rightSupport) {
        reindexLeft = true;
        tieredStrategy = (TieredSFCIndexStrategy) rightStrategy;
    } else {
        // Neither side supports the join: derive a default strategy and reindex both sides
        tieredStrategy = (TieredSFCIndexStrategy) createDefaultStrategy(leftStrategy);
        if (tieredStrategy == null) {
            tieredStrategy = (TieredSFCIndexStrategy) createDefaultStrategy(rightStrategy);
        }
        if (tieredStrategy == null) {
            LOGGER.error("Cannot create default strategy from either provided strategy. Datasets cannot be joined.");
            return;
        }
        reindexLeft = true;
        reindexRight = true;
    }
    // Pull information and broadcast strategy used for join
    final SubStrategy[] tierStrategies = tieredStrategy.getSubStrategies();
    final int tierCount = tierStrategies.length;
    // Create broadcast variable for indexing strategy
    // Cast is safe because we must be instance of TieredSFCIndexStrategy to support join.
    final Broadcast<TieredSFCIndexStrategy> broadcastStrategy = (Broadcast<TieredSFCIndexStrategy>) RDDUtils.broadcastIndexStrategy(sc, tieredStrategy);
    final Broadcast<GeomFunction> geomPredicate = javaSC.broadcast(predicate);
    // If needed reindex one of the strategies we will wrap the buffer operation into the reindex
    // operation
    // Otherwise we buffer based off the buildside of the join.
    setBufferAmount(predicate.getBufferAmount());
    // Reindex if necessary and get RDD of indexed Geometry
    JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> leftIndex = null;
    JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> rightIndex = null;
    if (reindexLeft && reindexRight) {
        leftRDD.reindex(broadcastStrategy);
        rightRDD.reindex(broadcastStrategy);
    } else if (reindexLeft) {
        leftRDD.reindex(broadcastStrategy);
    } else if (reindexRight) {
        rightRDD.reindex(broadcastStrategy);
    }
    // The side opposite the build side is requested with the buffer distance applied
    if (joinOpts.getJoinBuildSide() == BuildSide.LEFT) {
        rightIndex = rightRDD.getIndexedGeometryRDD(bufferDistance, true);
        leftIndex = leftRDD.getIndexedGeometryRDD();
    } else {
        leftIndex = leftRDD.getIndexedGeometryRDD(bufferDistance, true);
        rightIndex = rightRDD.getIndexedGeometryRDD();
    }
    final int leftPartCount = leftIndex.getNumPartitions();
    final int rightPartCount = rightIndex.getNumPartitions();
    final int highestPartCount = (leftPartCount > rightPartCount) ? leftPartCount : rightPartCount;
    // Partitioner sized at 1.5 times the larger of the two partition counts
    final int largePartitionerCount = (int) (1.5 * highestPartCount);
    final HashPartitioner partitioner = new HashPartitioner(largePartitionerCount);
    // Asynchronously collect the distinct first key byte (used below as the tier id) of each side
    final JavaFutureAction<List<Byte>> leftFuture = leftIndex.setName("LeftIndex").keys().map(t -> t.getBytes()[0]).distinct(4).collectAsync();
    final JavaFutureAction<List<Byte>> rightFuture = rightIndex.setName("RightIndex").keys().map(t -> t.getBytes()[0]).distinct(4).collectAsync();
    // Get the result of future
    final List<Byte> rightDataTiers = Lists.newArrayList(rightFuture.get());
    // Sort tiers in ascending order (the last element is the highest tier) and collect information.
    final Byte[] rightTierArr = rightDataTiers.toArray(new Byte[0]);
    Arrays.sort(rightTierArr);
    final int rightTierCount = rightTierArr.length;
    final List<Byte> leftDataTiers = Lists.newArrayList(leftFuture.get());
    final Byte[] leftTierArr = leftDataTiers.toArray(new Byte[0]);
    Arrays.sort(leftTierArr);
    final int leftTierCount = leftTierArr.length;
    // Determine if there are common higher tiers for whole dataset on either side.
    // NOTE(review): indexing with length - 1 throws ArrayIndexOutOfBoundsException if either side
    // produced no tiers (e.g. an empty indexed RDD) - confirm callers never pass empty datasets
    final byte highestLeftTier = leftTierArr[leftTierArr.length - 1];
    final byte highestRightTier = rightTierArr[rightTierArr.length - 1];
    // Find a common run of higher tiers
    Byte[] commonLeftTiers = ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
    Byte[] commonRightTiers = ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
    boolean skipMapCreate = false;
    if (leftTierArr[0] > highestRightTier) {
        // Whole left dataset is higher tiers than right
        commonLeftTiers = leftTierArr;
        skipMapCreate = true;
    } else if (rightTierArr[0] > highestLeftTier) {
        // Whole right dataset is higher tiers than left
        commonRightTiers = rightTierArr;
        skipMapCreate = true;
    }
    LOGGER.debug("Tier Count: " + tierCount);
    LOGGER.debug("Left Tier Count: " + leftTierCount + " Right Tier Count: " + rightTierCount);
    LOGGER.debug("Left Tiers: " + leftDataTiers);
    LOGGER.debug("Right Tiers: " + rightDataTiers);
    Map<Byte, HashSet<Byte>> rightReprojectMap = new HashMap<>();
    Map<Byte, HashSet<Byte>> leftReprojectMap = new HashMap<>();
    final HashSet<Byte> sharedTiers = Sets.newHashSetWithExpectedSize(tierCount / 2);
    // The reproject maps are only built when the tier ranges of the two sides overlap
    if (!skipMapCreate) {
        leftReprojectMap = createReprojectMap(leftTierArr, rightTierArr, sharedTiers);
        rightReprojectMap = createReprojectMap(rightTierArr, leftTierArr, sharedTiers);
    }
    JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> commonRightRDD = null;
    // Identity comparison against the shared empty array detects whether the array was replaced above
    final boolean commonRightExist = commonRightTiers != ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
    if (commonRightExist) {
        commonRightRDD = rightRDD.getGeoWaveRDD().getRawRDD().filter(t -> t._2.getDefaultGeometry() != null).mapValues((Function<SimpleFeature, Geometry>) t -> {
            return (Geometry) t.getDefaultGeometry();
        }).distinct(largePartitionerCount).rdd().toJavaRDD();
    }
    JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> commonLeftRDD = null;
    final boolean commonLeftExist = commonLeftTiers != ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
    if (commonLeftExist) {
        commonLeftRDD = leftRDD.getGeoWaveRDD().getRawRDD().filter(t -> t._2.getDefaultGeometry() != null).mapValues((Function<SimpleFeature, Geometry>) t -> {
            return (Geometry) t.getDefaultGeometry();
        }).distinct(largePartitionerCount).rdd().toJavaRDD();
    }
    // Iterate through left tiers. Joining higher right and same level tiers
    for (final Byte leftTierId : leftDataTiers) {
        final HashSet<Byte> higherRightTiers = leftReprojectMap.get(leftTierId);
        JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> leftTier = null;
        final boolean higherTiersExist = ((higherRightTiers != null) && !higherRightTiers.isEmpty());
        final boolean sameTierExist = sharedTiers.contains(leftTierId);
        if (commonRightExist || higherTiersExist || sameTierExist) {
            leftTier = filterTier(leftIndex, leftTierId);
        } else {
            // No tiers to compare against this tier
            continue;
        }
        // Check for same tier existence on both sides and join without reprojection.
        if (sameTierExist) {
            final JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> rightTier = rightIndex.filter(t -> t._1().getBytes()[0] == leftTierId);
            final JavaPairRDD<GeoWaveInputKey, ByteArray> finalMatches = joinAndCompareTiers(leftTier, rightTier, geomPredicate, highestPartCount, partitioner);
            addMatches(finalMatches);
        }
        // Join against higher common tiers for this dataset
        JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> rightTiers = null;
        if (commonRightExist) {
            rightTiers = commonRightRDD;
        } else if (higherTiersExist) {
            final Broadcast<HashSet<Byte>> higherBroadcast = javaSC.broadcast(higherRightTiers);
            rightTiers = prepareForReproject(rightIndex.filter(t -> higherBroadcast.value().contains(t._1().getBytes()[0])), largePartitionerCount);
        }
        if (rightTiers != null) {
            final JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> reprojected = reprojectToTier(rightTiers, leftTierId, broadcastStrategy, getBufferAmount(BuildSide.RIGHT), partitioner);
            final JavaPairRDD<GeoWaveInputKey, ByteArray> finalMatches = joinAndCompareTiers(leftTier, reprojected, geomPredicate, highestPartCount, partitioner);
            addMatches(finalMatches);
        }
    }
    // Iterate through right tiers, joining against the left tiers mapped to each of them.
    // Same-tier joins were already handled in the left loop above.
    for (final Byte rightTierId : rightDataTiers) {
        final HashSet<Byte> higherLeftTiers = rightReprojectMap.get(rightTierId);
        JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> rightTier = null;
        final boolean higherLeftExist = ((higherLeftTiers != null) && !higherLeftTiers.isEmpty());
        if (commonLeftExist || higherLeftExist) {
            rightTier = rightIndex.filter(t -> t._1().getBytes()[0] == rightTierId);
        } else {
            // No tiers to compare against this tier
            continue;
        }
        JavaPairRDD<GeoWaveInputKey, ByteArray> finalMatches = null;
        JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> leftTiers = null;
        if (commonLeftExist) {
            leftTiers = commonLeftRDD;
        } else {
            final Broadcast<HashSet<Byte>> higherBroadcast = javaSC.broadcast(higherLeftTiers);
            leftTiers = prepareForReproject(leftIndex.filter(t -> higherBroadcast.value().contains(t._1.getBytes()[0])), largePartitionerCount);
        }
        final JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> reprojected = reprojectToTier(leftTiers, rightTierId, broadcastStrategy, getBufferAmount(BuildSide.LEFT), partitioner);
        finalMatches = joinAndCompareTiers(reprojected, rightTier, geomPredicate, highestPartCount, partitioner);
        addMatches(finalMatches);
    }
    // Remove duplicates between tiers
    combinedResults = javaSC.union((JavaPairRDD[]) (ArrayUtils.add(tierMatches.toArray(new JavaPairRDD[tierMatches.size()]), combinedResults)));
    combinedResults = combinedResults.reduceByKey((id1, id2) -> id1);
    combinedResults = combinedResults.setName("CombinedJoinResults").persist(StorageLevel.MEMORY_ONLY_SER());
    // Force evaluation of RDD at the join function call.
    // Otherwise it doesn't actually perform work until something is called
    // on left/right joined.
    // Wish there was a better way to force evaluation of rdd safely.
    // isEmpty() triggers take(1) which shouldn't involve a shuffle.
    combinedResults.isEmpty();
    // don't recalculate
    // A negative predicate keeps the entries without a match; otherwise only matched entries are kept
    if (getJoinOptions().isNegativePredicate()) {
        setLeftResults(new GeoWaveRDD(leftRDD.getGeoWaveRDD().getRawRDD().subtractByKey(combinedResults).cache()));
        setRightResults(new GeoWaveRDD(rightRDD.getGeoWaveRDD().getRawRDD().subtractByKey(combinedResults).cache()));
    } else {
        setLeftResults(new GeoWaveRDD(leftRDD.getGeoWaveRDD().getRawRDD().join(combinedResults).mapToPair(t -> new Tuple2<>(t._1(), t._2._1())).cache()));
        setRightResults(new GeoWaveRDD(rightRDD.getGeoWaveRDD().getRawRDD().join(combinedResults).mapToPair(t -> new Tuple2<>(t._1(), t._2._1())).cache()));
    }
    leftIndex.unpersist();
    rightIndex.unpersist();
}
Also used : ByteArray(org.locationtech.geowave.core.index.ByteArray) Arrays(java.util.Arrays) GeoWaveInputKey(org.locationtech.geowave.mapreduce.input.GeoWaveInputKey) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) PairFlatMapFunction(org.apache.spark.api.java.function.PairFlatMapFunction) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) SpatialDimensionalityTypeProvider(org.locationtech.geowave.core.geotime.index.SpatialDimensionalityTypeProvider) SubStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy) GeomFunction(org.locationtech.geowave.analytic.spark.sparksql.udf.GeomFunction) HashSet(java.util.HashSet) SpatialTemporalOptions(org.locationtech.geowave.core.geotime.index.SpatialTemporalOptions) Lists(com.google.common.collect.Lists) StorageLevel(org.apache.spark.storage.StorageLevel) SimpleFeature(org.opengis.feature.simple.SimpleFeature) Map(java.util.Map) Maps(jersey.repackaged.com.google.common.collect.Maps) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) SparkSession(org.apache.spark.sql.SparkSession) Broadcast(org.apache.spark.broadcast.Broadcast) RDDUtils(org.locationtech.geowave.analytic.spark.RDDUtils) Logger(org.slf4j.Logger) HashPartitioner(org.apache.spark.HashPartitioner) SparkContext(org.apache.spark.SparkContext) TieredSFCIndexStrategy(org.locationtech.geowave.core.index.sfc.tiered.TieredSFCIndexStrategy) TieredSFCIndexFactory(org.locationtech.geowave.core.index.sfc.tiered.TieredSFCIndexFactory) GeometryUtils(org.locationtech.geowave.core.geotime.util.GeometryUtils) Tuple2(scala.Tuple2) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) JavaFutureAction(org.apache.spark.api.java.JavaFutureAction) Sets(com.google.common.collect.Sets) ExecutionException(java.util.concurrent.ExecutionException) List(java.util.List) 
InsertionIds(org.locationtech.geowave.core.index.InsertionIds) SingleTierSubStrategy(org.locationtech.geowave.core.index.sfc.tiered.SingleTierSubStrategy) SFCType(org.locationtech.geowave.core.index.sfc.SFCFactory.SFCType) Geometry(org.locationtech.jts.geom.Geometry) BuildSide(org.locationtech.geowave.analytic.spark.spatial.JoinOptions.BuildSide) SpatialTemporalDimensionalityTypeProvider(org.locationtech.geowave.core.geotime.index.SpatialTemporalDimensionalityTypeProvider) Function(org.apache.spark.api.java.function.Function) GeoWaveIndexedRDD(org.locationtech.geowave.analytic.spark.GeoWaveIndexedRDD) MultiDimensionalNumericData(org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) NumericIndexStrategy(org.locationtech.geowave.core.index.NumericIndexStrategy) Envelope(org.locationtech.jts.geom.Envelope) ArrayUtils(org.apache.commons.lang.ArrayUtils) TieredSFCIndexStrategy(org.locationtech.geowave.core.index.sfc.tiered.TieredSFCIndexStrategy) HashMap(java.util.HashMap) Broadcast(org.apache.spark.broadcast.Broadcast) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) ByteArray(org.locationtech.geowave.core.index.ByteArray) List(java.util.List) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) HashSet(java.util.HashSet) SubStrategy(org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy) SingleTierSubStrategy(org.locationtech.geowave.core.index.sfc.tiered.SingleTierSubStrategy) GeoWaveInputKey(org.locationtech.geowave.mapreduce.input.GeoWaveInputKey) SimpleFeature(org.opengis.feature.simple.SimpleFeature) GeomFunction(org.locationtech.geowave.analytic.spark.sparksql.udf.GeomFunction) Geometry(org.locationtech.jts.geom.Geometry) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkContext(org.apache.spark.SparkContext) Tuple2(scala.Tuple2) HashPartitioner(org.apache.spark.HashPartitioner) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) 
NumericIndexStrategy(org.locationtech.geowave.core.index.NumericIndexStrategy)

Aggregations

SubStrategy (org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy) 4, ArrayList (java.util.ArrayList) 3, HierarchicalNumericIndexStrategy (org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy) 3, MultiDimensionalNumericData (org.locationtech.geowave.core.index.numeric.MultiDimensionalNumericData) 3, TreeMap (java.util.TreeMap) 2, ByteArray (org.locationtech.geowave.core.index.ByteArray) 2, NumericIndexStrategy (org.locationtech.geowave.core.index.NumericIndexStrategy) 2, ParameterException (com.beust.jcommander.ParameterException) 1, Lists (com.google.common.collect.Lists) 1, Sets (com.google.common.collect.Sets) 1, Point (java.awt.Point) 1, NoninvertibleTransformException (java.awt.geom.NoninvertibleTransformException) 1, Arrays (java.util.Arrays) 1, HashMap (java.util.HashMap) 1, HashSet (java.util.HashSet) 1, List (java.util.List) 1, Map (java.util.Map) 1, ExecutionException (java.util.concurrent.ExecutionException) 1, Maps (jersey.repackaged.com.google.common.collect.Maps) 1, ArrayUtils (org.apache.commons.lang.ArrayUtils) 1