Use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.
In class DataStoreUtils, method constraintsToQueryRanges:
/**
 * Translates a set of numeric constraints into the query ranges to use against an index.
 *
 * <p>A {@link CustomIndex} given exactly one {@link InternalCustomConstraints} produces its own
 * ranges. Otherwise, when a target resolution with one entry per index dimension is supplied and
 * the index strategy contains a hierarchical strategy, a single sub-strategy (tier) is chosen and
 * used in place of the full index strategy.
 *
 * @param constraints the constraints to convert; may be {@code null} or empty
 * @param index the index being queried
 * @param targetResolutionPerDimensionForHierarchicalIndex desired resolution per dimension, or
 *        {@code null}; ignored unless its length equals the number of index dimensions
 * @param maxRanges passed through to the index strategy's {@code getQueryRanges}
 * @param hints index metadata passed through to the index strategy's {@code getQueryRanges}
 * @return the ranges for the constraints; with no constraints, either the prefix of the chosen
 *         tier or a {@code QueryRanges} created with its no-argument constructor
 */
public static QueryRanges constraintsToQueryRanges(
    final List<MultiDimensionalNumericData> constraints,
    final Index index,
    final double[] targetResolutionPerDimensionForHierarchicalIndex,
    final int maxRanges,
    final IndexMetaData... hints) {
  // a custom index paired with its single custom constraint computes the ranges itself
  if ((index instanceof CustomIndex) && (constraints != null) && (constraints.size() == 1)) {
    final MultiDimensionalNumericData onlyConstraint = constraints.get(0);
    if (onlyConstraint instanceof InternalCustomConstraints) {
      return ((CustomIndex) index).getQueryRanges(
          ((InternalCustomConstraints) onlyConstraint).getCustomConstraints());
    }
  }

  NumericIndexStrategy effectiveStrategy = index.getIndexStrategy();
  SubStrategy chosenTier = null;

  // the target resolution is only honored when it has one value per index dimension
  final boolean resolutionUsable =
      (targetResolutionPerDimensionForHierarchicalIndex != null)
          && (targetResolutionPerDimensionForHierarchicalIndex.length == effectiveStrategy.getOrderedDimensionDefinitions().length);
  final HierarchicalNumericIndexStrategy hierarchy =
      resolutionUsable
          ? CompoundHierarchicalIndexStrategyWrapper.findHierarchicalStrategy(effectiveStrategy)
          : null;

  if (hierarchy != null) {
    // order the tiers by the sum of their per-dimension id ranges
    final TreeMap<Double, SubStrategy> tiersByRangeSum = new TreeMap<>();
    for (final SubStrategy tier : hierarchy.getSubStrategies()) {
      double total = 0;
      for (final double r : tier.getIndexStrategy().getHighestPrecisionIdRangePerDimension()) {
        total += r;
      }
      tiersByRangeSum.put(total, tier);
    }

    // walk from the largest range sum downwards and take the first tier whose id range does
    // not exceed the target resolution in any dimension
    largestFirst:
    for (final SubStrategy tier : tiersByRangeSum.descendingMap().values()) {
      final double[] tierRange =
          tier.getIndexStrategy().getHighestPrecisionIdRangePerDimension();
      for (int d = 0; d < tierRange.length; d++) {
        if (tierRange[d] > targetResolutionPerDimensionForHierarchicalIndex[d]) {
          continue largestFirst;
        }
      }
      chosenTier = tier;
      break;
    }

    if (chosenTier == null) {
      // nothing fits the target resolution: fall back to the tier with the smallest range
      // sum, i.e. the highest precision available
      chosenTier = tiersByRangeSum.firstEntry().getValue();
    }
    effectiveStrategy = chosenTier.getIndexStrategy();
  }

  if ((constraints != null) && !constraints.isEmpty()) {
    final List<QueryRanges> perConstraint = new ArrayList<>(constraints.size());
    for (final MultiDimensionalNumericData constraint : constraints) {
      perConstraint.add(effectiveStrategy.getQueryRanges(constraint, maxRanges, hints));
    }
    if (perConstraint.size() > 1) {
      return new QueryRanges(perConstraint);
    }
    return perConstraint.get(0);
  }

  if (chosenTier != null) {
    // no constraints, but a tier was chosen: at least restrict the scan to its prefix
    return new QueryRanges(new byte[][] {chosenTier.getPrefix()});
  }
  // no constraints and no tier: unbounded (negative to positive infinity)
  return new QueryRanges();
}
Use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.
In class RasterDataAdapter, method convertToIndex:
/**
 * Splits a grid coverage into the per-pyramid-level coverages to be written to the index.
 *
 * <p>The coverage envelope is projected into the index CRS (falling back to the unprojected
 * envelope, with a warning, when the transform fails) and clamped to the index bounds. Each
 * sub-strategy of the hierarchical index is then scored by how coarse it is relative to the
 * coverage, and the matching levels are handed to a {@code MosaicPerPyramidLevelBuilder}.
 *
 * @param index the index to convert for
 * @param gridCoverage the coverage being ingested
 * @return an iterator over the converted coverages, or an empty iterator (after logging a
 *         warning) when the index strategy contains no hierarchical strategy
 */
@Override
public Iterator<GridCoverage> convertToIndex(final Index index, final GridCoverage gridCoverage) {
  final HierarchicalNumericIndexStrategy hierarchy =
      CompoundHierarchicalIndexStrategyWrapper.findHierarchicalStrategy(index.getIndexStrategy());
  if (hierarchy == null) {
    LOGGER.warn("Strategy is not an instance of HierarchicalNumericIndexStrategy : " + index.getIndexStrategy().getClass().getName());
    return Collections.<GridCoverage>emptyIterator();
  }

  // express the coverage footprint as an envelope in the coverage's own CRS
  final CoordinateReferenceSystem coverageCrs = gridCoverage.getCoordinateReferenceSystem();
  final Envelope coverageEnvelope = gridCoverage.getEnvelope();
  final org.locationtech.jts.geom.Envelope jtsEnvelope =
      new org.locationtech.jts.geom.Envelope(
          coverageEnvelope.getMinimum(0),
          coverageEnvelope.getMaximum(0),
          coverageEnvelope.getMinimum(1),
          coverageEnvelope.getMaximum(1));
  final ReferencedEnvelope nativeEnvelope =
      new ReferencedEnvelope(jtsEnvelope, gridCoverage.getCoordinateReferenceSystem());

  // reproject into the index CRS when it differs; on failure keep the native envelope
  ReferencedEnvelope indexEnvelope = nativeEnvelope;
  final CoordinateReferenceSystem indexCrs = GeometryUtils.getIndexCrs(index);
  if (!indexCrs.equals(coverageCrs)) {
    try {
      indexEnvelope = nativeEnvelope.transform(indexCrs, true);
    } catch (TransformException | FactoryException e) {
      LOGGER.warn("Unable to transform envelope of grid coverage to Index CRS", e);
    }
  }

  final MultiDimensionalNumericData bounds =
      indexCrs.equals(GeometryUtils.getDefaultCRS())
          ? IndexUtils.clampAtIndexBounds(
              GeometryUtils.basicConstraintSetFromEnvelope(indexEnvelope).getIndexConstraints(hierarchy),
              hierarchy)
          : IndexUtils.clampAtIndexBounds(
              GeometryUtils.getBoundsFromEnvelope(indexEnvelope),
              hierarchy);

  // extent covered by one tile of the source coverage, per dimension
  final GridEnvelope gridRange = gridCoverage.getGridGeometry().getGridRange();
  final double[] tileExtent = new double[bounds.getDimensionCount()];
  final Double[] upper = bounds.getMaxValuesPerDimension();
  final Double[] lower = bounds.getMinValuesPerDimension();
  for (int d = 0; d < tileExtent.length; d++) {
    final double boundsExtent = upper[d] - lower[d];
    tileExtent[d] = (boundsExtent * tileSize) / gridRange.getSpan(d);
  }

  // Key each pyramid level by its worst-case (largest) ratio of level resolution to sample
  // resolution. To create a pyramid, ingest into every level that is lower resolution than
  // the sample in at least one dimension, plus the one level that is the same or higher
  // resolution so the original resolution is retained as well as possible.
  final TreeMap<Double, SubStrategy> levelsByRatio = new TreeMap<>();
  for (final SubStrategy level : hierarchy.getSubStrategies()) {
    final double[] levelRange = level.getIndexStrategy().getHighestPrecisionIdRangePerDimension();
    double worstRatio = -Double.MAX_VALUE;
    for (int d = 0; d < tileExtent.length; d++) {
      worstRatio = Math.max(worstRatio, levelRange[d] / tileExtent[d]);
    }
    levelsByRatio.put(worstRatio, level);
  }

  // Entries above 1 are lower-resolution pyramid levels; the entry closest to 1.0 without
  // going over is the full resolution level. The epsilon absorbs round-off error.
  final double fullResThreshold = 1.0 + MathUtils.EPSILON;
  final List<SubStrategy> selectedLevels = new ArrayList<>();
  final Entry<Double, SubStrategy> fullResLevel = levelsByRatio.floorEntry(fullResThreshold);
  if (fullResLevel != null) {
    selectedLevels.add(fullResLevel.getValue());
  }
  if (buildPyramid) {
    selectedLevels.addAll(levelsByRatio.tailMap(fullResThreshold, false).values());
  }
  if (selectedLevels.isEmpty()) {
    // should not happen in theory; fall back to the level closest to 1.0
    selectedLevels.add(levelsByRatio.higherEntry(1.0).getValue());
  }

  return new IteratorWrapper<>(
      selectedLevels.iterator(),
      new MosaicPerPyramidLevelBuilder(
          bounds,
          gridCoverage,
          tileSize,
          backgroundValuesPerBand,
          RasterUtils.getFootprint(indexEnvelope, gridCoverage),
          interpolation,
          indexEnvelope.getCoordinateReferenceSystem()));
}
Use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.
In class DeletePyramidLevelCommand, method run:
/**
 * Deletes one pyramid level of a raster coverage from the named data store.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Load the store named by the single positional argument.</li>
 * <li>Pick the raster adapter: the one whose type name equals {@code coverageName}, or the only
 * raster type in the store when {@code coverageName} is {@code null}.</li>
 * <li>Find the first index on that type with a hierarchical sub-strategy whose one-byte prefix
 * equals {@code level}, and derive the pixel resolution of that level.</li>
 * <li>If the store persists statistics, remove the resolution from the raster overview
 * statistic and the level's partitions from the partitions statistic.</li>
 * <li>Delete the data under each partition prefix for that level.</li>
 * </ol>
 * Any failure along the way is logged and the method returns without deleting data.
 *
 * @param params the operation parameters, used to locate the GeoWave config file and console
 * @throws ParameterException if exactly one positional argument was not supplied
 */
public void run(final OperationParams params) {
  // Ensure we have all the required arguments
  if (parameters.size() != 1) {
    throw new ParameterException("Requires argument: <store name>");
  }
  final String inputStoreName = parameters.get(0);
  // Attempt to load store.
  inputStoreOptions = CLIUtils.loadStore(inputStoreName, getGeoWaveConfigFile(params), params.getConsole());
  final DataStore store = inputStoreOptions.createDataStore();
  RasterDataAdapter adapter = null;
  for (final DataTypeAdapter<?> type : store.getTypes()) {
    // Compare the requested name with the candidate type's own name. The selected adapter is
    // still null until the first match, so it must not be dereferenced here.
    if (isRaster(type) && ((coverageName == null) || coverageName.equals(type.getTypeName()))) {
      if (adapter != null) {
        LOGGER.error("Store has multiple coverages. Must explicitly choose one with --coverage option.");
        return;
      }
      adapter = (RasterDataAdapter) type;
    }
  }
  if (adapter == null) {
    LOGGER.error("Store has no coverages or coverage name not found.");
    return;
  }
  boolean found = false;
  Resolution res = null;
  Index i = null;
  for (final Index index : store.getIndices(adapter.getTypeName())) {
    final HierarchicalNumericIndexStrategy indexStrategy = CompoundHierarchicalIndexStrategyWrapper.findHierarchicalStrategy(index.getIndexStrategy());
    if (indexStrategy != null) {
      for (final SubStrategy s : indexStrategy.getSubStrategies()) {
        // a pyramid level is identified by a sub-strategy with a single-byte prefix
        if ((s.getPrefix().length == 1) && (s.getPrefix()[0] == level)) {
          LOGGER.info("Deleting from index " + index.getName());
          final double[] tileRes = s.getIndexStrategy().getHighestPrecisionIdRangePerDimension();
          // the overview statistic is matched by per-pixel resolution, so divide the tile
          // resolution by the adapter's tile size
          final double[] pixelRes = new double[tileRes.length];
          for (int d = 0; d < tileRes.length; d++) {
            pixelRes[d] = tileRes[d] / adapter.getTileSize();
          }
          found = true;
          i = index;
          res = new Resolution(pixelRes);
          break;
        }
      }
    }
    if (found) {
      break;
    }
  }
  if (!found) {
    LOGGER.error("Store has no indices supporting pyramids.");
    return;
  }
  final byte[][] predefinedSplits = i.getIndexStrategy().getPredefinedSplits();
  // this should account for hash partitioning if used
  final List<ByteArray> partitions = new ArrayList<>();
  if ((predefinedSplits != null) && (predefinedSplits.length > 0)) {
    for (final byte[] split : predefinedSplits) {
      partitions.add(new ByteArray(ArrayUtils.add(split, level.byteValue())));
    }
  } else {
    partitions.add(new ByteArray(new byte[] {level.byteValue()}));
  }
  // delete the resolution from the overview, delete the partitions, and delete the data
  if (inputStoreOptions.getFactoryOptions().getStoreOptions().isPersistDataStatistics()) {
    final DataStatisticsStore statsStore = inputStoreOptions.createDataStatisticsStore();
    boolean overviewStatsFound = false;
    boolean partitionStatsFound = false;
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> it = statsStore.getDataTypeStatistics(adapter, RasterOverviewStatistic.STATS_TYPE, null)) {
      while (it.hasNext()) {
        final Statistic<? extends StatisticValue<?>> next = it.next();
        // only the overview statistic without a binning strategy is updated
        if ((next instanceof RasterOverviewStatistic) && (next.getBinningStrategy() == null)) {
          final RasterOverviewStatistic statistic = (RasterOverviewStatistic) next;
          final RasterOverviewValue value = statsStore.getStatisticValue(statistic);
          if (!value.removeResolution(res)) {
            LOGGER.error("Unable to remove resolution for pyramid level " + level);
            return;
          }
          statsStore.setStatisticValue(statistic, value);
          overviewStatsFound = true;
        }
      }
    }
    if (!overviewStatsFound) {
      LOGGER.error("Unable to find overview stats for coverage " + adapter.getTypeName());
      return;
    }
    try (CloseableIterator<? extends Statistic<? extends StatisticValue<?>>> it = statsStore.getIndexStatistics(i, PartitionsStatistic.STATS_TYPE, null)) {
      while (it.hasNext()) {
        final Statistic<? extends StatisticValue<?>> next = it.next();
        if (next instanceof PartitionsStatistic) {
          // only partition statistics binned by data type are updated, using this adapter's bin
          if ((next.getBinningStrategy() != null) && (next.getBinningStrategy() instanceof DataTypeBinningStrategy)) {
            final PartitionsStatistic statistic = (PartitionsStatistic) next;
            final PartitionsValue value = statsStore.getStatisticValue((PartitionsStatistic) next, DataTypeBinningStrategy.getBin(adapter));
            for (final ByteArray p : partitions) {
              if (!value.getValue().remove(p)) {
                LOGGER.error("Unable to remove partition " + p.getHexString() + " for pyramid level " + level);
                return;
              }
            }
            statsStore.setStatisticValue(statistic, value, DataTypeBinningStrategy.getBin(adapter));
            partitionStatsFound = true;
          }
        }
      }
    }
    if (!partitionStatsFound) {
      LOGGER.error("Unable to find partition stats for coverage " + adapter.getTypeName() + " and index " + i.getName());
      return;
    }
  }
  // finally remove the data itself, one prefix query per partition of the level
  for (final ByteArray p : partitions) {
    store.delete(QueryBuilder.newBuilder().constraints(QueryBuilder.newBuilder().constraintsFactory().prefix(p.getBytes(), null)).addTypeName(adapter.getTypeName()).indexName(i.getName()).build());
  }
}
Use of org.locationtech.geowave.core.index.HierarchicalNumericIndexStrategy.SubStrategy in project geowave by locationtech.
In class TieredSpatialJoin, method join:
/**
 * Joins two indexed RDDs with the given geometry predicate by comparing them tier by tier.
 *
 * <p>Outline of what this method does:
 * <ol>
 * <li>Choose a {@code TieredSFCIndexStrategy} to join on: an existing one from either side when
 * {@code supportsJoin} accepts it, otherwise one built by {@code createDefaultStrategy}. Sides
 * that do not already use the chosen strategy are reindexed.</li>
 * <li>Collect the distinct first byte of every index key on each side; that byte is used as the
 * tier id throughout.</li>
 * <li>For each tier on each side, join against the same tier on the other side and against the
 * other side's tiers reprojected into this tier, accumulating matches through
 * {@code addMatches}.</li>
 * <li>Union and de-duplicate the matches, then store the left/right results through
 * {@code setLeftResults} / {@code setRightResults} (subtracting matches instead of joining when
 * the join options request a negative predicate).</li>
 * </ol>
 * If no usable strategy can be created, an error is logged and the method returns without
 * setting results.
 *
 * @param spark the session whose Spark context is used for broadcasts and unions
 * @param leftRDD the left side of the join
 * @param rightRDD the right side of the join
 * @param predicate the geometry predicate; also supplies the buffer amount
 * @throws InterruptedException if interrupted while waiting for the async tier collection
 * @throws ExecutionException if the async tier collection fails
 */
@Override
public void join(final SparkSession spark, final GeoWaveIndexedRDD leftRDD, final GeoWaveIndexedRDD rightRDD, final GeomFunction predicate) throws InterruptedException, ExecutionException {
// Get SparkContext from session
final SparkContext sc = spark.sparkContext();
final JavaSparkContext javaSC = JavaSparkContext.fromSparkContext(sc);
final NumericIndexStrategy leftStrategy = leftRDD.getIndexStrategy().getValue();
final NumericIndexStrategy rightStrategy = rightRDD.getIndexStrategy().getValue();
// Check if either dataset supports the join
TieredSFCIndexStrategy tieredStrategy = null;
// Determine if either strategy needs to be reindexed to support join algorithm
boolean reindexLeft = false;
boolean reindexRight = false;
final boolean leftSupport = supportsJoin(leftStrategy);
final boolean rightSupport = supportsJoin(rightStrategy);
if (leftSupport && rightSupport) {
if (leftStrategy.equals(rightStrategy)) {
// Both strategies match we don't have to reindex
tieredStrategy = (TieredSFCIndexStrategy) leftStrategy;
} else {
// support but don't match
// keep the build side's strategy and reindex the other side onto it
if (getJoinOptions().getJoinBuildSide() == JoinOptions.BuildSide.LEFT) {
reindexRight = true;
tieredStrategy = (TieredSFCIndexStrategy) leftStrategy;
} else {
reindexLeft = true;
tieredStrategy = (TieredSFCIndexStrategy) rightStrategy;
}
}
} else if (leftSupport) {
reindexRight = true;
tieredStrategy = (TieredSFCIndexStrategy) leftStrategy;
} else if (rightSupport) {
reindexLeft = true;
tieredStrategy = (TieredSFCIndexStrategy) rightStrategy;
} else {
// neither side supports the join: try to derive a default strategy from the left
// strategy, then from the right, and reindex both sides onto it
tieredStrategy = (TieredSFCIndexStrategy) createDefaultStrategy(leftStrategy);
if (tieredStrategy == null) {
tieredStrategy = (TieredSFCIndexStrategy) createDefaultStrategy(rightStrategy);
}
if (tieredStrategy == null) {
LOGGER.error("Cannot create default strategy from either provided strategy. Datasets cannot be joined.");
return;
}
reindexLeft = true;
reindexRight = true;
}
// Pull information and broadcast strategy used for join
final SubStrategy[] tierStrategies = tieredStrategy.getSubStrategies();
final int tierCount = tierStrategies.length;
// Create broadcast variable for indexing strategy
// Cast is safe because we must be instance of TieredSFCIndexStrategy to support join.
final Broadcast<TieredSFCIndexStrategy> broadcastStrategy = (Broadcast<TieredSFCIndexStrategy>) RDDUtils.broadcastIndexStrategy(sc, tieredStrategy);
final Broadcast<GeomFunction> geomPredicate = javaSC.broadcast(predicate);
// If needed reindex one of the strategies we will wrap the buffer operation into the reindex
// operation
// Otherwise we buffer based off the buildside of the join.
setBufferAmount(predicate.getBufferAmount());
// Reindex if necessary and get RDD of indexed Geometry
JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> leftIndex = null;
JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> rightIndex = null;
if (reindexLeft && reindexRight) {
leftRDD.reindex(broadcastStrategy);
rightRDD.reindex(broadcastStrategy);
} else if (reindexLeft) {
leftRDD.reindex(broadcastStrategy);
} else if (reindexRight) {
rightRDD.reindex(broadcastStrategy);
}
// the non-build side is the one fetched with the buffer distance applied
// NOTE(review): this reads the joinOpts field directly while other places in this method call
// getJoinOptions() - presumably the same object; confirm
if (joinOpts.getJoinBuildSide() == BuildSide.LEFT) {
rightIndex = rightRDD.getIndexedGeometryRDD(bufferDistance, true);
leftIndex = leftRDD.getIndexedGeometryRDD();
} else {
leftIndex = leftRDD.getIndexedGeometryRDD(bufferDistance, true);
rightIndex = rightRDD.getIndexedGeometryRDD();
}
// size the partitioner at 1.5x the larger of the two partition counts
final int leftPartCount = leftIndex.getNumPartitions();
final int rightPartCount = rightIndex.getNumPartitions();
final int highestPartCount = (leftPartCount > rightPartCount) ? leftPartCount : rightPartCount;
final int largePartitionerCount = (int) (1.5 * highestPartCount);
final HashPartitioner partitioner = new HashPartitioner(largePartitionerCount);
// collect the distinct first key byte (the tier id) of each side asynchronously so both
// collections can run concurrently
final JavaFutureAction<List<Byte>> leftFuture = leftIndex.setName("LeftIndex").keys().map(t -> t.getBytes()[0]).distinct(4).collectAsync();
final JavaFutureAction<List<Byte>> rightFuture = rightIndex.setName("RightIndex").keys().map(t -> t.getBytes()[0]).distinct(4).collectAsync();
// Get the result of future
final List<Byte> rightDataTiers = Lists.newArrayList(rightFuture.get());
// Sort the tier ids (ascending, so the highest tier is last) and collect information.
final Byte[] rightTierArr = rightDataTiers.toArray(new Byte[0]);
Arrays.sort(rightTierArr);
final int rightTierCount = rightTierArr.length;
final List<Byte> leftDataTiers = Lists.newArrayList(leftFuture.get());
final Byte[] leftTierArr = leftDataTiers.toArray(new Byte[0]);
Arrays.sort(leftTierArr);
final int leftTierCount = leftTierArr.length;
// Determine if there are common higher tiers for whole dataset on either side.
// NOTE(review): if either side produced no keys these arrays are empty and the index
// expressions below will throw ArrayIndexOutOfBoundsException - confirm callers never pass
// empty datasets
final byte highestLeftTier = leftTierArr[leftTierArr.length - 1];
final byte highestRightTier = rightTierArr[rightTierArr.length - 1];
// Find a common run of higher tiers
Byte[] commonLeftTiers = ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
Byte[] commonRightTiers = ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
boolean skipMapCreate = false;
if (leftTierArr[0] > highestRightTier) {
// Whole left dataset is higher tiers than right
commonLeftTiers = leftTierArr;
skipMapCreate = true;
} else if (rightTierArr[0] > highestLeftTier) {
// Whole right dataset is higher tiers than left
commonRightTiers = rightTierArr;
skipMapCreate = true;
}
LOGGER.debug("Tier Count: " + tierCount);
LOGGER.debug("Left Tier Count: " + leftTierCount + " Right Tier Count: " + rightTierCount);
LOGGER.debug("Left Tiers: " + leftDataTiers);
LOGGER.debug("Right Tiers: " + rightDataTiers);
// per-tier sets of the other side's tiers to reproject; left empty when one side sits
// entirely above the other
Map<Byte, HashSet<Byte>> rightReprojectMap = new HashMap<>();
Map<Byte, HashSet<Byte>> leftReprojectMap = new HashMap<>();
final HashSet<Byte> sharedTiers = Sets.newHashSetWithExpectedSize(tierCount / 2);
if (!skipMapCreate) {
leftReprojectMap = createReprojectMap(leftTierArr, rightTierArr, sharedTiers);
rightReprojectMap = createReprojectMap(rightTierArr, leftTierArr, sharedTiers);
}
// identity comparison against the shared empty-array constant acts as a "was assigned" flag:
// the common tier arrays only stop being that constant when assigned above
JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> commonRightRDD = null;
final boolean commonRightExist = commonRightTiers != ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
if (commonRightExist) {
commonRightRDD = rightRDD.getGeoWaveRDD().getRawRDD().filter(t -> t._2.getDefaultGeometry() != null).mapValues((Function<SimpleFeature, Geometry>) t -> {
return (Geometry) t.getDefaultGeometry();
}).distinct(largePartitionerCount).rdd().toJavaRDD();
}
JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> commonLeftRDD = null;
final boolean commonLeftExist = commonLeftTiers != ArrayUtils.EMPTY_BYTE_OBJECT_ARRAY;
if (commonLeftExist) {
commonLeftRDD = leftRDD.getGeoWaveRDD().getRawRDD().filter(t -> t._2.getDefaultGeometry() != null).mapValues((Function<SimpleFeature, Geometry>) t -> {
return (Geometry) t.getDefaultGeometry();
}).distinct(largePartitionerCount).rdd().toJavaRDD();
}
// Iterate through left tiers. Joining higher right and same level tiers
for (final Byte leftTierId : leftDataTiers) {
final HashSet<Byte> higherRightTiers = leftReprojectMap.get(leftTierId);
JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> leftTier = null;
final boolean higherTiersExist = ((higherRightTiers != null) && !higherRightTiers.isEmpty());
final boolean sameTierExist = sharedTiers.contains(leftTierId);
if (commonRightExist || higherTiersExist || sameTierExist) {
leftTier = filterTier(leftIndex, leftTierId);
} else {
// No tiers to compare against this tier
continue;
}
// Check for same tier existence on both sides and join without reprojection.
if (sameTierExist) {
final JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> rightTier = rightIndex.filter(t -> t._1().getBytes()[0] == leftTierId);
final JavaPairRDD<GeoWaveInputKey, ByteArray> finalMatches = joinAndCompareTiers(leftTier, rightTier, geomPredicate, highestPartCount, partitioner);
addMatches(finalMatches);
}
// Join against higher common tiers for this dataset
JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> rightTiers = null;
if (commonRightExist) {
rightTiers = commonRightRDD;
} else if (higherTiersExist) {
final Broadcast<HashSet<Byte>> higherBroadcast = javaSC.broadcast(higherRightTiers);
rightTiers = prepareForReproject(rightIndex.filter(t -> higherBroadcast.value().contains(t._1().getBytes()[0])), largePartitionerCount);
}
if (rightTiers != null) {
final JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> reprojected = reprojectToTier(rightTiers, leftTierId, broadcastStrategy, getBufferAmount(BuildSide.RIGHT), partitioner);
final JavaPairRDD<GeoWaveInputKey, ByteArray> finalMatches = joinAndCompareTiers(leftTier, reprojected, geomPredicate, highestPartCount, partitioner);
addMatches(finalMatches);
}
}
// Mirror pass over the right tiers; same-tier joins are not repeated here because the left
// pass above already handled the shared tiers
for (final Byte rightTierId : rightDataTiers) {
final HashSet<Byte> higherLeftTiers = rightReprojectMap.get(rightTierId);
JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> rightTier = null;
final boolean higherLeftExist = ((higherLeftTiers != null) && !higherLeftTiers.isEmpty());
if (commonLeftExist || higherLeftExist) {
rightTier = rightIndex.filter(t -> t._1().getBytes()[0] == rightTierId);
} else {
// No tiers to compare against this tier
continue;
}
JavaPairRDD<GeoWaveInputKey, ByteArray> finalMatches = null;
JavaRDD<Tuple2<GeoWaveInputKey, Geometry>> leftTiers = null;
if (commonLeftExist) {
leftTiers = commonLeftRDD;
} else {
final Broadcast<HashSet<Byte>> higherBroadcast = javaSC.broadcast(higherLeftTiers);
leftTiers = prepareForReproject(leftIndex.filter(t -> higherBroadcast.value().contains(t._1.getBytes()[0])), largePartitionerCount);
}
final JavaPairRDD<ByteArray, Tuple2<GeoWaveInputKey, Geometry>> reprojected = reprojectToTier(leftTiers, rightTierId, broadcastStrategy, getBufferAmount(BuildSide.LEFT), partitioner);
finalMatches = joinAndCompareTiers(reprojected, rightTier, geomPredicate, highestPartCount, partitioner);
addMatches(finalMatches);
}
// Remove duplicates between tiers
// (tierMatches and combinedResults are not declared in this method - presumably fields, with
// tierMatches filled by addMatches; verify against the class)
combinedResults = javaSC.union((JavaPairRDD[]) (ArrayUtils.add(tierMatches.toArray(new JavaPairRDD[tierMatches.size()]), combinedResults)));
combinedResults = combinedResults.reduceByKey((id1, id2) -> id1);
combinedResults = combinedResults.setName("CombinedJoinResults").persist(StorageLevel.MEMORY_ONLY_SER());
// Force evaluation of RDD at the join function call.
// Otherwise it doesn't actually perform work until something is called
// on left/right joined.
// Wish there was a better way to force evaluation of rdd safely.
// isEmpty() triggers take(1) which shouldn't involve a shuffle.
combinedResults.isEmpty();
// don't recalculate
if (getJoinOptions().isNegativePredicate()) {
// negative predicate: keep only the features that did NOT match
setLeftResults(new GeoWaveRDD(leftRDD.getGeoWaveRDD().getRawRDD().subtractByKey(combinedResults).cache()));
setRightResults(new GeoWaveRDD(rightRDD.getGeoWaveRDD().getRawRDD().subtractByKey(combinedResults).cache()));
} else {
// keep only the features whose key appears in the matches, dropping the joined value
setLeftResults(new GeoWaveRDD(leftRDD.getGeoWaveRDD().getRawRDD().join(combinedResults).mapToPair(t -> new Tuple2<>(t._1(), t._2._1())).cache()));
setRightResults(new GeoWaveRDD(rightRDD.getGeoWaveRDD().getRawRDD().join(combinedResults).mapToPair(t -> new Tuple2<>(t._1(), t._2._1())).cache()));
}
leftIndex.unpersist();
rightIndex.unpersist();
}
Aggregations