Use of org.locationtech.geowave.analytic.spark.RDDOptions in the geowave project by locationtech.
Class KMeansRunner, method run.
/**
 * Runs KMeans clustering over vector features read from the input data store and writes the
 * resulting model to the output store.
 *
 * <p>Steps: initialize the context, validate inputs, optionally set up time scaling, build a query
 * restricted to the requested feature type (or all feature types of the store), optionally
 * constrain the query to the geometry extracted from the CQL filter, load the features as an RDD,
 * convert them to vectors, run KMeans and persist the results.
 *
 * <p>A CQL filter that cannot be parsed is logged (with its cause) and the run continues without
 * a spatial constraint.
 *
 * @throws IOException if no input data store was supplied, or if a CQL filter was given but the
 *         feature type it applies to cannot be resolved in the input data store
 * @throws ParameterException if time usage was requested but the time parameters could not be set
 */
public void run() throws IOException {
  initContext();

  // Validate inputs
  if (inputDataStore == null) {
    LOGGER.error("You must supply an input datastore!");
    throw new IOException("You must supply an input datastore!");
  }

  if (isUseTime()) {
    scaledRange = KMeansUtils.setRunnerTimeParams(this, inputDataStore, typeName);
    if (scaledRange == null) {
      LOGGER.error("Failed to set time params for kmeans. Please specify a valid feature type.");
      throw new ParameterException("--useTime option: Failed to set time params");
    }
  }

  // Retrieve the feature adapters
  final VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder();
  List<String> featureTypeNames;
  if (typeName != null) {
    // If provided, just use the one
    featureTypeNames = new ArrayList<>();
    featureTypeNames.add(typeName);
  } else {
    // otherwise, grab all the feature adapters
    featureTypeNames = FeatureDataUtils.getFeatureTypeNames(inputDataStore);
  }
  bldr.setTypeNames(featureTypeNames.toArray(new String[0]));

  // This is required due to some funkiness in GeoWaveInputFormat
  final PersistentAdapterStore adapterStore = inputDataStore.createAdapterStore();
  final InternalAdapterStore internalAdapterStore = inputDataStore.createInternalAdapterStore();

  // Add a spatial filter if requested
  if (cqlFilter != null) {
    try {
      // Without an explicit type name the filter is resolved against the first feature type,
      // so there must be at least one.
      if ((typeName == null) && featureTypeNames.isEmpty()) {
        throw new IOException(
            "No feature types found in the input datastore; cannot apply CQL filter: "
                + cqlFilter);
      }
      final String cqlTypeName = (typeName == null) ? featureTypeNames.get(0) : typeName;

      // Keep the lookup result boxed so an unknown type name is reported explicitly instead of
      // failing with a NullPointerException on unboxing.
      final Short adapterId = internalAdapterStore.getAdapterId(cqlTypeName);
      if (adapterId == null) {
        throw new IOException(
            "Feature type '" + cqlTypeName + "' does not exist in the input datastore");
      }

      Geometry bbox = null;
      final DataTypeAdapter<?> adapter = adapterStore.getAdapter(adapterId).getAdapter();
      if (adapter instanceof GeotoolsFeatureDataAdapter) {
        final String geometryAttribute =
            ((GeotoolsFeatureDataAdapter) adapter).getFeatureType().getGeometryDescriptor().getLocalName();
        final Filter filter = ECQL.toFilter(cqlFilter);
        final ExtractGeometryFilterVisitorResult geoAndCompareOpData =
            (ExtractGeometryFilterVisitorResult) filter.accept(
                new ExtractGeometryFilterVisitor(GeometryUtils.getDefaultCRS(), geometryAttribute),
                null);
        bbox = geoAndCompareOpData.getGeometry();
      }

      // Only constrain the query when the filter yielded a bounded geometry.
      if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
        bldr.constraints(
            bldr.constraintsFactory().spatialTemporalConstraints().spatialConstraints(bbox).build());
      }
    } catch (final CQLException e) {
      // Best effort: continue unfiltered, but keep the parse failure's cause in the log.
      LOGGER.error("Unable to parse CQL: " + cqlFilter, e);
    }
  }

  // Load RDD from datastore
  final RDDOptions kmeansOpts = new RDDOptions();
  kmeansOpts.setMinSplits(minSplits);
  kmeansOpts.setMaxSplits(maxSplits);
  kmeansOpts.setQuery(bldr.build());
  final GeoWaveRDD kmeansRDD =
      GeoWaveRDDLoader.loadRDD(session.sparkContext(), inputDataStore, kmeansOpts);

  // Retrieve the input centroids
  LOGGER.debug("Retrieving input centroids from RDD...");
  // NOTE(review): scaledRange is assigned above, but scaledTimeRange is what is passed here;
  // presumably setRunnerTimeParams populates it on this runner - confirm.
  centroidVectors = RDDUtils.rddFeatureVectors(kmeansRDD, timeField, scaledTimeRange);
  centroidVectors.cache();

  // Init the algorithm
  final KMeans kmeans = new KMeans();
  kmeans.setInitializationMode("kmeans||");
  kmeans.setK(numClusters);
  kmeans.setMaxIterations(numIterations);
  // Only override the library's epsilon when a value greater than -1.0 was configured.
  if (epsilon > -1.0) {
    kmeans.setEpsilon(epsilon);
  }

  // Run KMeans
  LOGGER.debug("Running KMeans algorithm...");
  outputModel = kmeans.run(centroidVectors.rdd());

  LOGGER.debug("Writing results to output store...");
  writeToOutputStore();
  LOGGER.debug("Results successfully written!");
}
Use of org.locationtech.geowave.analytic.spark.RDDOptions in the geowave project by locationtech.
Class RasterTileResizeSparkRunner, method run.
/**
 * Re-tiles an input raster coverage to a new tile size and writes it to the output store under the
 * configured output coverage name.
 *
 * <p>The input coverage's adapter is looked up by name, an index is chosen (the configured index
 * name if it resolves, otherwise the first index of the input store), a new raster adapter with
 * the output tile size is registered in the output store, and the raw raster RDD is remapped,
 * grouped by key, merged and written.
 *
 * @throws IOException if no input data store was supplied
 * @throws IllegalArgumentException if the input coverage's adapter or a usable index cannot be
 *         found in the input store
 */
public void run() throws IOException {
  initContext();

  // Validate inputs
  if (inputStoreOptions == null) {
    LOGGER.error("You must supply an input datastore!");
    throw new IOException("You must supply an input datastore!");
  }

  final InternalAdapterStore internalAdapterStore = inputStoreOptions.createInternalAdapterStore();
  // Keep the id boxed until it is known to exist, so that an unknown coverage name reaches the
  // descriptive error below instead of failing with a NullPointerException on unboxing.
  final Short lookedUpAdapterId =
      internalAdapterStore.getAdapterId(rasterResizeOptions.getInputCoverageName());
  final DataTypeAdapter<?> adapter =
      (lookedUpAdapterId == null) ? null
          : inputStoreOptions.createAdapterStore().getAdapter(lookedUpAdapterId).getAdapter();
  if (adapter == null) {
    throw new IllegalArgumentException("Adapter for coverage '" + rasterResizeOptions.getInputCoverageName() + "' does not exist in namespace '" + inputStoreOptions.getGeoWaveNamespace() + "'");
  }
  final short internalAdapterId = lookedUpAdapterId;

  Index index = null;
  final IndexStore indexStore = inputStoreOptions.createIndexStore();
  if (rasterResizeOptions.getIndexName() != null) {
    index = indexStore.getIndex(rasterResizeOptions.getIndexName());
  }
  if (index == null) {
    // Fall back to the first index in the store. hasNext() must be checked first: next() on an
    // empty iterator throws NoSuchElementException and would bypass the error below.
    try (CloseableIterator<Index> indices = indexStore.getIndices()) {
      if (indices.hasNext()) {
        index = indices.next();
      }
    }
    if (index == null) {
      throw new IllegalArgumentException("Index does not exist in namespace '" + inputStoreOptions.getGeoWaveNamespace() + "'");
    }
  }

  // Register the resized coverage type in the output store.
  final RasterDataAdapter newAdapter =
      new RasterDataAdapter(
          (RasterDataAdapter) adapter,
          rasterResizeOptions.getOutputCoverageName(),
          rasterResizeOptions.getOutputTileSize());
  final DataStore store = outputStoreOptions.createDataStore();
  store.addType(newAdapter, index);
  final short newInternalAdapterId =
      outputStoreOptions.createInternalAdapterStore().addTypeName(newAdapter.getTypeName());

  // NOTE(review): these options are populated but never passed to the loader below, which takes
  // the min/max splits directly - confirm whether the object is still needed.
  final RDDOptions options = new RDDOptions();
  if (rasterResizeOptions.getMinSplits() != null) {
    options.setMinSplits(rasterResizeOptions.getMinSplits());
  }
  if (rasterResizeOptions.getMaxSplits() != null) {
    options.setMaxSplits(rasterResizeOptions.getMaxSplits());
  }

  final JavaPairRDD<GeoWaveInputKey, GridCoverage> inputRDD =
      GeoWaveRDDLoader.loadRawRasterRDD(
          jsc.sc(),
          inputStoreOptions,
          index.getName(),
          rasterResizeOptions.getMinSplits(),
          rasterResizeOptions.getMaxSplits());

  LOGGER.debug("Writing results to output store...");
  RDDUtils.writeRasterToGeoWave(
      jsc.sc(),
      index,
      outputStoreOptions,
      newAdapter,
      inputRDD.flatMapToPair(
          new RasterResizeMappingFunction(internalAdapterId, newInternalAdapterId, newAdapter, index))
          .groupByKey()
          .map(new MergeRasterFunction(internalAdapterId, newInternalAdapterId, newAdapter, index)));
  LOGGER.debug("Results successfully written!");
}
Use of org.locationtech.geowave.analytic.spark.RDDOptions in the geowave project by locationtech.
Class KDERunner, method run.
/**
 * Computes a kernel density estimate raster for each zoom level from minLevel to maxLevel
 * (inclusive) and writes the resulting coverages to the output data store.
 *
 * <p>The method builds a vector query over the requested feature type (or all feature types of
 * the input store), resolves the input and output index CRS, registers a raster adapter in the
 * output store and then, per level, loads the features as an RDD, aggregates them into cells,
 * ranks the cells by value and emits one single-pixel coverage per cell carrying three bands:
 * the raw value, the value normalized by the level's maximum, and a percentile. When tileSize is
 * greater than 1 the single-pixel coverages are re-tiled and merged before being written.
 *
 * @throws IOException if no input data store was supplied
 * @throws RuntimeException if an axis of the output index CRS has an infinite minimum or maximum
 */
public void run() throws IOException {
initContext();
// Validate inputs
if (inputDataStore == null) {
LOGGER.error("You must supply an input datastore!");
throw new IOException("You must supply an input datastore!");
}
// Retrieve the feature adapters
final VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder();
List<String> featureTypeNames;
// If provided, just use the one
if (typeName != null) {
featureTypeNames = new ArrayList<>();
featureTypeNames.add(typeName);
} else {
// otherwise, grab all the feature adapters
featureTypeNames = FeatureDataUtils.getFeatureTypeNames(inputDataStore);
}
bldr.setTypeNames(featureTypeNames.toArray(new String[0]));
// Restrict the query to the requested index, if one was named
if (indexName != null) {
bldr.indexName(indexName);
}
// Pick the input index: the first non-null index whose name matches indexName, or simply the
// first non-null index when no name was given.
// NOTE(review): inputPrimaryIndex stays null if nothing matches and is then passed to
// GeometryUtils.getIndexCrs unguarded - confirm that helper tolerates null.
Index inputPrimaryIndex = null;
final Index[] idxArray = inputDataStore.createDataStore().getIndices();
for (final Index idx : idxArray) {
if ((idx != null) && ((indexName == null) || indexName.equals(idx.getName()))) {
inputPrimaryIndex = idx;
break;
}
}
final CoordinateReferenceSystem inputIndexCrs = GeometryUtils.getIndexCrs(inputPrimaryIndex);
final String inputCrsCode = GeometryUtils.getCrsCode(inputIndexCrs);
// Use the configured output index when present; otherwise create a spatial index that reuses the
// input CRS, so the input and output CRS codes are identical in that case.
Index outputPrimaryIndex = outputIndex;
CoordinateReferenceSystem outputIndexCrs = null;
final String outputCrsCode;
if (outputPrimaryIndex != null) {
outputIndexCrs = GeometryUtils.getIndexCrs(outputPrimaryIndex);
outputCrsCode = GeometryUtils.getCrsCode(outputIndexCrs);
} else {
final SpatialDimensionalityTypeProvider sdp = new SpatialDimensionalityTypeProvider();
final SpatialOptions so = sdp.createOptions();
so.setCrs(inputCrsCode);
outputPrimaryIndex = SpatialDimensionalityTypeProvider.createIndexFromOptions(so);
outputIndexCrs = inputIndexCrs;
outputCrsCode = inputCrsCode;
}
// The bounds of the first two axes of the output CRS define the extent that is gridded below;
// an unbounded axis is rejected.
final CoordinateSystem cs = outputIndexCrs.getCoordinateSystem();
final CoordinateSystemAxis csx = cs.getAxis(0);
final CoordinateSystemAxis csy = cs.getAxis(1);
final double xMax = csx.getMaximumValue();
final double xMin = csx.getMinimumValue();
final double yMax = csy.getMaximumValue();
final double yMin = csy.getMinimumValue();
if ((xMax == Double.POSITIVE_INFINITY) || (xMin == Double.NEGATIVE_INFINITY) || (yMax == Double.POSITIVE_INFINITY) || (yMin == Double.NEGATIVE_INFINITY)) {
LOGGER.error("Raster KDE resize with raster primary index CRS dimensions min/max equal to positive infinity or negative infinity is not supported");
throw new RuntimeException("Raster KDE resize with raster primary index CRS dimensions min/max equal to positive infinity or negative infinity is not supported");
}
// Apply the optional CQL filter as a query constraint
if (cqlFilter != null) {
bldr.constraints(bldr.constraintsFactory().cqlConstraints(cqlFilter));
}
// Load RDD from datastore
final RDDOptions kdeOpts = new RDDOptions();
kdeOpts.setMinSplits(minSplits);
kdeOpts.setMaxSplits(maxSplits);
kdeOpts.setQuery(bldr.build());
// Combiner functions for combineByKey below: the first value for a key is taken as-is and
// further values (and partial results) are added together.
final Function<Double, Double> identity = x -> x;
final Function2<Double, Double, Double> sum = (final Double x, final Double y) -> {
return x + y;
};
// Register the output raster type before any level is written
final RasterDataAdapter adapter = RasterUtils.createDataAdapterTypeDouble(coverageName, KDEReducer.NUM_BANDS, tileSize, MINS_PER_BAND, MAXES_PER_BAND, NAME_PER_BAND, new NoDataMergeStrategy());
outputDataStore.createDataStore().addType(adapter, outputPrimaryIndex);
// The following "inner" variables are local copies that give the lambda expressions below access
// to these values without referencing member variables directly.
// innerTileSize is fixed at 1 here (the commented-out alternative was tileSize), so each cell is
// first emitted as a single-pixel raster.
final int innerTileSize = 1;
final String innerCoverageName = coverageName;
for (int level = minLevel; level <= maxLevel; level++) {
// Grid dimensions for this level: 2^(level + 1) tiles in x and 2^level tiles in y
final int numXTiles = (int) Math.pow(2, level + 1);
final int numYTiles = (int) Math.pow(2, level);
// Posts equal tiles here (the commented-out alternative multiplied by tileSize)
final int numXPosts = numXTiles;
// Same for y (the commented-out alternative multiplied by tileSize)
final int numYPosts = numYTiles;
final GeoWaveRDD kdeRDD = GeoWaveRDDLoader.loadRDD(session.sparkContext(), inputDataStore, kdeOpts);
// Map features to keyed values, sum the values per key, then swap each pair so the summed
// value becomes the key and the original key becomes the value.
// NOTE(review): presumably GeoWaveCellMapper emits (cell index, contribution) pairs - confirm.
JavaPairRDD<Double, Long> cells = kdeRDD.getRawRDD().flatMapToPair(new GeoWaveCellMapper(numXPosts, numYPosts, xMin, xMax, yMin, yMax, inputCrsCode, outputCrsCode)).combineByKey(identity, sum, sum).mapToPair(item -> item.swap());
// Range-partition and sort by value in descending order (sortByKey(false)); cached because the
// RDD is used by count(), first() and the mapping below.
cells = cells.partitionBy(new RangePartitioner(cells.getNumPartitions(), cells.rdd(), true, scala.math.Ordering.Double$.MODULE$, scala.reflect.ClassTag$.MODULE$.apply(Double.class))).sortByKey(false).cache();
final long count = cells.count();
// Nothing to write for this level; move on to the next one
if (count == 0) {
LOGGER.warn("No cells produced by KDE");
continue;
}
// With the descending sort, the first key is the largest value of this level
final double max = cells.first()._1;
// t._1 is the (value, cell) pair and t._2 its position in the sorted RDD
JavaRDD<GridCoverage> rdd = cells.zipWithIndex().map(t -> {
final TileInfo tileInfo = fromCellIndexToTileInfo(t._1._2, numXPosts, numYPosts, numXTiles, numYTiles, xMin, xMax, yMin, yMax, innerTileSize);
final WritableRaster raster = RasterUtils.createRasterTypeDouble(NUM_BANDS, innerTileSize);
final double normalizedValue = t._1._1 / max;
// because we are using a Double as the key, the ordering
// isn't always completely reproducible as Double equals does not
// take into account an epsilon
final double percentile = (count - t._2) / ((double) count);
// Band 0: raw value, band 1: value / max, band 2: percentile derived from the sorted position
raster.setSample(tileInfo.x, tileInfo.y, 0, t._1._1);
raster.setSample(tileInfo.x, tileInfo.y, 1, normalizedValue);
raster.setSample(tileInfo.x, tileInfo.y, 2, percentile);
// NOTE(review): the coverage is created with GeometryUtils.DEFAULT_CRS_STR rather than
// outputCrsCode - confirm this is intended when the output index uses another CRS.
return RasterUtils.createCoverageTypeDouble(innerCoverageName, tileInfo.tileWestLon, tileInfo.tileEastLon, tileInfo.tileSouthLat, tileInfo.tileNorthLat, MINS_PER_BAND, MAXES_PER_BAND, NAME_PER_BAND, raster, GeometryUtils.DEFAULT_CRS_STR);
});
LOGGER.debug("Writing results to output store...");
// For tile sizes above 1, regroup the single-pixel coverages by key and merge them
if (tileSize > 1) {
// byte[] adapterBytes = PersistenceUtils.toBinary(adapter);
// byte[] indexBytes = PersistenceUtils.toBinary(outputPrimaryIndex);
rdd = rdd.flatMapToPair(new TransformTileSize(adapter, outputPrimaryIndex)).groupByKey().map(new MergeOverlappingTiles(adapter, outputPrimaryIndex));
}
RDDUtils.writeRasterToGeoWave(jsc.sc(), outputPrimaryIndex, outputDataStore, adapter, rdd);
LOGGER.debug("Results successfully written!");
}
}
Use of org.locationtech.geowave.analytic.spark.RDDOptions in the geowave project by locationtech.
Class SpatialJoinRunner, method createRDDFromOptions.
/**
 * Loads an indexed RDD for one feature type of the given store.
 *
 * <p>When no type name is supplied, the first feature type name reported for the store is used.
 * The index strategy is the runner's configured {@code indexStrategy} if set; otherwise it is
 * taken from the first index found for the type, and stays {@code null} when there is none.
 *
 * @param storeOptions the store to read from
 * @param adapterTypeName the feature type to load, or {@code null} to use the store's first one
 * @param internalAdapterStore passed through to the index lookup for the type
 * @param indexStore passed through to the index lookup for the type
 * @return the loaded RDD, or {@code null} if no type name was given and the store has no feature
 *         types
 * @throws IOException declared for callers; propagated from the calls made here
 */
private GeoWaveIndexedRDD createRDDFromOptions(final DataStorePluginOptions storeOptions, String adapterTypeName, final InternalAdapterStore internalAdapterStore, final IndexStore indexStore) throws IOException {
  // No type requested: fall back to the first feature type in the store, if there is one.
  if (adapterTypeName == null) {
    final List<String> candidates = FeatureDataUtils.getFeatureTypeNames(storeOptions);
    if (candidates.isEmpty()) {
      LOGGER.error("No valid adapter found in store to perform join.");
      return null;
    }
    adapterTypeName = candidates.get(0);
  }

  // Query only the chosen type, with min and max splits both set to partCount.
  final RDDOptions loadOptions = new RDDOptions();
  loadOptions.setQuery(QueryBuilder.newBuilder().addTypeName(adapterTypeName).build());
  loadOptions.setMinSplits(partCount);
  loadOptions.setMaxSplits(partCount);

  // A user-provided strategy wins; otherwise borrow the one of the type's first index.
  final NumericIndexStrategy joinStrategy;
  if (indexStrategy != null) {
    joinStrategy = indexStrategy;
  } else {
    final Index[] adapterIndices =
        getIndicesForAdapter(storeOptions, adapterTypeName, internalAdapterStore, indexStore);
    joinStrategy = (adapterIndices.length > 0) ? adapterIndices[0].getIndexStrategy() : null;
  }

  return GeoWaveRDDLoader.loadIndexedRDD(sc, storeOptions, loadOptions, joinStrategy);
}
Use of org.locationtech.geowave.analytic.spark.RDDOptions in the geowave project by locationtech.
Class SparkUtils, method verifyQuery.
/**
 * Loads the features matching a filter into an RDD and asserts that their count equals the
 * expected result count.
 *
 * <p>If any exception is raised while preparing or running the query, it is logged with its
 * cause, the data store is cleared via {@code TestUtils.deleteAll} and the test is failed.
 *
 * @param dataStore the store to query
 * @param context the Spark context used to load the RDD
 * @param filterFile resource describing the query filter
 * @param expectedResultsFiles resources from which the expected results are read
 * @param name test name, used in the failure message
 * @param crsTransform if non-null, the filter feature is transformed from the default CRS to this
 *        CRS before the query is built
 * @param optimalCqlQueryGeometryAndTimeFields passed to the query construction when no CRS
 *        transform is requested
 * @param useDuring passed through to the query construction
 */
public static void verifyQuery(final DataStorePluginOptions dataStore, final SparkContext context, final URL filterFile, final URL[] expectedResultsFiles, final String name, final CoordinateReferenceSystem crsTransform, final Pair<String, String> optimalCqlQueryGeometryAndTimeFields, final boolean useDuring) {
  try {
    // get expected results
    final ExpectedResults expectedResults = TestUtils.getExpectedResults(expectedResultsFiles);

    QueryConstraints query;
    if (crsTransform != null) {
      // Re-project the filter feature into the requested CRS before turning it into a query.
      final SimpleFeature feature = TestUtils.resourceToFeature(filterFile);
      query =
          TestUtils.featureToQuery(
              GeometryUtils.crsTransform(
                  feature,
                  SimpleFeatureTypeBuilder.retype(feature.getFeatureType(), crsTransform),
                  CRS.findMathTransform(GeometryUtils.getDefaultCRS(), crsTransform, true)),
              null,
              GeometryUtils.getCrsCode(crsTransform),
              useDuring);
    } else {
      query = TestUtils.resourceToQuery(filterFile, optimalCqlQueryGeometryAndTimeFields, useDuring);
    }

    // Load RDD using spatial query (bbox)
    final RDDOptions queryOpts = new RDDOptions();
    queryOpts.setQuery(QueryBuilder.newBuilder().constraints(query).build());
    queryOpts.setMinSplits(DEFAULT_SPLITS_FOR_COUNT);
    queryOpts.setMaxSplits(DEFAULT_SPLITS_FOR_COUNT);
    final GeoWaveRDD newRDD = GeoWaveRDDLoader.loadRDD(context, dataStore, queryOpts);
    final JavaPairRDD<GeoWaveInputKey, SimpleFeature> javaRdd = newRDD.getRawRDD();
    final long count = getCount(javaRdd, dataStore.getType());
    LOGGER.warn("DataStore loaded into RDD with " + count + " features.");

    // Verify RDD count matches expected count
    Assert.assertEquals(expectedResults.count, count);
  } catch (final Exception e) {
    // Route the failure through the logger (with its cause) instead of printing to stderr.
    LOGGER.error("Error occurred while testing '" + name + "'", e);
    TestUtils.deleteAll(dataStore);
    Assert.fail("Error occurred while testing '" + name + "'");
  }
}
Aggregations