Search in sources:

Example 1 with GeoWaveRDD

Use of org.locationtech.geowave.analytic.spark.GeoWaveRDD in project geowave by locationtech.

The run method of the KMeansRunner class.

/**
 * Runs KMeans over the feature vectors loaded from the input data store and writes the
 * resulting model through {@code writeToOutputStore()}.
 *
 * <p>The query is restricted to {@code typeName} when it is set; otherwise every feature type
 * name reported for the input store is used. When {@code cqlFilter} is set, a geometry is
 * extracted from it and applied as a spatial constraint. If the CQL cannot be parsed, the error
 * is logged (with its cause) and the run continues without a spatial constraint.
 *
 * @throws IOException if no input data store was supplied
 * @throws ParameterException if time is enabled but the time parameters could not be set
 */
public void run() throws IOException {
    initContext();
    // Validate inputs
    if (inputDataStore == null) {
        LOGGER.error("You must supply an input datastore!");
        throw new IOException("You must supply an input datastore!");
    }
    if (isUseTime()) {
        scaledRange = KMeansUtils.setRunnerTimeParams(this, inputDataStore, typeName);
        if (scaledRange == null) {
            LOGGER.error("Failed to set time params for kmeans. Please specify a valid feature type.");
            throw new ParameterException("--useTime option: Failed to set time params");
        }
    }
    // Retrieve the feature adapters
    final VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder();
    List<String> featureTypeNames;
    // If provided, just use the one
    if (typeName != null) {
        featureTypeNames = new ArrayList<>();
        featureTypeNames.add(typeName);
    } else {
        // otherwise, grab all the feature adapters
        featureTypeNames = FeatureDataUtils.getFeatureTypeNames(inputDataStore);
    }
    bldr.setTypeNames(featureTypeNames.toArray(new String[0]));
    // This is required due to some funkiness in GeoWaveInputFormat
    final PersistentAdapterStore adapterStore = inputDataStore.createAdapterStore();
    final InternalAdapterStore internalAdapterStore = inputDataStore.createInternalAdapterStore();
    // Add a spatial filter if requested
    try {
        if (cqlFilter != null) {
            Geometry bbox = null;
            // Without an explicit type name, the first feature type supplies the geometry attribute
            final String cqlTypeName = (typeName == null) ? featureTypeNames.get(0) : typeName;
            final short adapterId = internalAdapterStore.getAdapterId(cqlTypeName);
            final DataTypeAdapter<?> adapter = adapterStore.getAdapter(adapterId).getAdapter();
            if (adapter instanceof GeotoolsFeatureDataAdapter) {
                final String geometryAttribute = ((GeotoolsFeatureDataAdapter) adapter).getFeatureType().getGeometryDescriptor().getLocalName();
                final Filter filter = ECQL.toFilter(cqlFilter);
                final ExtractGeometryFilterVisitorResult geoAndCompareOpData = (ExtractGeometryFilterVisitorResult) filter.accept(new ExtractGeometryFilterVisitor(GeometryUtils.getDefaultCRS(), geometryAttribute), null);
                bbox = geoAndCompareOpData.getGeometry();
            }
            // Only constrain the query when the filter yielded a bounded geometry
            if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
                bldr.constraints(bldr.constraintsFactory().spatialTemporalConstraints().spatialConstraints(bbox).build());
            }
        }
    } catch (final CQLException e) {
        // Best effort: keep going without a spatial constraint, but keep the cause in the log
        LOGGER.error("Unable to parse CQL: " + cqlFilter, e);
    }
    // Load RDD from datastore
    final RDDOptions kmeansOpts = new RDDOptions();
    kmeansOpts.setMinSplits(minSplits);
    kmeansOpts.setMaxSplits(maxSplits);
    kmeansOpts.setQuery(bldr.build());
    final GeoWaveRDD kmeansRDD = GeoWaveRDDLoader.loadRDD(session.sparkContext(), inputDataStore, kmeansOpts);
    // Retrieve the input centroids
    LOGGER.debug("Retrieving input centroids from RDD...");
    centroidVectors = RDDUtils.rddFeatureVectors(kmeansRDD, timeField, scaledTimeRange);
    // Cached before being handed to the KMeans run below
    centroidVectors.cache();
    // Init the algorithm
    final KMeans kmeans = new KMeans();
    kmeans.setInitializationMode("kmeans||");
    kmeans.setK(numClusters);
    kmeans.setMaxIterations(numIterations);
    // Epsilon is only applied when it is greater than -1.0
    if (epsilon > -1.0) {
        kmeans.setEpsilon(epsilon);
    }
    // Run KMeans
    LOGGER.debug("Running KMeans algorithm...");
    outputModel = kmeans.run(centroidVectors.rdd());
    LOGGER.debug("Writing results to output store...");
    writeToOutputStore();
    LOGGER.debug("Results successfully written!");
}
Also used : VectorQueryBuilder(org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder) InternalAdapterStore(org.locationtech.geowave.core.store.adapter.InternalAdapterStore) KMeans(org.apache.spark.mllib.clustering.KMeans) IOException(java.io.IOException) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions) Geometry(org.locationtech.jts.geom.Geometry) PersistentAdapterStore(org.locationtech.geowave.core.store.adapter.PersistentAdapterStore) GeotoolsFeatureDataAdapter(org.locationtech.geowave.core.geotime.store.GeotoolsFeatureDataAdapter) Filter(org.opengis.filter.Filter) ExtractGeometryFilterVisitorResult(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitorResult) ParameterException(com.beust.jcommander.ParameterException) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) CQLException(org.geotools.filter.text.cql2.CQLException) ExtractGeometryFilterVisitor(org.locationtech.geowave.core.geotime.util.ExtractGeometryFilterVisitor)

Example 2 with GeoWaveRDD

Use of org.locationtech.geowave.analytic.spark.GeoWaveRDD in project geowave by locationtech.

The run method of the KDERunner class.

/**
 * Computes a kernel-density raster for every level from {@code minLevel} to {@code maxLevel}
 * and writes the resulting coverages to the output data store.
 *
 * <p>For each level the input features are loaded as an RDD, mapped to weighted cells, summed
 * per cell, sorted by descending weight, and turned into three-band coverages (raw value, value
 * divided by the maximum, and percentile). When {@code tileSize > 1} the single-cell coverages
 * are additionally passed through {@code TransformTileSize} and {@code MergeOverlappingTiles}
 * before being written.
 *
 * @throws IOException if no input data store was supplied
 * @throws RuntimeException if any axis bound of the output index CRS is infinite
 */
public void run() throws IOException {
    initContext();
    // Validate inputs
    if (inputDataStore == null) {
        LOGGER.error("You must supply an input datastore!");
        throw new IOException("You must supply an input datastore!");
    }
    // Retrieve the feature adapters
    final VectorQueryBuilder bldr = VectorQueryBuilder.newBuilder();
    List<String> featureTypeNames;
    // If provided, just use the one
    if (typeName != null) {
        featureTypeNames = new ArrayList<>();
        featureTypeNames.add(typeName);
    } else {
        // otherwise, grab all the feature adapters
        featureTypeNames = FeatureDataUtils.getFeatureTypeNames(inputDataStore);
    }
    bldr.setTypeNames(featureTypeNames.toArray(new String[0]));
    if (indexName != null) {
        bldr.indexName(indexName);
    }
    // Pick the input index: the first non-null index when no name was given,
    // otherwise the first index whose name equals indexName
    Index inputPrimaryIndex = null;
    final Index[] idxArray = inputDataStore.createDataStore().getIndices();
    for (final Index idx : idxArray) {
        if ((idx != null) && ((indexName == null) || indexName.equals(idx.getName()))) {
            inputPrimaryIndex = idx;
            break;
        }
    }
    // NOTE(review): inputPrimaryIndex is still null here if no index matched; confirm that
    // GeometryUtils.getIndexCrs tolerates a null index
    final CoordinateReferenceSystem inputIndexCrs = GeometryUtils.getIndexCrs(inputPrimaryIndex);
    final String inputCrsCode = GeometryUtils.getCrsCode(inputIndexCrs);
    // Use the configured output index if there is one; otherwise create a spatial index
    // that reuses the input CRS
    Index outputPrimaryIndex = outputIndex;
    CoordinateReferenceSystem outputIndexCrs = null;
    final String outputCrsCode;
    if (outputPrimaryIndex != null) {
        outputIndexCrs = GeometryUtils.getIndexCrs(outputPrimaryIndex);
        outputCrsCode = GeometryUtils.getCrsCode(outputIndexCrs);
    } else {
        final SpatialDimensionalityTypeProvider sdp = new SpatialDimensionalityTypeProvider();
        final SpatialOptions so = sdp.createOptions();
        so.setCrs(inputCrsCode);
        outputPrimaryIndex = SpatialDimensionalityTypeProvider.createIndexFromOptions(so);
        outputIndexCrs = inputIndexCrs;
        outputCrsCode = inputCrsCode;
    }
    // The first two axes of the output CRS provide the x/y extent of the grid
    final CoordinateSystem cs = outputIndexCrs.getCoordinateSystem();
    final CoordinateSystemAxis csx = cs.getAxis(0);
    final CoordinateSystemAxis csy = cs.getAxis(1);
    final double xMax = csx.getMaximumValue();
    final double xMin = csx.getMinimumValue();
    final double yMax = csy.getMaximumValue();
    final double yMin = csy.getMinimumValue();
    // An unbounded axis cannot be used as a grid extent, so reject it up front
    if ((xMax == Double.POSITIVE_INFINITY) || (xMin == Double.NEGATIVE_INFINITY) || (yMax == Double.POSITIVE_INFINITY) || (yMin == Double.NEGATIVE_INFINITY)) {
        LOGGER.error("Raster KDE resize with raster primary index CRS dimensions min/max equal to positive infinity or negative infinity is not supported");
        throw new RuntimeException("Raster KDE resize with raster primary index CRS dimensions min/max equal to positive infinity or negative infinity is not supported");
    }
    if (cqlFilter != null) {
        bldr.constraints(bldr.constraintsFactory().cqlConstraints(cqlFilter));
    }
    // Load RDD from datastore
    final RDDOptions kdeOpts = new RDDOptions();
    kdeOpts.setMinSplits(minSplits);
    kdeOpts.setMaxSplits(maxSplits);
    kdeOpts.setQuery(bldr.build());
    // Combiner functions for combineByKey below: keep the first value as-is, then add
    final Function<Double, Double> identity = x -> x;
    final Function2<Double, Double, Double> sum = (final Double x, final Double y) -> {
        return x + y;
    };
    // Register the raster type with the output store before any level is written
    final RasterDataAdapter adapter = RasterUtils.createDataAdapterTypeDouble(coverageName, KDEReducer.NUM_BANDS, tileSize, MINS_PER_BAND, MAXES_PER_BAND, NAME_PER_BAND, new NoDataMergeStrategy());
    outputDataStore.createDataStore().addType(adapter, outputPrimaryIndex);
    // The following "inner" variables are created to give access to member
    // variables within lambda
    // expressions
    // Fixed at 1 (the commented-out alternative was tileSize); tiles are resized further down
    // when tileSize > 1
    final int innerTileSize = 1;
    final String innerCoverageName = coverageName;
    for (int level = minLevel; level <= maxLevel; level++) {
        // 2^(level + 1) tiles across and 2^level tiles down
        final int numXTiles = (int) Math.pow(2, level + 1);
        final int numYTiles = (int) Math.pow(2, level);
        // One post per tile (the commented-out alternative multiplied by tileSize)
        final int numXPosts = numXTiles;
        // One post per tile (the commented-out alternative multiplied by tileSize)
        final int numYPosts = numYTiles;
        final GeoWaveRDD kdeRDD = GeoWaveRDDLoader.loadRDD(session.sparkContext(), inputDataStore, kdeOpts);
        // Map features to cells, sum the values per cell, then swap each pair so the summed
        // value becomes the key (cells is keyed by Double with a Long value)
        JavaPairRDD<Double, Long> cells = kdeRDD.getRawRDD().flatMapToPair(new GeoWaveCellMapper(numXPosts, numYPosts, xMin, xMax, yMin, yMax, inputCrsCode, outputCrsCode)).combineByKey(identity, sum, sum).mapToPair(item -> item.swap());
        // Range-partition on the Double key, sort descending and cache, since the result is
        // used by count(), first() and zipWithIndex() below
        cells = cells.partitionBy(new RangePartitioner(cells.getNumPartitions(), cells.rdd(), true, scala.math.Ordering.Double$.MODULE$, scala.reflect.ClassTag$.MODULE$.apply(Double.class))).sortByKey(false).cache();
        final long count = cells.count();
        if (count == 0) {
            LOGGER.warn("No cells produced by KDE");
            continue;
        }
        // Sorted descending, so the first key is the largest summed value
        final double max = cells.first()._1;
        // t._1 is the (value, cell index) pair and t._2 is its position in the sorted order
        JavaRDD<GridCoverage> rdd = cells.zipWithIndex().map(t -> {
            final TileInfo tileInfo = fromCellIndexToTileInfo(t._1._2, numXPosts, numYPosts, numXTiles, numYTiles, xMin, xMax, yMin, yMax, innerTileSize);
            final WritableRaster raster = RasterUtils.createRasterTypeDouble(NUM_BANDS, innerTileSize);
            final double normalizedValue = t._1._1 / max;
            // because we are using a Double as the key, the ordering
            // isn't always completely reproducible as Double equals does not
            // take into account an epsilon
            final double percentile = (count - t._2) / ((double) count);
            // Band 0: raw summed value, band 1: value / max, band 2: percentile
            raster.setSample(tileInfo.x, tileInfo.y, 0, t._1._1);
            raster.setSample(tileInfo.x, tileInfo.y, 1, normalizedValue);
            raster.setSample(tileInfo.x, tileInfo.y, 2, percentile);
            // NOTE(review): the coverage is created with GeometryUtils.DEFAULT_CRS_STR rather than
            // outputCrsCode - confirm this is intended when the output index uses another CRS
            return RasterUtils.createCoverageTypeDouble(innerCoverageName, tileInfo.tileWestLon, tileInfo.tileEastLon, tileInfo.tileSouthLat, tileInfo.tileNorthLat, MINS_PER_BAND, MAXES_PER_BAND, NAME_PER_BAND, raster, GeometryUtils.DEFAULT_CRS_STR);
        });
        LOGGER.debug("Writing results to output store...");
        if (tileSize > 1) {
            // Regroup the 1x1 coverages into tiles for the adapter/index and merge the pieces
            // that share a key
            rdd = rdd.flatMapToPair(new TransformTileSize(adapter, outputPrimaryIndex)).groupByKey().map(new MergeOverlappingTiles(adapter, outputPrimaryIndex));
        }
        // NOTE(review): writes through jsc.sc() while loading uses session.sparkContext() -
        // presumably the same underlying context; verify
        RDDUtils.writeRasterToGeoWave(jsc.sc(), outputPrimaryIndex, outputDataStore, adapter, rdd);
        LOGGER.debug("Results successfully written!");
    }
}
Also used : FactoryException(org.opengis.referencing.FactoryException) Arrays(java.util.Arrays) CRS(org.geotools.referencing.CRS) Function2(org.apache.spark.api.java.function.Function2) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) CoordinateSystemAxis(org.opengis.referencing.cs.CoordinateSystemAxis) PairFlatMapFunction(org.apache.spark.api.java.function.PairFlatMapFunction) PersistenceUtils(org.locationtech.geowave.core.index.persist.PersistenceUtils) URISyntaxException(java.net.URISyntaxException) GaussianFilter(org.locationtech.geowave.analytic.mapreduce.kde.GaussianFilter) ObjectInputStream(java.io.ObjectInputStream) LoggerFactory(org.slf4j.LoggerFactory) FitToIndexGridCoverage(org.locationtech.geowave.adapter.raster.FitToIndexGridCoverage) SimpleFeature(org.opengis.feature.simple.SimpleFeature) KDEReducer(org.locationtech.geowave.analytic.mapreduce.kde.KDEReducer) JTS(org.geotools.geometry.jts.JTS) TransformException(org.opengis.referencing.operation.TransformException) GeoWaveSparkConf(org.locationtech.geowave.analytic.spark.GeoWaveSparkConf) NoDataMergeStrategy(org.locationtech.geowave.adapter.raster.adapter.merge.nodata.NoDataMergeStrategy) Point(org.locationtech.jts.geom.Point) HadoopWritableSerializer(org.locationtech.geowave.mapreduce.HadoopWritableSerializer) GeometryUtils(org.locationtech.geowave.core.geotime.util.GeometryUtils) Tuple2(scala.Tuple2) Serializable(java.io.Serializable) List(java.util.List) Geometry(org.locationtech.jts.geom.Geometry) Function(org.apache.spark.api.java.function.Function) FilenameUtils(org.apache.commons.io.FilenameUtils) CoordinateReferenceSystem(org.opengis.referencing.crs.CoordinateReferenceSystem) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) GridCoverageWritable(org.locationtech.geowave.adapter.raster.adapter.GridCoverageWritable) CoordinateSystem(org.opengis.referencing.cs.CoordinateSystem) RasterDataAdapter(org.locationtech.geowave.adapter.raster.adapter.RasterDataAdapter) 
GeoWaveInputKey(org.locationtech.geowave.mapreduce.input.GeoWaveInputKey) VectorQueryBuilder(org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder) MismatchedDimensionException(org.opengis.geometry.MismatchedDimensionException) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) FeatureDataUtils(org.locationtech.geowave.adapter.vector.util.FeatureDataUtils) SpatialDimensionalityTypeProvider(org.locationtech.geowave.core.geotime.index.SpatialDimensionalityTypeProvider) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) GeoWaveRDDLoader(org.locationtech.geowave.analytic.spark.GeoWaveRDDLoader) ObjectOutputStream(java.io.ObjectOutputStream) RasterUtils(org.locationtech.geowave.adapter.raster.RasterUtils) Index(org.locationtech.geowave.core.store.api.Index) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkSession(org.apache.spark.sql.SparkSession) RDDUtils(org.locationtech.geowave.analytic.spark.RDDUtils) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) GridCoverage(org.opengis.coverage.grid.GridCoverage) RangePartitioner(org.apache.spark.RangePartitioner) IOException(java.io.IOException) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions) CellCounter(org.locationtech.geowave.analytic.mapreduce.kde.CellCounter) SpatialOptions(org.locationtech.geowave.core.geotime.index.SpatialOptions) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) MathTransform(org.opengis.referencing.operation.MathTransform) ClientMergeableRasterTile(org.locationtech.geowave.adapter.raster.adapter.ClientMergeableRasterTile) WritableRaster(java.awt.image.WritableRaster) VectorQueryBuilder(org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder) CoordinateSystem(org.opengis.referencing.cs.CoordinateSystem) CoordinateSystemAxis(org.opengis.referencing.cs.CoordinateSystemAxis) 
Index(org.locationtech.geowave.core.store.api.Index) SpatialDimensionalityTypeProvider(org.locationtech.geowave.core.geotime.index.SpatialDimensionalityTypeProvider) WritableRaster(java.awt.image.WritableRaster) RangePartitioner(org.apache.spark.RangePartitioner) CoordinateReferenceSystem(org.opengis.referencing.crs.CoordinateReferenceSystem) IOException(java.io.IOException) SpatialOptions(org.locationtech.geowave.core.geotime.index.SpatialOptions) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions) Point(org.locationtech.jts.geom.Point) RasterDataAdapter(org.locationtech.geowave.adapter.raster.adapter.RasterDataAdapter) NoDataMergeStrategy(org.locationtech.geowave.adapter.raster.adapter.merge.nodata.NoDataMergeStrategy) FitToIndexGridCoverage(org.locationtech.geowave.adapter.raster.FitToIndexGridCoverage) GridCoverage(org.opengis.coverage.grid.GridCoverage) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD)

Example 3 with GeoWaveRDD

Use of org.locationtech.geowave.analytic.spark.GeoWaveRDD in project geowave by locationtech.

The verifyQuery method of the SparkUtils class.

/**
 * Loads an RDD from {@code dataStore} using the query described by {@code filterFile} and
 * asserts that the number of loaded features equals the count from the expected results.
 *
 * <p>If an exception is thrown while preparing or running the query, it is logged together
 * with its stack trace, the data store is emptied via {@code TestUtils.deleteAll}, and the test
 * is failed.
 *
 * @param dataStore the store to query (and to clean up on error)
 * @param context the Spark context used to load the RDD
 * @param filterFile resource describing the query filter
 * @param expectedResultsFiles resources describing the expected results
 * @param name label used in the log and failure messages
 * @param crsTransform when non-null, the filter feature is transformed into this CRS before
 *        the query is built
 * @param optimalCqlQueryGeometryAndTimeFields passed to {@code TestUtils.resourceToQuery} when
 *        no CRS transform is requested
 * @param useDuring passed through to the query construction helpers
 */
public static void verifyQuery(final DataStorePluginOptions dataStore, final SparkContext context, final URL filterFile, final URL[] expectedResultsFiles, final String name, final CoordinateReferenceSystem crsTransform, final Pair<String, String> optimalCqlQueryGeometryAndTimeFields, final boolean useDuring) {
    try {
        // get expected results
        final ExpectedResults expectedResults = TestUtils.getExpectedResults(expectedResultsFiles);
        QueryConstraints query;
        if (crsTransform != null) {
            final SimpleFeature feature = TestUtils.resourceToFeature(filterFile);
            query = TestUtils.featureToQuery(GeometryUtils.crsTransform(feature, SimpleFeatureTypeBuilder.retype(feature.getFeatureType(), crsTransform), CRS.findMathTransform(GeometryUtils.getDefaultCRS(), crsTransform, true)), null, GeometryUtils.getCrsCode(crsTransform), useDuring);
        } else {
            query = TestUtils.resourceToQuery(filterFile, optimalCqlQueryGeometryAndTimeFields, useDuring);
        }
        // Load RDD using spatial query (bbox)
        final RDDOptions queryOpts = new RDDOptions();
        queryOpts.setQuery(QueryBuilder.newBuilder().constraints(query).build());
        queryOpts.setMinSplits(DEFAULT_SPLITS_FOR_COUNT);
        queryOpts.setMaxSplits(DEFAULT_SPLITS_FOR_COUNT);
        final GeoWaveRDD newRDD = GeoWaveRDDLoader.loadRDD(context, dataStore, queryOpts);
        final JavaPairRDD<GeoWaveInputKey, SimpleFeature> javaRdd = newRDD.getRawRDD();
        final long count = getCount(javaRdd, dataStore.getType());
        LOGGER.warn("DataStore loaded into RDD with " + count + " features.");
        // Verify RDD count matches expected count
        Assert.assertEquals(expectedResults.count, count);
    } catch (final Exception e) {
        // Send the failure through the logger (with stack trace) rather than stderr, so it
        // appears alongside the rest of the test output
        LOGGER.error("Error occurred while testing '" + name + "'", e);
        TestUtils.deleteAll(dataStore);
        Assert.fail("Error occurred while testing '" + name + "'");
    }
}
Also used : ExpectedResults(org.locationtech.geowave.test.TestUtils.ExpectedResults) QueryConstraints(org.locationtech.geowave.core.store.query.constraints.QueryConstraints) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) GeoWaveInputKey(org.locationtech.geowave.mapreduce.input.GeoWaveInputKey) SimpleFeature(org.opengis.feature.simple.SimpleFeature) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions)

Example 4 with GeoWaveRDD

Use of org.locationtech.geowave.analytic.spark.GeoWaveRDD in project geowave by locationtech.

The testLoadRDD method of the GeoWaveBasicSparkIT class.

/**
 * Ingests the tornado-track and hail shapefiles under several index configurations and checks,
 * through {@code verifyQuery}, that RDD-based queries return the expected results. Finally
 * both data sets are ingested together and the per-adapter feature counts are compared with
 * {@code HAIL_COUNT} and {@code TORNADO_COUNT}.
 *
 * @throws Exception if ingest or Spark setup fails outside the guarded sections
 */
@Test
public void testLoadRDD() throws Exception {
    // Set up Spark
    final SparkContext context = SparkTestEnvironment.getInstance().getDefaultSession().sparkContext();
    TestUtils.deleteAll(dataStore);
    // test spatial temporal queries with spatial index for tornado tracks
    TestUtils.testLocalIngest(dataStore, DimensionalityType.SPATIAL, TORNADO_TRACKS_SHAPEFILE_FILE, 1);
    verifyQuery(context, TEST_BOX_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_BOX_TEMPORAL_FILTER_RESULTS_FILE, "bounding box tornado tracks spatial-temporal query with spatial only index", true);
    verifyQuery(context, TEST_POLYGON_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_POLYGON_TEMPORAL_FILTER_RESULTS_FILE, "polygon tornado tracks spatial-temporal query with spatial only index", false);
    TestUtils.deleteAll(dataStore);
    // test spatial queries with spatial temporal index for tornado tracks
    TestUtils.testLocalIngest(dataStore, DimensionalityType.SPATIAL_TEMPORAL, TORNADO_TRACKS_SHAPEFILE_FILE, 1);
    verifyQuery(context, TEST_BOX_FILTER_FILE, TORNADO_TRACKS_EXPECTED_BOX_FILTER_RESULTS_FILE, "bounding box tornado tracks spatial query with spatial temporal index only", true);
    verifyQuery(context, TEST_POLYGON_FILTER_FILE, TORNADO_TRACKS_EXPECTED_POLYGON_FILTER_RESULTS_FILE, "polygon tornado tracks spatial query with spatial temporal index only", true);
    TestUtils.deleteAll(dataStore);
    // test spatial temporal queries with temporal index for tornado tracks
    TestUtils.testLocalIngest(dataStore, DimensionalityType.TEMPORAL, TORNADO_TRACKS_SHAPEFILE_FILE, 1);
    verifyQuery(context, TEST_BOX_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_BOX_TEMPORAL_FILTER_RESULTS_FILE, "bounding box tornado tracks spatial-temporal query with temporal only index", false);
    verifyQuery(context, TEST_POLYGON_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_POLYGON_TEMPORAL_FILTER_RESULTS_FILE, "polygon tornado tracks spatial-temporal query with temporal only index", true);
    TestUtils.deleteAll(dataStore);
    // test spatial temporal queries with temporal index for hail points
    TestUtils.testLocalIngest(dataStore, DimensionalityType.TEMPORAL, HAIL_SHAPEFILE_FILE, 1);
    verifyQuery(context, TEST_BOX_TEMPORAL_FILTER_FILE, HAIL_EXPECTED_BOX_TEMPORAL_FILTER_RESULTS_FILE, "bounding box hail spatial-temporal query with temporal index only", false);
    verifyQuery(context, TEST_POLYGON_TEMPORAL_FILTER_FILE, HAIL_EXPECTED_POLYGON_TEMPORAL_FILTER_RESULTS_FILE, "polygon hail spatial-temporal query with temporal index only", true);
    TestUtils.deleteAll(dataStore);
    // ingest test points
    TestUtils.testLocalIngest(dataStore, DimensionalityType.SPATIAL_AND_SPATIAL_TEMPORAL, HAIL_SHAPEFILE_FILE, 1);
    verifyQuery(context, TEST_BOX_FILTER_FILE, HAIL_EXPECTED_BOX_FILTER_RESULTS_FILE, "bounding box hail spatial query", true);
    verifyQuery(context, TEST_POLYGON_FILTER_FILE, HAIL_EXPECTED_POLYGON_FILTER_RESULTS_FILE, "polygon hail spatial query", true);
    verifyQuery(context, TEST_BOX_TEMPORAL_FILTER_FILE, HAIL_EXPECTED_BOX_TEMPORAL_FILTER_RESULTS_FILE, "bounding box hail spatial-temporal query", false);
    verifyQuery(context, TEST_POLYGON_TEMPORAL_FILTER_FILE, HAIL_EXPECTED_POLYGON_TEMPORAL_FILTER_RESULTS_FILE, "polygon hail spatial-temporal query", true);
    // test configurable CRS for hail points
    verifyQuery(context, TEST_BOX_FILTER_FILE, HAIL_EXPECTED_BOX_FILTER_RESULTS_FILE, "bounding box hail spatial query with other CRS", TestUtils.CUSTOM_CRS, true);
    verifyQuery(context, TEST_POLYGON_FILTER_FILE, HAIL_EXPECTED_POLYGON_FILTER_RESULTS_FILE, "polygon hail spatial query with other CRS", TestUtils.CUSTOM_CRS, true);
    verifyQuery(context, TEST_BOX_TEMPORAL_FILTER_FILE, HAIL_EXPECTED_BOX_TEMPORAL_FILTER_RESULTS_FILE, "bounding box hail spatial-temporal query with other CRS", TestUtils.CUSTOM_CRS, true);
    verifyQuery(context, TEST_POLYGON_TEMPORAL_FILTER_FILE, HAIL_EXPECTED_POLYGON_TEMPORAL_FILTER_RESULTS_FILE, "polygon hail spatial-temporal query with other CRS", TestUtils.CUSTOM_CRS, false);
    TestUtils.deleteAll(dataStore);
    // test lines only
    TestUtils.testLocalIngest(dataStore, DimensionalityType.SPATIAL_AND_SPATIAL_TEMPORAL, TORNADO_TRACKS_SHAPEFILE_FILE, 1);
    verifyQuery(context, TEST_BOX_FILTER_FILE, TORNADO_TRACKS_EXPECTED_BOX_FILTER_RESULTS_FILE, "bounding box tornado tracks spatial query", true);
    verifyQuery(context, TEST_POLYGON_FILTER_FILE, TORNADO_TRACKS_EXPECTED_POLYGON_FILTER_RESULTS_FILE, "polygon tornado tracks spatial query", true);
    verifyQuery(context, TEST_BOX_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_BOX_TEMPORAL_FILTER_RESULTS_FILE, "bounding box tornado tracks spatial-temporal query", true);
    verifyQuery(context, TEST_POLYGON_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_POLYGON_TEMPORAL_FILTER_RESULTS_FILE, "polygon tornado tracks spatial-temporal query", true);
    // test configurable CRS for tornado tracks
    verifyQuery(context, TEST_BOX_FILTER_FILE, TORNADO_TRACKS_EXPECTED_BOX_FILTER_RESULTS_FILE, "bounding box tornado tracks spatial query with other CRS", TestUtils.CUSTOM_CRS, true);
    verifyQuery(context, TEST_POLYGON_FILTER_FILE, TORNADO_TRACKS_EXPECTED_POLYGON_FILTER_RESULTS_FILE, "polygon tornado tracks spatial query with other CRS", TestUtils.CUSTOM_CRS, true);
    verifyQuery(context, TEST_BOX_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_BOX_TEMPORAL_FILTER_RESULTS_FILE, "bounding box tornado tracks spatial-temporal query with other CRS", TestUtils.CUSTOM_CRS, false);
    verifyQuery(context, TEST_POLYGON_TEMPORAL_FILTER_FILE, TORNADO_TRACKS_EXPECTED_POLYGON_TEMPORAL_FILTER_RESULTS_FILE, "polygon tornado tracks spatial-temporal query with other CRS", TestUtils.CUSTOM_CRS, true);
    // now test with both ingested
    TestUtils.testLocalIngest(dataStore, DimensionalityType.SPATIAL_AND_SPATIAL_TEMPORAL, HAIL_SHAPEFILE_FILE, 1);
    // Retrieve the adapters
    final InternalDataAdapter<?>[] adapters = dataStore.createAdapterStore().getAdapters();
    DataTypeAdapter<?> hailAdapter = null;
    DataTypeAdapter<?> tornadoAdapter = null;
    // Any adapter whose type name is not "hail" is treated as the tornado adapter
    for (final DataTypeAdapter<?> adapter : adapters) {
        final String adapterName = adapter.getTypeName();
        if (adapterName.equals("hail")) {
            hailAdapter = adapter;
        } else {
            tornadoAdapter = adapter;
        }
        LOGGER.warn("DataStore has feature adapter: " + adapterName);
    }
    // Load RDD using hail adapter
    try {
        final RDDOptions queryOpts = new RDDOptions();
        queryOpts.setQuery(QueryBuilder.newBuilder().addTypeName(hailAdapter.getTypeName()).build());
        final GeoWaveRDD newRDD = GeoWaveRDDLoader.loadRDD(context, dataStore, queryOpts);
        final JavaPairRDD<GeoWaveInputKey, SimpleFeature> javaRdd = newRDD.getRawRDD();
        final long count = SparkUtils.getCount(javaRdd, dataStore.getType());
        Assert.assertEquals(HAIL_COUNT, count);
        LOGGER.warn("DataStore loaded into RDD with " + count + " features for adapter " + hailAdapter.getTypeName());
    } catch (final Exception e) {
        // Log with the stack trace through the logger instead of printing to stderr
        LOGGER.error("Error occurred while loading RDD with hail adapter", e);
        TestUtils.deleteAll(dataStore);
        Assert.fail("Error occurred while loading RDD with adapter: '" + e.getLocalizedMessage() + "'");
    }
    // Load RDD using tornado adapter
    try {
        final RDDOptions queryOpts = new RDDOptions();
        queryOpts.setQuery(QueryBuilder.newBuilder().addTypeName(tornadoAdapter.getTypeName()).build());
        final GeoWaveRDD newRDD = GeoWaveRDDLoader.loadRDD(context, dataStore, queryOpts);
        final JavaPairRDD<GeoWaveInputKey, SimpleFeature> javaRdd = newRDD.getRawRDD();
        final long count = SparkUtils.getCount(javaRdd, dataStore.getType());
        LOGGER.warn("DataStore loaded into RDD with " + count + " features for adapter " + tornadoAdapter.getTypeName());
        Assert.assertEquals(TORNADO_COUNT, count);
    } catch (final Exception e) {
        // Log with the stack trace through the logger instead of printing to stderr
        LOGGER.error("Error occurred while loading RDD with tornado adapter", e);
        TestUtils.deleteAll(dataStore);
        Assert.fail("Error occurred while loading RDD with adapter: '" + e.getLocalizedMessage() + "'");
    }
    // Clean up
    TestUtils.deleteAll(dataStore);
}
Also used : SparkContext(org.apache.spark.SparkContext) DataTypeAdapter(org.locationtech.geowave.core.store.api.DataTypeAdapter) InternalDataAdapter(org.locationtech.geowave.core.store.adapter.InternalDataAdapter) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) GeoWaveInputKey(org.locationtech.geowave.mapreduce.input.GeoWaveInputKey) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions) SimpleFeature(org.opengis.feature.simple.SimpleFeature) MalformedURLException(java.net.MalformedURLException) Test(org.junit.Test)

Example 5 with GeoWaveRDD

Use of org.locationtech.geowave.analytic.spark.GeoWaveRDD in project geowave by locationtech.

The testCreateDataFrame method of the GeoWaveSparkSQLIT class.

/**
 * Ingests the hail shapefile, loads it as an RDD, and exercises the Spark SQL query runner:
 * {@code GeomContains} and {@code GeomWithin} over the same polygon must return the same
 * number of rows, the results are written back through {@code SqlResultsWriter}, and the
 * {@code GeomIntersects} / {@code GeomDisjoint} UDFs are checked on two crossing lines.
 *
 * @throws Exception if ingest or Spark setup fails outside the guarded section
 */
@Test
public void testCreateDataFrame() throws Exception {
    // Set up Spark
    final SparkSession session = SparkTestEnvironment.getInstance().getDefaultSession();
    final SparkContext context = session.sparkContext();
    // ingest test points
    TestUtils.testLocalIngest(dataStore, DimensionalityType.SPATIAL, HAIL_SHAPEFILE_FILE, 1);
    final SqlQueryRunner queryRunner = new SqlQueryRunner();
    queryRunner.setSparkSession(session);
    try {
        // Load RDD from datastore, no filters
        final GeoWaveRDD newRDD = GeoWaveRDDLoader.loadRDD(context, dataStore, new RDDOptions());
        final JavaPairRDD<GeoWaveInputKey, SimpleFeature> javaRdd = newRDD.getRawRDD();
        final long count = javaRdd.count();
        LOGGER.warn("DataStore loaded into RDD with " + count + " features.");
        queryRunner.addInputStore(dataStore, null, "features");
        final String bbox = "POLYGON ((-94 34, -93 34, -93 35, -94 35, -94 34))";
        queryRunner.setSql("SELECT * FROM features WHERE GeomContains(GeomFromWKT('" + bbox + "'), geom)");
        Dataset<Row> results = queryRunner.run();
        final long containsCount = results.count();
        LOGGER.warn("Got " + containsCount + " for GeomContains test");
        queryRunner.setSql("SELECT * FROM features WHERE GeomWithin(geom, GeomFromWKT('" + bbox + "'))");
        results = queryRunner.run();
        final long withinCount = results.count();
        LOGGER.warn("Got " + withinCount + " for GeomWithin test");
        // assertEquals reports both counts on failure, unlike assertTrue on an == comparison
        Assert.assertEquals("Within and Contains counts should be equal", containsCount, withinCount);
        // Test the output writer
        final SqlResultsWriter sqlResultsWriter = new SqlResultsWriter(results, dataStore);
        sqlResultsWriter.writeResults("sqltest");
        queryRunner.removeAllStores();
        // Test other spatial UDFs
        final String line1 = "LINESTRING(0 0, 10 10)";
        final String line2 = "LINESTRING(0 10, 10 0)";
        queryRunner.setSql("SELECT GeomIntersects(GeomFromWKT('" + line1 + "'), GeomFromWKT('" + line2 + "'))");
        Row result = queryRunner.run().head();
        final boolean intersect = result.getBoolean(0);
        LOGGER.warn("GeomIntersects returned " + intersect);
        Assert.assertTrue("Lines should intersect", intersect);
        queryRunner.setSql("SELECT GeomDisjoint(GeomFromWKT('" + line1 + "'), GeomFromWKT('" + line2 + "'))");
        result = queryRunner.run().head();
        final boolean disjoint = result.getBoolean(0);
        LOGGER.warn("GeomDisjoint returned " + disjoint);
        Assert.assertFalse("Lines should not be disjoint", disjoint);
    } catch (final Exception e) {
        // Log with the stack trace through the logger instead of printing to stderr
        LOGGER.error("Error occurred while testing a bounding box query of spatial index", e);
        TestUtils.deleteAll(dataStore);
        Assert.fail("Error occurred while testing a bounding box query of spatial index: '" + e.getLocalizedMessage() + "'");
    }
    // Clean up
    TestUtils.deleteAll(dataStore);
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) SqlResultsWriter(org.locationtech.geowave.analytic.spark.sparksql.SqlResultsWriter) GeoWaveInputKey(org.locationtech.geowave.mapreduce.input.GeoWaveInputKey) SqlQueryRunner(org.locationtech.geowave.analytic.spark.sparksql.SqlQueryRunner) RDDOptions(org.locationtech.geowave.analytic.spark.RDDOptions) SimpleFeature(org.opengis.feature.simple.SimpleFeature) SparkContext(org.apache.spark.SparkContext) GeoWaveRDD(org.locationtech.geowave.analytic.spark.GeoWaveRDD) Row(org.apache.spark.sql.Row) Test(org.junit.Test)

Aggregations

GeoWaveRDD (org.locationtech.geowave.analytic.spark.GeoWaveRDD): 6; RDDOptions (org.locationtech.geowave.analytic.spark.RDDOptions): 5; GeoWaveInputKey (org.locationtech.geowave.mapreduce.input.GeoWaveInputKey): 5; SimpleFeature (org.opengis.feature.simple.SimpleFeature): 5; SparkContext (org.apache.spark.SparkContext): 3; SparkSession (org.apache.spark.sql.SparkSession): 3; IOException (java.io.IOException): 2; Arrays (java.util.Arrays): 2; List (java.util.List): 2; JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 2; JavaRDD (org.apache.spark.api.java.JavaRDD): 2; JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 2; Function (org.apache.spark.api.java.function.Function): 2; PairFlatMapFunction (org.apache.spark.api.java.function.PairFlatMapFunction): 2; Test (org.junit.Test): 2; VectorQueryBuilder (org.locationtech.geowave.core.geotime.store.query.api.VectorQueryBuilder): 2; Geometry (org.locationtech.jts.geom.Geometry): 2; ParameterException (com.beust.jcommander.ParameterException): 1; Iterators (com.google.common.collect.Iterators): 1; Lists (com.google.common.collect.Lists): 1