Search in sources :

Example 1 with SimpleFeatureDataFrame

use of org.locationtech.geowave.analytic.spark.sparksql.SimpleFeatureDataFrame in project geowave by locationtech.

In class GeoWaveSparkSpatialJoinIT, method runBruteForceJoin.

/**
 * Executes the two supplied SQL statements against temporary views named {@code "tornado"} and
 * {@code "hail"}, stores the de-duplicated results and their row counts in the
 * {@code hailBruteResults}/{@code hailBruteCount} and
 * {@code tornadoBruteResults}/{@code tornadoBruteCount} fields, and reports the elapsed time.
 *
 * @param hail_adapter type name used to initialise the hail data frame
 * @param tornado_adapter type name used to initialise the tornado data frame
 * @param sqlHail SQL statement whose result is stored as the hail brute-force result
 * @param sqlTornado SQL statement whose result is stored as the tornado brute-force result
 * @return wall-clock milliseconds from method entry until both counts have been computed
 */
private long runBruteForceJoin(final String hail_adapter, final String tornado_adapter, final String sqlHail, final String sqlTornado) {
    final long startMillis = System.currentTimeMillis();

    final SimpleFeatureDataFrame hailView = new SimpleFeatureDataFrame(session);
    final SimpleFeatureDataFrame tornadoView = new SimpleFeatureDataFrame(session);

    // Register both inputs as temp views so the SQL statements can refer to them by name.
    tornadoView.init(tornadoStore, tornado_adapter);
    tornadoView.getDataFrame(tornadoRDD).createOrReplaceTempView("tornado");
    hailView.init(hailStore, hail_adapter);
    hailView.getDataFrame(hailRDD).createOrReplaceTempView("hail");

    // Hail side: query, drop duplicate rows, cache, then count.
    hailBruteResults = session.sql(sqlHail).dropDuplicates();
    hailBruteResults.cache();
    hailBruteCount = hailBruteResults.count();

    // Tornado side: same sequence.
    tornadoBruteResults = session.sql(sqlTornado).dropDuplicates();
    tornadoBruteResults.cache();
    tornadoBruteCount = tornadoBruteResults.count();

    // Stop the clock before logging so the log call is not part of the measurement.
    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    LOGGER.warn("Brute Result Count: " + (tornadoBruteCount + hailBruteCount));
    return elapsedMillis;
}
Also used : SimpleFeatureDataFrame(org.locationtech.geowave.analytic.spark.sparksql.SimpleFeatureDataFrame)

Example 2 with SimpleFeatureDataFrame

use of org.locationtech.geowave.analytic.spark.sparksql.SimpleFeatureDataFrame in project geowave by locationtech.

In class GeoWaveSparkSpatialJoinIT, method testHailTornadoDistanceJoin.

/**
 * Checks that the indexed spatial join performed by {@link SpatialJoinRunner} yields the same
 * rows as a brute-force Spark SQL join using {@code GeomDistance(...) <= 0.01}.
 *
 * <p>The test calls {@code ingestHailandTornado()}, runs the indexed join, runs the brute-force
 * join via {@code runBruteForceJoin}, and then compares both the row counts and the row
 * contents (via {@code Dataset.except}) for the hail and tornado sides. Both stores are cleared
 * with {@code TestUtils.deleteAll} once all assertions have passed.
 *
 * @throws Exception if the indexed join fails; failures from {@code runner.run()} are logged
 *         and rethrown so that the test stops at the real cause instead of continuing with
 *         missing results
 */
@Test
public void testHailTornadoDistanceJoin() throws Exception {
    session = SparkTestEnvironment.getInstance().getDefaultSession();
    context = session.sparkContext();
    GeomFunctionRegistry.registerGeometryFunctions(session);
    LOGGER.debug("Testing DataStore Type: " + hailStore.getType());
    ingestHailandTornado();
    final String hail_adapter = "hail";
    final String tornado_adapter = "tornado_tracks";
    final GeomWithinDistance distancePredicate = new GeomWithinDistance(0.01);
    final String sqlHail = "select hail.* from hail, tornado where GeomDistance(hail.geom,tornado.geom) <= 0.01";
    final String sqlTornado = "select tornado.* from hail, tornado where GeomDistance(hail.geom,tornado.geom) <= 0.01";
    final SpatialJoinRunner runner = new SpatialJoinRunner(session);
    runner.setLeftStore(hailStore);
    runner.setLeftAdapterTypeName(hail_adapter);
    runner.setRightStore(tornadoStore);
    runner.setRightAdapterTypeName(tornado_adapter);
    runner.setPredicate(distancePredicate);
    loadRDDs(hail_adapter, tornado_adapter);
    LOGGER.warn("------------ Running indexed spatial join. ----------");
    final long mark = System.currentTimeMillis();
    try {
        runner.run();
    } catch (final InterruptedException e) {
        // Restore the interrupt flag before propagating so callers can still observe it.
        Thread.currentThread().interrupt();
        LOGGER.error("Async error in join", e);
        throw e;
    } catch (final ExecutionException e) {
        LOGGER.error("Async error in join", e);
        throw e;
    } catch (final IOException e) {
        LOGGER.error("IO error in join", e);
        throw e;
    }
    final long hailIndexedCount = runner.getLeftResults().getRawRDD().count();
    final long tornadoIndexedCount = runner.getRightResults().getRawRDD().count();
    final long indexJoinDur = (System.currentTimeMillis() - mark);
    LOGGER.warn("Indexed Result Count: " + (hailIndexedCount + tornadoIndexedCount));
    final SimpleFeatureDataFrame indexHailFrame = new SimpleFeatureDataFrame(session);
    final SimpleFeatureDataFrame indexTornadoFrame = new SimpleFeatureDataFrame(session);
    indexTornadoFrame.init(tornadoStore, tornado_adapter);
    final Dataset<Row> indexedTornado = indexTornadoFrame.getDataFrame(runner.getRightResults());
    indexHailFrame.init(hailStore, hail_adapter);
    final Dataset<Row> indexedHail = indexHailFrame.getDataFrame(runner.getLeftResults());
    LOGGER.warn("------------ Running Brute force spatial join. ----------");
    final long bruteJoinDur = runBruteForceJoin(hail_adapter, tornado_adapter, sqlHail, sqlTornado);
    LOGGER.warn("Indexed join duration = " + indexJoinDur + " ms.");
    LOGGER.warn("Brute join duration = " + bruteJoinDur + " ms.");
    // Verify the counts agree; include both values so a failure is diagnosable from the report.
    Assert.assertTrue(
        "Hail result counts differ: indexed=" + hailIndexedCount + ", brute=" + hailBruteCount,
        (hailIndexedCount == hailBruteCount));
    Assert.assertTrue(
        "Tornado result counts differ: indexed=" + tornadoIndexedCount + ", brute=" + tornadoBruteCount,
        (tornadoIndexedCount == tornadoBruteCount));
    // Verify each row matches: rows in the indexed result that are absent from the brute-force
    // result must be an empty set.
    Dataset<Row> subtractedFrame = indexedHail.except(hailBruteResults);
    subtractedFrame = subtractedFrame.cache();
    Assert.assertTrue("Subtraction between brute force join and indexed Hail should result in count of 0", (subtractedFrame.count() == 0));
    subtractedFrame.unpersist();
    subtractedFrame = indexedTornado.except(tornadoBruteResults);
    subtractedFrame = subtractedFrame.cache();
    Assert.assertTrue("Subtraction between brute force join and indexed Tornado should result in count of 0", (subtractedFrame.count() == 0));
    // Release the second cached frame as well, mirroring the hail comparison above.
    subtractedFrame.unpersist();
    TestUtils.deleteAll(hailStore);
    TestUtils.deleteAll(tornadoStore);
}
Also used: GeomWithinDistance (org.locationtech.geowave.analytic.spark.sparksql.udf.GeomWithinDistance), IOException (java.io.IOException), Row (org.apache.spark.sql.Row), ExecutionException (java.util.concurrent.ExecutionException), SpatialJoinRunner (org.locationtech.geowave.analytic.spark.spatial.SpatialJoinRunner), SimpleFeatureDataFrame (org.locationtech.geowave.analytic.spark.sparksql.SimpleFeatureDataFrame), Test (org.junit.Test)

Aggregations

SimpleFeatureDataFrame (org.locationtech.geowave.analytic.spark.sparksql.SimpleFeatureDataFrame): 2, IOException (java.io.IOException): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, Row (org.apache.spark.sql.Row): 1, Test (org.junit.Test): 1, GeomWithinDistance (org.locationtech.geowave.analytic.spark.sparksql.udf.GeomWithinDistance): 1, SpatialJoinRunner (org.locationtech.geowave.analytic.spark.spatial.SpatialJoinRunner): 1