Search in sources :

Example 1 with KMeansRunner

use of org.locationtech.geowave.analytic.spark.kmeans.KMeansRunner in project geowave by locationtech.

From the class KmeansSparkCommand, method computeResults.

/**
 * Configures a {@link KMeansRunner} from the command's KMeans Spark options and runs it.
 *
 * <p>The first two positional parameters are read as the input and output store names. Both
 * stores are loaded through {@code CLIUtils.loadStore} using the GeoWave config file resolved
 * from {@code params}. The runner is always closed after the run, whether or not it succeeded.
 *
 * @param params operation parameters used to locate the config file and the console
 * @return always {@code null}
 * @throws RuntimeException if the runner fails with an {@link IOException}; the original
 *         exception is attached as the cause
 * @throws Exception if anything else fails while loading the stores or configuring the runner
 */
@Override
public Void computeResults(final OperationParams params) throws Exception {
    final String inputStoreName = parameters.get(0);
    final String outputStoreName = parameters.get(1);
    // Config file
    final File configFile = getGeoWaveConfigFile(params);
    // Attempt to load input store.
    inputDataStore = CLIUtils.loadStore(inputStoreName, configFile, params.getConsole());
    // Attempt to load output store.
    outputDataStore = CLIUtils.loadStore(outputStoreName, configFile, params.getConsole());
    // Save a reference to the input store in the property management.
    final PersistableStore persistedStore = new PersistableStore(inputDataStore);
    final PropertyManagement properties = new PropertyManagement();
    properties.store(StoreParameters.StoreParam.INPUT_STORE, persistedStore);
    // Convert properties from the KMeans Spark options
    final PropertyManagementConverter converter = new PropertyManagementConverter(properties);
    converter.readProperties(kMeansSparkOptions);
    final KMeansRunner runner = new KMeansRunner();
    runner.setAppName(kMeansSparkOptions.getAppName());
    runner.setMaster(kMeansSparkOptions.getMaster());
    runner.setHost(kMeansSparkOptions.getHost());
    runner.setSplits(kMeansSparkOptions.getMinSplits(), kMeansSparkOptions.getMaxSplits());
    runner.setInputDataStore(inputDataStore);
    runner.setNumClusters(kMeansSparkOptions.getNumClusters());
    runner.setNumIterations(kMeansSparkOptions.getNumIterations());
    runner.setUseTime(kMeansSparkOptions.isUseTime());
    // The type name is passed through as-is (possibly null); no second guarded call is needed.
    runner.setTypeName(kMeansSparkOptions.getTypeName());
    // Epsilon and the CQL filter are only applied when they were actually supplied.
    if (kMeansSparkOptions.getEpsilon() != null) {
        runner.setEpsilon(kMeansSparkOptions.getEpsilon());
    }
    if (kMeansSparkOptions.getCqlFilter() != null) {
        runner.setCqlFilter(kMeansSparkOptions.getCqlFilter());
    }
    runner.setGenerateHulls(kMeansSparkOptions.isGenerateHulls());
    runner.setComputeHullData(kMeansSparkOptions.isComputeHullData());
    runner.setHullTypeName(kMeansSparkOptions.getHullTypeName());
    runner.setCentroidTypeName(kMeansSparkOptions.getCentroidTypeName());
    runner.setOutputDataStore(outputDataStore);
    try {
        runner.run();
    } catch (final IOException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("Failed to execute: " + e.getMessage(), e);
    } finally {
        runner.close();
    }
    return null;
}
Also used : PropertyManagementConverter(org.locationtech.geowave.analytic.mapreduce.operations.options.PropertyManagementConverter) PersistableStore(org.locationtech.geowave.analytic.store.PersistableStore) PropertyManagement(org.locationtech.geowave.analytic.PropertyManagement) IOException(java.io.IOException) File(java.io.File) KMeansRunner(org.locationtech.geowave.analytic.spark.kmeans.KMeansRunner)

Example 2 with KMeansRunner

use of org.locationtech.geowave.analytic.spark.kmeans.KMeansRunner in project geowave by locationtech.

From the class GeoWaveSparkKMeansIT, method testKMeansRunner.

/**
 * Runs {@link KMeansRunner} over ingested hail data, with centroids and hulls written back to
 * the same store, then verifies the outputs.
 *
 * <p>The test queries the centroid and hull adapters back from the store, expecting one feature
 * per cluster center in each, and checks that the number of generated hulls equals the number of
 * cluster centers in the model.
 *
 * @throws RuntimeException if the runner fails with an {@link IOException}; the original
 *         exception is attached as the cause
 * @throws Exception if ingest, querying, or cleanup fails
 */
@Test
public void testKMeansRunner() throws Exception {
    // Load data
    TestUtils.testLocalIngest(inputDataStore, DimensionalityType.SPATIAL, HAIL_SHAPEFILE_FILE, 1);
    // Create the runner
    long mark = System.currentTimeMillis();
    final KMeansRunner runner = new KMeansRunner();
    runner.setSparkSession(SparkTestEnvironment.getInstance().defaultSession);
    runner.setInputDataStore(inputDataStore);
    runner.setTypeName("hail");
    runner.setCqlFilter(CQL_FILTER);
    runner.setUseTime(true);
    // Set output params to write centroids + hulls to store.
    runner.setOutputDataStore(inputDataStore);
    runner.setCentroidTypeName("kmeans-centroids-test");
    runner.setGenerateHulls(true);
    runner.setComputeHullData(true);
    runner.setHullTypeName("kmeans-hulls-test");
    // Run kmeans
    try {
        runner.run();
    } catch (final IOException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("Failed to execute: " + e.getMessage(), e);
    }
    // Fetch the resulting model
    final KMeansModel clusterModel = runner.getOutputModel();
    final int clusterCount = clusterModel.clusterCenters().length;
    long dur = (System.currentTimeMillis() - mark);
    LOGGER.warn("KMeans duration: " + dur + " ms.");
    // Look up the adapter the centroid features were written under
    final short centroidInternalAdapterId = inputDataStore.createInternalAdapterStore().getAdapterId("kmeans-centroids-test");
    final DataTypeAdapter centroidAdapter = inputDataStore.createAdapterStore().getAdapter(centroidInternalAdapterId);
    // Query back from the new adapter
    mark = System.currentTimeMillis();
    queryFeatures(centroidAdapter, clusterCount);
    dur = (System.currentTimeMillis() - mark);
    LOGGER.warn("Centroid verify: " + dur + " ms.");
    // Generate the hulls directly from the model and compare their count to the cluster count
    final JavaPairRDD<Integer, Iterable<Vector>> groupByRDD = KMeansHullGenerator.groupByIndex(runner.getInputCentroids(), clusterModel);
    final JavaPairRDD<Integer, Geometry> hullsRDD = KMeansHullGenerator.generateHullsRDD(groupByRDD);
    // assertEquals reports both values on failure, unlike assertTrue on an == comparison.
    Assert.assertEquals("centroids from the model should match the hull count", clusterCount, hullsRDD.count());
    System.out.println("KMeans cluster hulls:");
    for (final Tuple2<Integer, Geometry> hull : hullsRDD.collect()) {
        System.out.println("> Hull size (verts): " + hull._2.getNumPoints());
        System.out.println("> Hull centroid: " + hull._2.getCentroid().toString());
    }
    // Look up the adapter the hull features were written under
    final short hullInternalAdapterId = inputDataStore.createInternalAdapterStore().getAdapterId("kmeans-hulls-test");
    final DataTypeAdapter hullAdapter = inputDataStore.createAdapterStore().getAdapter(hullInternalAdapterId);
    mark = System.currentTimeMillis();
    // Query back from the new adapter
    queryFeatures(hullAdapter, clusterCount);
    dur = (System.currentTimeMillis() - mark);
    LOGGER.warn("Hull verify: " + dur + " ms.");
    TestUtils.deleteAll(inputDataStore);
}
Also used : KMeansModel(org.apache.spark.mllib.clustering.KMeansModel) DataTypeAdapter(org.locationtech.geowave.core.store.api.DataTypeAdapter) IOException(java.io.IOException) KMeansRunner(org.locationtech.geowave.analytic.spark.kmeans.KMeansRunner) Geometry(org.locationtech.jts.geom.Geometry) Test(org.junit.Test)

Aggregations

IOException (java.io.IOException)2 KMeansRunner (org.locationtech.geowave.analytic.spark.kmeans.KMeansRunner)2 File (java.io.File)1 KMeansModel (org.apache.spark.mllib.clustering.KMeansModel)1 Test (org.junit.Test)1 PropertyManagement (org.locationtech.geowave.analytic.PropertyManagement)1 PropertyManagementConverter (org.locationtech.geowave.analytic.mapreduce.operations.options.PropertyManagementConverter)1 PersistableStore (org.locationtech.geowave.analytic.store.PersistableStore)1 DataTypeAdapter (org.locationtech.geowave.core.store.api.DataTypeAdapter)1 Geometry (org.locationtech.jts.geom.Geometry)1