Use of org.locationtech.geowave.analytic.spark.kmeans.KMeansRunner in the geowave project by locationtech.
Class KmeansSparkCommand, method computeResults.
/**
 * Loads the input and output data stores named by the first two command parameters, configures
 * a {@link KMeansRunner} from {@code kMeansSparkOptions}, and runs it.
 *
 * @param params operation parameters used to locate the GeoWave config file and the console
 * @return always {@code null}
 * @throws RuntimeException if the runner fails with an {@link IOException}; the original
 *         exception is attached as the cause
 * @throws Exception if loading the stores or reading the options fails
 */
@Override
public Void computeResults(final OperationParams params) throws Exception {
  final String inputStoreName = parameters.get(0);
  final String outputStoreName = parameters.get(1);

  // Config file
  final File configFile = getGeoWaveConfigFile(params);

  // Attempt to load input store.
  inputDataStore = CLIUtils.loadStore(inputStoreName, configFile, params.getConsole());

  // Attempt to load output store.
  outputDataStore = CLIUtils.loadStore(outputStoreName, configFile, params.getConsole());

  // Save a reference to the input store in the property management.
  final PersistableStore persistedStore = new PersistableStore(inputDataStore);
  final PropertyManagement properties = new PropertyManagement();
  properties.store(StoreParameters.StoreParam.INPUT_STORE, persistedStore);

  // Convert properties from the KMeans Spark options
  final PropertyManagementConverter converter = new PropertyManagementConverter(properties);
  converter.readProperties(kMeansSparkOptions);

  final KMeansRunner runner = new KMeansRunner();
  runner.setAppName(kMeansSparkOptions.getAppName());
  runner.setMaster(kMeansSparkOptions.getMaster());
  runner.setHost(kMeansSparkOptions.getHost());
  runner.setSplits(kMeansSparkOptions.getMinSplits(), kMeansSparkOptions.getMaxSplits());
  runner.setInputDataStore(inputDataStore);
  runner.setNumClusters(kMeansSparkOptions.getNumClusters());
  runner.setNumIterations(kMeansSparkOptions.getNumIterations());
  runner.setUseTime(kMeansSparkOptions.isUseTime());
  // The type name is passed through unconditionally (null included); no second guarded call
  // is needed.
  runner.setTypeName(kMeansSparkOptions.getTypeName());

  // Epsilon and the CQL filter are only forwarded when they were supplied.
  if (kMeansSparkOptions.getEpsilon() != null) {
    runner.setEpsilon(kMeansSparkOptions.getEpsilon());
  }
  if (kMeansSparkOptions.getCqlFilter() != null) {
    runner.setCqlFilter(kMeansSparkOptions.getCqlFilter());
  }

  // Output configuration: hulls, centroids and the destination store.
  runner.setGenerateHulls(kMeansSparkOptions.isGenerateHulls());
  runner.setComputeHullData(kMeansSparkOptions.isComputeHullData());
  runner.setHullTypeName(kMeansSparkOptions.getHullTypeName());
  runner.setCentroidTypeName(kMeansSparkOptions.getCentroidTypeName());
  runner.setOutputDataStore(outputDataStore);

  try {
    runner.run();
  } catch (final IOException e) {
    // Keep the original exception as the cause so the stack trace is not lost.
    throw new RuntimeException("Failed to execute: " + e.getMessage(), e);
  } finally {
    runner.close();
  }
  return null;
}
Use of org.locationtech.geowave.analytic.spark.kmeans.KMeansRunner in the geowave project by locationtech.
Class GeoWaveSparkKMeansIT, method testKMeansRunner.
/**
 * Ingests the hail shapefile, runs {@link KMeansRunner} against it with centroid and hull
 * output enabled, then verifies that the centroid and hull types can be queried back from the
 * store and that the number of generated hulls matches the number of cluster centers.
 *
 * @throws Exception if ingest, clustering, or verification fails
 */
@Test
public void testKMeansRunner() throws Exception {
  // Load data
  TestUtils.testLocalIngest(inputDataStore, DimensionalityType.SPATIAL, HAIL_SHAPEFILE_FILE, 1);

  // Create the runner
  long mark = System.currentTimeMillis();
  final KMeansRunner runner = new KMeansRunner();
  runner.setSparkSession(SparkTestEnvironment.getInstance().defaultSession);
  runner.setInputDataStore(inputDataStore);
  runner.setTypeName("hail");
  runner.setCqlFilter(CQL_FILTER);
  runner.setUseTime(true);

  // Set output params to write centroids + hulls to store.
  runner.setOutputDataStore(inputDataStore);
  runner.setCentroidTypeName("kmeans-centroids-test");
  runner.setGenerateHulls(true);
  runner.setComputeHullData(true);
  runner.setHullTypeName("kmeans-hulls-test");

  // Run kmeans
  try {
    runner.run();
  } catch (final IOException e) {
    // Keep the original exception as the cause so the failure is diagnosable.
    throw new RuntimeException("Failed to execute: " + e.getMessage(), e);
  }

  // Fetch the resulting model
  final KMeansModel clusterModel = runner.getOutputModel();
  long dur = (System.currentTimeMillis() - mark);
  LOGGER.warn("KMeans duration: " + dur + " ms.");

  // Look up the adapter for the centroid type
  final short centroidInternalAdapterId =
      inputDataStore.createInternalAdapterStore().getAdapterId("kmeans-centroids-test");
  final DataTypeAdapter centroidAdapter =
      inputDataStore.createAdapterStore().getAdapter(centroidInternalAdapterId);

  // Query back from the centroid adapter
  mark = System.currentTimeMillis();
  queryFeatures(centroidAdapter, clusterModel.clusterCenters().length);
  dur = (System.currentTimeMillis() - mark);
  LOGGER.warn("Centroid verify: " + dur + " ms.");

  // Generate the hulls
  final JavaPairRDD<Integer, Iterable<Vector>> groupByRDD =
      KMeansHullGenerator.groupByIndex(runner.getInputCentroids(), clusterModel);
  final JavaPairRDD<Integer, Geometry> hullsRDD = KMeansHullGenerator.generateHullsRDD(groupByRDD);

  // assertEquals reports both counts on failure, unlike assertTrue on an == comparison.
  Assert.assertEquals(
      "centroids from the model should match the hull count",
      clusterModel.clusterCenters().length,
      hullsRDD.count());

  System.out.println("KMeans cluster hulls:");
  for (final Tuple2<Integer, Geometry> hull : hullsRDD.collect()) {
    System.out.println("> Hull size (verts): " + hull._2.getNumPoints());
    System.out.println("> Hull centroid: " + hull._2.getCentroid().toString());
  }

  // Look up the adapter for the hull type
  final short hullInternalAdapterId =
      inputDataStore.createInternalAdapterStore().getAdapterId("kmeans-hulls-test");
  final DataTypeAdapter hullAdapter =
      inputDataStore.createAdapterStore().getAdapter(hullInternalAdapterId);

  // Query back from the hull adapter
  mark = System.currentTimeMillis();
  queryFeatures(hullAdapter, clusterModel.clusterCenters().length);
  dur = (System.currentTimeMillis() - mark);
  LOGGER.warn("Hull verify: " + dur + " ms.");

  TestUtils.deleteAll(inputDataStore);
}
Aggregations