Use of com.alibaba.alink.pipeline.clustering.GeoKMeans in the project Alink by Alibaba.
From the class Chap17, method c_5.
/**
 * Loads the in-memory state rows, prints a few summaries of them, and then
 * clusters the rows by longitude/latitude with {@link GeoKMeans} for k = 2 and
 * k = 4, printing cluster-evaluation metrics against the "Region" and
 * "Division" label columns for each k.
 *
 * @throws Exception propagated from the batch operators and job execution
 */
static void c_5() throws Exception {
    BatchOperator.setParallelism(1);

    final String[] columnNames = { "State", "Region", "Division", "longitude", "latitude" };
    // Columns used both for the distinct-value listings and as evaluation labels.
    final String[] labelCols = { "Region", "Division" };
    final int[] clusterCounts = { 2, 4 };

    MemSourceBatchOp states = new MemSourceBatchOp(ROWS_DATA, columnNames);

    // All prints below are lazy: they are only registered here.
    states.lazyPrint(5);
    for (String labelCol : labelCols) {
        states.select(labelCol).distinct().lazyPrint(-1);
    }
    states
        .groupBy("Region, Division", "Region, Division, COUNT(*) AS numStates")
        .orderBy("Region, Division", 100)
        .lazyPrint(-1);

    for (int i = 0; i < clusterCounts.length; i++) {
        final int k = clusterCounts[i];

        GeoKMeans clusterer = new GeoKMeans()
            .setLongitudeCol("longitude")
            .setLatitudeCol("latitude")
            .setPredictionCol(PREDICTION_COL_NAME)
            .setK(k);

        // Train and predict on the same data set.
        BatchOperator<?> clustered = clusterer.fit(states).transform(states);

        // Evaluate the cluster assignment against each label column in turn.
        for (String labelCol : labelCols) {
            clustered.link(
                new EvalClusterBatchOp()
                    .setPredictionCol(PREDICTION_COL_NAME)
                    .setLabelCol(labelCol)
                    .lazyPrintMetrics(k + " with " + labelCol)
            );
        }

        // One execution per k value.
        BatchOperator.execute();
    }
}
Aggregations