Example use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in the Alink project by Alibaba.
Source: class Chap17, method c_4.
/**
 * Runs bisecting k-means (k = 3) over the vector data file twice and queues a
 * clustering-metrics report for each run.
 *
 * <p>The first pass sets no distance type on the trainer or the evaluator (its output is
 * titled "EUCLIDEAN"); the second pass sets COSINE on both. Each pass is triggered by its
 * own {@code BatchOperator.execute()} call.
 *
 * @throws Exception if building or executing either batch job fails
 */
static void c_4() throws Exception {
    AkSourceBatchOp source = new AkSourceBatchOp().setFilePath(DATA_DIR + VECTOR_FILE);

    // Pass 1: no explicit distance type.
    BatchOperator<?> euclideanPrediction = new BisectingKMeans()
            .setK(3)
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo("BiSecting KMeans EUCLIDEAN")
            .fit(source)
            .transform(source);
    BatchOperator<?> euclideanEvaluator = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Bisecting KMeans EUCLIDEAN");
    euclideanPrediction.link(euclideanEvaluator);
    BatchOperator.execute();

    // Pass 2: cosine distance for both the clustering and the evaluation.
    BatchOperator<?> cosinePrediction = new BisectingKMeans()
            .setDistanceType(DistanceType.COSINE)
            .setK(3)
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo("BiSecting KMeans COSINE")
            .fit(source)
            .transform(source);
    BatchOperator<?> cosineEvaluator = new EvalClusterBatchOp()
            .setDistanceType("COSINE")
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Bisecting KMeans COSINE");
    cosinePrediction.link(cosineEvaluator);
    BatchOperator.execute();
}
Example use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in the Alink project by Alibaba.
Source: class Chap17, method c_3_2.
/**
 * Fits a two-component {@code GaussianMixture} on the vector data file, predicts on the
 * same data, and queues a clustering-metrics report titled "GaussianMixture 2".
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_3_2() throws Exception {
    AkSourceBatchOp source = new AkSourceBatchOp().setFilePath(DATA_DIR + VECTOR_FILE);

    // Train and predict on the same source; model info is printed lazily at execute().
    BatchOperator<?> prediction = new GaussianMixture()
            .setK(2)
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo()
            .fit(source)
            .transform(source);

    BatchOperator<?> evaluator = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("GaussianMixture 2");
    prediction.link(evaluator);

    BatchOperator.execute();
}
Example use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in the Alink project by Alibaba.
Source: class Chap17, method c_5.
/**
 * Explores the in-memory state table and clusters it by longitude/latitude with
 * {@code GeoKMeans} for k = 2 and k = 4.
 *
 * <p>Before clustering, the method queues lazy prints of the first rows, the distinct
 * "Region" and "Division" values, and the per-(Region, Division) row counts. For each k
 * the predictions are evaluated once against "Region" and once against "Division", and
 * the batch job is executed once per k.
 *
 * @throws Exception if building or executing a batch job fails
 */
static void c_5() throws Exception {
    BatchOperator.setParallelism(1);

    String[] columnNames = new String[] { "State", "Region", "Division", "longitude", "latitude" };
    MemSourceBatchOp source = new MemSourceBatchOp(ROWS_DATA, columnNames);

    // Columns that serve both as exploration targets and as evaluation labels.
    String[] labelCols = new String[] { "Region", "Division" };

    source.lazyPrint(5);
    for (String labelCol : labelCols) {
        source.select(labelCol).distinct().lazyPrint(-1);
    }
    source
            .groupBy("Region, Division", "Region, Division, COUNT(*) AS numStates")
            .orderBy("Region, Division", 100)
            .lazyPrint(-1);

    int[] clusterCounts = new int[] { 2, 4 };
    for (int k : clusterCounts) {
        BatchOperator<?> pred = new GeoKMeans()
                .setLongitudeCol("longitude")
                .setLatitudeCol("latitude")
                .setPredictionCol(PREDICTION_COL_NAME)
                .setK(k)
                .fit(source)
                .transform(source);

        // Compare the same predictions against each label column in turn.
        for (String labelCol : labelCols) {
            pred.link(new EvalClusterBatchOp()
                    .setPredictionCol(PREDICTION_COL_NAME)
                    .setLabelCol(labelCol)
                    .lazyPrintMetrics(k + " with " + labelCol));
        }

        BatchOperator.execute();
    }
}
Example use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in the Alink project by Alibaba.
Source: class Chap18, method c_2.
/**
 * Predicts clusters with one stored k-means model in both batch and stream mode and
 * queues a clustering-metrics report for each.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the initial-model file does not exist, train a k-means model (k = 10) on a
 *       100-row sample of the training data and write it to that file.</li>
 *   <li>Run batch prediction with the stored model and evaluate it ("Batch Prediction").</li>
 *   <li>Run stream prediction with the same model, writing the output to the temporary
 *       stream file (overwriting it).</li>
 *   <li>Read the temporary file back as a batch source and evaluate it
 *       ("Stream Prediction").</li>
 * </ol>
 *
 * @throws Exception if building or executing any batch or stream job fails
 */
static void c_2() throws Exception {
    String initModelPath = DATA_DIR + INIT_MODEL_FILE;
    String tempStreamPath = DATA_DIR + TEMP_STREAM_FILE;

    AkSourceBatchOp batchSource = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);

    // Step 1: create the initial model only when it is not on disk yet.
    File initModelFile = new File(initModelPath);
    if (!initModelFile.exists()) {
        batchSource
                .sampleWithSize(100)
                .link(new KMeansTrainBatchOp().setVectorCol(VECTOR_COL_NAME).setK(10))
                .link(new AkSinkBatchOp().setFilePath(initModelPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(initModelPath);

    // Step 2: batch prediction and evaluation.
    BatchOperator<?> batchPrediction = new KMeansPredictBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME)
            .linkFrom(initModel, batchSource);
    batchPrediction.link(new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Batch Prediction"));
    BatchOperator.execute();

    // Step 3: stream prediction, persisted to the temporary file.
    StreamOperator<?> streamPrediction = streamSource.link(
            new KMeansPredictStreamOp(initModel).setPredictionCol(PREDICTION_COL_NAME));
    streamPrediction.link(new AkSinkStreamOp()
            .setFilePath(tempStreamPath)
            .setOverwriteSink(true));
    StreamOperator.execute();

    // Step 4: evaluate the persisted stream output as a batch job.
    AkSourceBatchOp streamOutput = new AkSourceBatchOp().setFilePath(tempStreamPath);
    streamOutput.link(new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Stream Prediction"));
    BatchOperator.execute();
}
Example use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in the Alink project by Alibaba.
Source: class Chap18, method c_3.
/**
 * Runs {@code StreamingKMeansStreamOp} over the training stream, persists its
 * predictions, and then evaluates the persisted result as a batch job.
 *
 * <p>The streaming operator is constructed with the model read from the initial-model
 * file, a time interval of {@code 1L} and a half-life of {@code 1}. Only the prediction,
 * label and vector columns are kept. A 0.001 sample of the stream is printed, and the
 * full stream is written to the temporary stream file (overwriting it) before the
 * metrics report titled "StreamingKMeans" is produced.
 *
 * @throws Exception if building or executing the stream or batch job fails
 */
static void c_3() throws Exception {
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(DATA_DIR + INIT_MODEL_FILE);

    // Columns retained downstream: prediction, label, vector (in that order).
    String keptColumns = String.join(", ", PREDICTION_COL_NAME, LABEL_COL_NAME, VECTOR_COL_NAME);

    StreamOperator<?> streamPred = streamSource
            .link(new StreamingKMeansStreamOp(initModel)
                    .setTimeInterval(1L)
                    .setHalfLife(1)
                    .setPredictionCol(PREDICTION_COL_NAME))
            .select(keptColumns);

    // Print a small sample for inspection while the whole stream goes to the sink.
    streamPred.sample(0.001).print();
    streamPred.link(new AkSinkStreamOp()
            .setFilePath(DATA_DIR + TEMP_STREAM_FILE)
            .setOverwriteSink(true));
    StreamOperator.execute();

    // Evaluate the persisted stream predictions in batch mode.
    AkSourceBatchOp persisted = new AkSourceBatchOp().setFilePath(DATA_DIR + TEMP_STREAM_FILE);
    persisted.link(new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("StreamingKMeans"));
    BatchOperator.execute();
}
Aggregations