Use of com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp in project Alink by Alibaba.
Class Chap18, method c_2.
/**
 * Scores the sparse training data with a stored initial KMeans model, once as a
 * batch job and once as a stream job, and prints cluster-evaluation metrics for
 * both runs.
 *
 * <p>If the initial model file is not present, a KMeans model with K = 10 is
 * first trained on the result of {@code sampleWithSize(100)} and written to
 * that file.
 *
 * @throws Exception propagated from the batch/stream executions
 */
static void c_2() throws Exception {
    final String trainPath = DATA_DIR + SPARSE_TRAIN_FILE;
    final String initModelPath = DATA_DIR + INIT_MODEL_FILE;
    final String streamResultPath = DATA_DIR + TEMP_STREAM_FILE;

    AkSourceBatchOp batchSource = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(trainPath);

    // Train and persist the initial model only when no model file exists yet.
    File initModelFile = new File(initModelPath);
    if (!initModelFile.exists()) {
        batchSource
            .sampleWithSize(100)
            .link(
                new KMeansTrainBatchOp()
                    .setVectorCol(VECTOR_COL_NAME)
                    .setK(10))
            .link(
                new AkSinkBatchOp()
                    .setFilePath(initModelPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(initModelPath);

    // Batch prediction followed by cluster evaluation.
    new KMeansPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .linkFrom(initModel, batchSource)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("Batch Prediction"));
    BatchOperator.execute();

    // Stream prediction: results are written to a temporary file (overwriting any previous one).
    streamSource
        .link(
            new KMeansPredictStreamOp(initModel)
                .setPredictionCol(PREDICTION_COL_NAME))
        .link(
            new AkSinkStreamOp()
                .setFilePath(streamResultPath)
                .setOverwriteSink(true));
    StreamOperator.execute();

    // Read the stream output back as a batch source so it can be evaluated the same way.
    new AkSourceBatchOp()
        .setFilePath(streamResultPath)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("Stream Prediction"));
    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp in project Alink by Alibaba.
Class KMeansTest, method testInitializer.
/**
 * Checks that each KMeans component, when constructed without arguments or with
 * an empty {@code Params}, reports a parameter count of zero.
 */
@Test
public void testInitializer() {
    // Arguments are ordered (expected, actual), assuming JUnit's assertEquals,
    // so that a failure message labels the two values correctly.
    KMeansModel model = new KMeansModel();
    assertEquals(0, model.getParams().size());

    KMeans kMeans = new KMeans(new Params());
    assertEquals(0, kMeans.getParams().size());

    KMeansTrainBatchOp trainOp = new KMeansTrainBatchOp();
    assertEquals(0, trainOp.getParams().size());

    KMeansPredictBatchOp predictWithParams = new KMeansPredictBatchOp(new Params());
    assertEquals(0, predictWithParams.getParams().size());

    KMeansPredictBatchOp predictDefault = new KMeansPredictBatchOp();
    assertEquals(0, predictDefault.getParams().size());

    // Stream predictor built from a model operator plus explicit (empty) Params.
    KMeansPredictStreamOp streamWithParams = new KMeansPredictStreamOp(trainOp, new Params());
    assertEquals(0, streamWithParams.getParams().size());

    // Stream predictor built from a batch operator only.
    KMeansPredictStreamOp streamDefault = new KMeansPredictStreamOp(predictDefault);
    assertEquals(0, streamDefault.getParams().size());
}
Use of com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp in project Alink by Alibaba.
Class StreamingKMeansStreamOpTest, method testStreamingKmeans.
/**
 * Trains a two-cluster KMeans model on the batch training data, uses it to seed
 * a {@code StreamingKMeansStreamOp}, runs the stream job, and verifies the
 * collected output.
 *
 * @throws Exception propagated from the stream execution
 */
@Test
public void testStreamingKmeans() throws Exception {
    BatchOperator<?> initialModel =
        new KMeansTrainBatchOp()
            .setVectorCol("vec")
            .setK(2)
            .linkFrom(trainDataBatchOp);

    // The same stream operator is supplied for both inputs of linkFrom.
    StreamingKMeansStreamOp streamingKMeans =
        new StreamingKMeansStreamOp(initialModel)
            .setPredictionCol("pred")
            .setTimeInterval(1L)
            .setHalfLife(1)
            .setReservedCols("vec")
            .linkFrom(predictDataStreamOp, predictDataStreamOp);

    CollectSinkStreamOp collectedPredictions = streamingKMeans.link(new CollectSinkStreamOp());
    StreamOperator.execute();

    verifyExecutionResult(collectedPredictions.getAndRemoveValues());
}
Use of com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp in project Alink by Alibaba.
Class Chap17, method c_2_2.
/**
 * Clusters the vectorised data set into two groups with KMeans and prints
 * evaluation metrics, first through the train/predict batch operators (labelled
 * "KMeans EUCLIDEAN") and then through the {@code KMeans} pipeline stage with
 * {@code DistanceType.COSINE} (labelled "KMeans COSINE").
 *
 * <p>If the vector file is not present, it is first produced from the original
 * CSV file by assembling the feature columns into a single vector column while
 * keeping the label column.
 *
 * @throws Exception propagated from the batch executions
 */
static void c_2_2() throws Exception {
    final String vectorPath = DATA_DIR + VECTOR_FILE;

    // Build the vector file once; later runs reuse it.
    if (!new File(vectorPath).exists()) {
        new CsvSourceBatchOp()
            .setFilePath(DATA_DIR + ORIGIN_FILE)
            .setSchemaStr(SCHEMA_STRING)
            .link(
                new VectorAssemblerBatchOp()
                    .setSelectedCols(FEATURE_COL_NAMES)
                    .setOutputCol(VECTOR_COL_NAME)
                    .setReservedCols(LABEL_COL_NAME))
            .link(
                new AkSinkBatchOp()
                    .setFilePath(vectorPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp vectorSource = new AkSourceBatchOp().setFilePath(vectorPath);
    vectorSource.lazyPrint(5);

    KMeansTrainBatchOp trainer =
        new KMeansTrainBatchOp()
            .setK(2)
            .setVectorCol(VECTOR_COL_NAME);
    KMeansPredictBatchOp predictor =
        new KMeansPredictBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME);

    // Wire up training and prediction on the same source.
    vectorSource.link(trainer);
    predictor.linkFrom(trainer, vectorSource);

    // Register the lazy outputs; they are produced by the execute() call below.
    trainer.lazyPrintModelInfo();
    predictor.lazyPrint(5);
    predictor.link(
        new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("KMeans EUCLIDEAN"));
    predictor
        .orderBy(PREDICTION_COL_NAME + ", " + LABEL_COL_NAME, 200, false)
        .lazyPrint(-1, "all data");
    BatchOperator.execute();

    // Same clustering through the pipeline-style KMeans stage, with cosine distance.
    new KMeans()
        .setK(2)
        .setDistanceType(DistanceType.COSINE)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintModelInfo()
        .fit(vectorSource)
        .transform(vectorSource)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("KMeans COSINE"));
    BatchOperator.execute();
}
Aggregations