Search in sources :

Example 1 with EvalClusterBatchOp

use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in project Alink by alibaba.

the class Chap17 method c_4.

/**
 * Runs bisecting k-means (k = 3) over the vector data twice and evaluates each
 * clustering against the label column.
 *
 * <p>The first run sets no distance type on either the estimator or the
 * evaluator (its output titles are tagged EUCLIDEAN); the second run sets the
 * COSINE distance type on both. Each run lazily prints the model info and the
 * cluster metrics, which are produced when {@code BatchOperator.execute()} is
 * called.
 *
 * @throws Exception propagated from the Alink operators or job execution
 */
static void c_4() throws Exception {
    AkSourceBatchOp vectorData = new AkSourceBatchOp().setFilePath(DATA_DIR + VECTOR_FILE);

    // Run 1: distance type left unset on both estimator and evaluator.
    BatchOperator<?> euclideanPred = new BisectingKMeans()
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintModelInfo("BiSecting KMeans EUCLIDEAN")
        .fit(vectorData)
        .transform(vectorData);
    EvalClusterBatchOp euclideanEval = new EvalClusterBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .lazyPrintMetrics("Bisecting KMeans EUCLIDEAN");
    euclideanPred.link(euclideanEval);
    BatchOperator.execute();

    // Run 2: cosine distance for both clustering and evaluation.
    BatchOperator<?> cosinePred = new BisectingKMeans()
        .setDistanceType(DistanceType.COSINE)
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintModelInfo("BiSecting KMeans COSINE")
        .fit(vectorData)
        .transform(vectorData);
    EvalClusterBatchOp cosineEval = new EvalClusterBatchOp()
        .setDistanceType("COSINE")
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .lazyPrintMetrics("Bisecting KMeans COSINE");
    cosinePred.link(cosineEval);
    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) BisectingKMeans(com.alibaba.alink.pipeline.clustering.BisectingKMeans) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 2 with EvalClusterBatchOp

use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in project Alink by alibaba.

the class Chap17 method c_3_2.

/**
 * Fits a two-component Gaussian mixture on the vector data, predicts on the same
 * data, and lazily prints the model info and the cluster evaluation metrics
 * (titled "GaussianMixture 2") computed against the label column.
 *
 * @throws Exception propagated from the Alink operators or job execution
 */
static void c_3_2() throws Exception {
    AkSourceBatchOp vectorData = new AkSourceBatchOp().setFilePath(DATA_DIR + VECTOR_FILE);

    // Train and predict on the same source.
    BatchOperator<?> gmmPred = new GaussianMixture()
        .setK(2)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintModelInfo()
        .fit(vectorData)
        .transform(vectorData);

    // Evaluate the predicted clusters against the labels.
    EvalClusterBatchOp gmmEval = new EvalClusterBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .lazyPrintMetrics("GaussianMixture 2");
    gmmPred.link(gmmEval);

    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) GaussianMixture(com.alibaba.alink.pipeline.clustering.GaussianMixture) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 3 with EvalClusterBatchOp

use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in project Alink by alibaba.

the class Chap17 method c_5.

/**
 * Explores the in-memory state table and clusters it geographically.
 *
 * <p>With parallelism set to 1, the method lazily prints the first rows, the
 * distinct Region and Division values, and the number of states per
 * (Region, Division) pair. It then runs GeoKMeans on the longitude/latitude
 * columns for k = 2 and k = 4, evaluating each clustering once against Region
 * and once against Division, and executes one batch job per k.
 *
 * @throws Exception propagated from the Alink operators or job execution
 */
static void c_5() throws Exception {
    BatchOperator.setParallelism(1);

    String[] columnNames = new String[] { "State", "Region", "Division", "longitude", "latitude" };
    MemSourceBatchOp states = new MemSourceBatchOp(ROWS_DATA, columnNames);

    states.lazyPrint(5);
    for (String categoryCol : new String[] { "Region", "Division" }) {
        states.select(categoryCol).distinct().lazyPrint(-1);
    }
    states
        .groupBy("Region, Division", "Region, Division, COUNT(*) AS numStates")
        .orderBy("Region, Division", 100)
        .lazyPrint(-1);

    int[] clusterCounts = { 2, 4 };
    for (int k : clusterCounts) {
        GeoKMeans geoKMeans = new GeoKMeans()
            .setLongitudeCol("longitude")
            .setLatitudeCol("latitude")
            .setPredictionCol(PREDICTION_COL_NAME)
            .setK(k);
        BatchOperator<?> pred = geoKMeans.fit(states).transform(states);

        // Compare the same prediction against both geographic groupings.
        for (String labelCol : new String[] { "Region", "Division" }) {
            EvalClusterBatchOp evaluator = new EvalClusterBatchOp()
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(labelCol)
                .lazyPrintMetrics(k + " with " + labelCol);
            pred.link(evaluator);
        }
        BatchOperator.execute();
    }
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) GeoKMeans(com.alibaba.alink.pipeline.clustering.GeoKMeans) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 4 with EvalClusterBatchOp

use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in project Alink by alibaba.

the class Chap18 method c_2.

/**
 * Evaluates a fixed initial KMeans model in both batch and stream prediction mode.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the initial model file does not exist yet, train a KMeans model
 *       (k = 10) on a size-100 sample of the batch source and save it.</li>
 *   <li>Predict on the batch source with that model and lazily print the cluster
 *       metrics titled "Batch Prediction".</li>
 *   <li>Predict on the stream source with the same model, writing the output to
 *       the temporary stream file (overwriting it).</li>
 *   <li>Read the temporary file back as a batch source and lazily print the
 *       cluster metrics titled "Stream Prediction".</li>
 * </ol>
 *
 * @throws Exception propagated from the Alink operators or job execution
 */
static void c_2() throws Exception {
    String trainPath = DATA_DIR + SPARSE_TRAIN_FILE;
    String initModelPath = DATA_DIR + INIT_MODEL_FILE;
    String streamOutputPath = DATA_DIR + TEMP_STREAM_FILE;

    AkSourceBatchOp batchSource = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(trainPath);

    // Step 1: create the initial model only when it is not already on disk.
    boolean initModelMissing = !new File(initModelPath).exists();
    if (initModelMissing) {
        BatchOperator<?> sampled = batchSource.sampleWithSize(100);
        BatchOperator<?> trainedModel = sampled.link(
            new KMeansTrainBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setK(10));
        trainedModel.link(new AkSinkBatchOp().setFilePath(initModelPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(initModelPath);

    // Step 2: batch prediction + evaluation.
    BatchOperator<?> batchPred = new KMeansPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .linkFrom(initModel, batchSource);
    EvalClusterBatchOp batchEval = new EvalClusterBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .lazyPrintMetrics("Batch Prediction");
    batchPred.link(batchEval);
    BatchOperator.execute();

    // Step 3: stream prediction, persisted so it can be evaluated as a batch.
    StreamOperator<?> streamPred = streamSource.link(
        new KMeansPredictStreamOp(initModel)
            .setPredictionCol(PREDICTION_COL_NAME));
    streamPred.link(
        new AkSinkStreamOp()
            .setFilePath(streamOutputPath)
            .setOverwriteSink(true));
    StreamOperator.execute();

    // Step 4: evaluate the persisted stream predictions.
    AkSourceBatchOp streamResult = new AkSourceBatchOp().setFilePath(streamOutputPath);
    EvalClusterBatchOp streamEval = new EvalClusterBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .lazyPrintMetrics("Stream Prediction");
    streamResult.link(streamEval);
    BatchOperator.execute();
}
Also used : KMeansPredictBatchOp(com.alibaba.alink.operator.batch.clustering.KMeansPredictBatchOp) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) KMeansPredictStreamOp(com.alibaba.alink.operator.stream.clustering.KMeansPredictStreamOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File) KMeansTrainBatchOp(com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 5 with EvalClusterBatchOp

use of com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp in project Alink by alibaba.

the class Chap18 method c_3.

/**
 * Runs StreamingKMeans over the stream source, starting from the saved initial
 * model, and evaluates the resulting predictions.
 *
 * <p>The prediction stream is narrowed to the prediction, label and vector
 * columns. A sample of it (sample argument 0.001) is printed, and the full stream
 * is written to the temporary stream file (overwriting it). After the stream job
 * finishes, the file is read back as a batch source and its cluster metrics are
 * lazily printed under the title "StreamingKMeans".
 *
 * @throws Exception propagated from the Alink operators or job execution
 */
static void c_3() throws Exception {
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(DATA_DIR + INIT_MODEL_FILE);

    StreamingKMeansStreamOp streamingKMeans = new StreamingKMeansStreamOp(initModel)
        .setTimeInterval(1L)
        .setHalfLife(1)
        .setPredictionCol(PREDICTION_COL_NAME);
    // Keep only the columns the evaluator needs later.
    String keptColumns = String.join(", ", PREDICTION_COL_NAME, LABEL_COL_NAME, VECTOR_COL_NAME);
    StreamOperator<?> streamPred = streamSource.link(streamingKMeans).select(keptColumns);

    streamPred.sample(0.001).print();
    streamPred.link(
        new AkSinkStreamOp()
            .setFilePath(DATA_DIR + TEMP_STREAM_FILE)
            .setOverwriteSink(true));
    StreamOperator.execute();

    // Evaluate the persisted stream output as a batch job.
    AkSourceBatchOp streamResult = new AkSourceBatchOp().setFilePath(DATA_DIR + TEMP_STREAM_FILE);
    EvalClusterBatchOp evaluator = new EvalClusterBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .lazyPrintMetrics("StreamingKMeans");
    streamResult.link(evaluator);
    BatchOperator.execute();
}
Also used : AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) StreamingKMeansStreamOp(com.alibaba.alink.operator.stream.clustering.StreamingKMeansStreamOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Aggregations

EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp): 11; AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 9; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 4; KMeans (com.alibaba.alink.pipeline.clustering.KMeans): 4; Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 3; BisectingKMeans (com.alibaba.alink.pipeline.clustering.BisectingKMeans): 3; File (java.io.File): 3; KMeansPredictBatchOp (com.alibaba.alink.operator.batch.clustering.KMeansPredictBatchOp): 2; KMeansTrainBatchOp (com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp): 2; VectorAssemblerBatchOp (com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp): 2; AkSinkStreamOp (com.alibaba.alink.operator.stream.sink.AkSinkStreamOp): 2; AkSourceStreamOp (com.alibaba.alink.operator.stream.source.AkSourceStreamOp): 2; GeoKMeans (com.alibaba.alink.pipeline.clustering.GeoKMeans): 2; GmmPredictBatchOp (com.alibaba.alink.operator.batch.clustering.GmmPredictBatchOp): 1; GmmTrainBatchOp (com.alibaba.alink.operator.batch.clustering.GmmTrainBatchOp): 1; LdaPredictBatchOp (com.alibaba.alink.operator.batch.clustering.LdaPredictBatchOp): 1; LdaTrainBatchOp (com.alibaba.alink.operator.batch.clustering.LdaTrainBatchOp): 1; PcaPredictBatchOp (com.alibaba.alink.operator.batch.feature.PcaPredictBatchOp): 1; PcaTrainBatchOp (com.alibaba.alink.operator.batch.feature.PcaTrainBatchOp): 1; SegmentBatchOp (com.alibaba.alink.operator.batch.nlp.SegmentBatchOp): 1