Search in sources:

Example 21 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

Method c_4 of class Chap17.

/**
 * Clusters the data read from {@code DATA_DIR + VECTOR_FILE} with BisectingKMeans (K = 3)
 * in two separate batch jobs and requests lazily printed evaluation metrics for each.
 *
 * <p>The first job sets no distance type on the clusterer or the evaluator (its titles
 * label it EUCLIDEAN). The second job sets {@code DistanceType.COSINE} on the clusterer
 * and {@code "COSINE"} on the evaluator.
 *
 * @throws Exception whatever the underlying Alink calls throw
 */
static void c_4() throws Exception {
    AkSourceBatchOp vectors = new AkSourceBatchOp().setFilePath(DATA_DIR + VECTOR_FILE);

    // Job 1: no explicit distance type.
    BatchOperator<?> euclideanClusters = new BisectingKMeans()
            .setK(3)
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo("BiSecting KMeans EUCLIDEAN")
            .fit(vectors)
            .transform(vectors);
    EvalClusterBatchOp euclideanEval = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Bisecting KMeans EUCLIDEAN");
    euclideanClusters.link(euclideanEval);
    BatchOperator.execute();

    // Job 2: cosine distance for both clustering and evaluation.
    BatchOperator<?> cosineClusters = new BisectingKMeans()
            .setDistanceType(DistanceType.COSINE)
            .setK(3)
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo("BiSecting KMeans COSINE")
            .fit(vectors)
            .transform(vectors);
    EvalClusterBatchOp cosineEval = new EvalClusterBatchOp()
            .setDistanceType("COSINE")
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Bisecting KMeans COSINE");
    cosineClusters.link(cosineEval);
    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) BisectingKMeans(com.alibaba.alink.pipeline.clustering.BisectingKMeans) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 22 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

Method c_3_2 of class Chap17.

/**
 * Fits a GaussianMixture with K = 2 on the data read from {@code DATA_DIR + VECTOR_FILE},
 * applies it to the same data, and links the result to an EvalClusterBatchOp whose
 * metrics are lazily printed under the title "GaussianMixture 2".
 *
 * @throws Exception whatever the underlying Alink calls throw
 */
static void c_3_2() throws Exception {
    AkSourceBatchOp vectors = new AkSourceBatchOp().setFilePath(DATA_DIR + VECTOR_FILE);

    // Train and predict on the same input.
    BatchOperator<?> clustered = new GaussianMixture()
            .setK(2)
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo()
            .fit(vectors)
            .transform(vectors);

    EvalClusterBatchOp evaluator = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("GaussianMixture 2");
    clustered.link(evaluator);

    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) GaussianMixture(com.alibaba.alink.pipeline.clustering.GaussianMixture) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 23 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

Method c_2 of class Chap18.

/**
 * Compares batch and stream prediction with the same KMeans model.
 *
 * <ol>
 *   <li>If {@code DATA_DIR + INIT_MODEL_FILE} does not exist, trains a KMeans model (K = 10)
 *       on a 100-row sample of the batch source and writes it to that path.</li>
 *   <li>Runs batch prediction with that model and evaluates it ("Batch Prediction").</li>
 *   <li>Runs stream prediction with the same model, sinking the output to
 *       {@code DATA_DIR + TEMP_STREAM_FILE} (overwrite enabled).</li>
 *   <li>Reads the sunk stream output back as a batch source and evaluates it
 *       ("Stream Prediction").</li>
 * </ol>
 *
 * @throws Exception whatever the underlying Alink calls throw
 */
static void c_2() throws Exception {
    final String trainPath = DATA_DIR + SPARSE_TRAIN_FILE;
    final String initModelPath = DATA_DIR + INIT_MODEL_FILE;
    final String tempStreamPath = DATA_DIR + TEMP_STREAM_FILE;

    AkSourceBatchOp batchSource = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(trainPath);

    // Create the initial model only once; later runs reuse the saved file.
    boolean modelMissing = !new File(initModelPath).exists();
    if (modelMissing) {
        batchSource
                .sampleWithSize(100)
                .link(new KMeansTrainBatchOp().setVectorCol(VECTOR_COL_NAME).setK(10))
                .link(new AkSinkBatchOp().setFilePath(initModelPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(initModelPath);

    // Batch prediction followed by evaluation.
    BatchOperator<?> batchPrediction = new KMeansPredictBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME)
            .linkFrom(initModel, batchSource);
    EvalClusterBatchOp batchEval = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Batch Prediction");
    batchPrediction.link(batchEval);
    BatchOperator.execute();

    // Stream prediction; results are written to a temporary file for later evaluation.
    KMeansPredictStreamOp streamPredictor = new KMeansPredictStreamOp(initModel)
            .setPredictionCol(PREDICTION_COL_NAME);
    streamSource
            .link(streamPredictor)
            .link(new AkSinkStreamOp().setFilePath(tempStreamPath).setOverwriteSink(true));
    StreamOperator.execute();

    // Evaluate the stream output by reading it back as a batch source.
    AkSourceBatchOp streamOutput = new AkSourceBatchOp().setFilePath(tempStreamPath);
    EvalClusterBatchOp streamEval = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("Stream Prediction");
    streamOutput.link(streamEval);
    BatchOperator.execute();
}
Also used : KMeansPredictBatchOp(com.alibaba.alink.operator.batch.clustering.KMeansPredictBatchOp) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) KMeansPredictStreamOp(com.alibaba.alink.operator.stream.clustering.KMeansPredictStreamOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File) KMeansTrainBatchOp(com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 24 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

Method c_3 of class Chap18.

/**
 * Runs StreamingKMeansStreamOp over the stream read from {@code DATA_DIR + SPARSE_TRAIN_FILE},
 * starting from the model stored at {@code DATA_DIR + INIT_MODEL_FILE}, then evaluates the
 * sunk predictions in a batch job titled "StreamingKMeans".
 *
 * <p>The prediction stream is reduced to the prediction, label and vector columns, a
 * 0.001 sample of it is printed, and the full stream is written to
 * {@code DATA_DIR + TEMP_STREAM_FILE} with overwrite enabled.
 *
 * <p>NOTE(review): this method does not create the initial model file itself; presumably
 * it must already exist before this method is called. Confirm against the caller.
 *
 * @throws Exception whatever the underlying Alink calls throw
 */
static void c_3() throws Exception {
    final String tempStreamPath = DATA_DIR + TEMP_STREAM_FILE;

    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(DATA_DIR + INIT_MODEL_FILE);

    StreamOperator<?> predictions = streamSource
            .link(
                    new StreamingKMeansStreamOp(initModel)
                            .setTimeInterval(1L)
                            .setHalfLife(1)
                            .setPredictionCol(PREDICTION_COL_NAME))
            .select(PREDICTION_COL_NAME + ", " + LABEL_COL_NAME + ", " + VECTOR_COL_NAME);

    // Print only a small sample; the complete stream goes to the sink below.
    predictions.sample(0.001).print();
    predictions.link(new AkSinkStreamOp().setFilePath(tempStreamPath).setOverwriteSink(true));
    StreamOperator.execute();

    // Evaluate the persisted stream output as a batch job.
    AkSourceBatchOp streamOutput = new AkSourceBatchOp().setFilePath(tempStreamPath);
    EvalClusterBatchOp evaluator = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("StreamingKMeans");
    streamOutput.link(evaluator);
    BatchOperator.execute();
}
Also used : AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) StreamingKMeansStreamOp(com.alibaba.alink.operator.stream.clustering.StreamingKMeansStreamOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 25 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

Method c_3 of class Chap19.

/**
 * Times KMeans (K = 10) clustering on the original vectors and on their PCA projection.
 *
 * <p>A PCA model (K = 39, {@code CalculationType.COV}) is trained on
 * {@code DATA_DIR + SPARSE_TRAIN_FILE} and written to {@code DATA_DIR + PCA_MODEL_FILE}
 * (overwrite enabled). That model is then applied to the source, writing the prediction
 * back into the vector column. The same KMeans estimator is fitted and evaluated first on
 * the untouched source ("KMeans") and then on the PCA output ("KMeans + PCA"), and the
 * elapsed time of each run is printed to standard output.
 *
 * @throws Exception whatever the underlying Alink calls throw
 */
static void c_3() throws Exception {
    final String pcaModelPath = DATA_DIR + PCA_MODEL_FILE;

    AkSourceBatchOp source = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);

    // Train the PCA model and persist it.
    source
            .link(
                    new PcaTrainBatchOp()
                            .setK(39)
                            .setCalculationType(CalculationType.COV)
                            .setVectorCol(VECTOR_COL_NAME)
                            .lazyPrintModelInfo())
            .link(new AkSinkBatchOp().setFilePath(pcaModelPath).setOverwriteSink(true));
    BatchOperator.execute();

    // The prediction column equals the vector column, so the projection takes its place.
    PcaPredictBatchOp pcaPredictor = new PcaPredictBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(VECTOR_COL_NAME);
    AkSourceBatchOp pcaModel = new AkSourceBatchOp().setFilePath(pcaModelPath);
    BatchOperator<?> reduced = pcaPredictor.linkFrom(pcaModel, source);

    Stopwatch timer = new Stopwatch();
    KMeans clusterer = new KMeans()
            .setK(10)
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME);

    // Timed run 1: KMeans on the original vectors.
    timer.reset();
    timer.start();
    BatchOperator<?> plainClusters = clusterer.fit(source).transform(source);
    EvalClusterBatchOp plainEval = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("KMeans");
    plainClusters.link(plainEval);
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Timed run 2: KMeans on the PCA output.
    timer.reset();
    timer.start();
    BatchOperator<?> pcaClusters = clusterer.fit(reduced).transform(reduced);
    EvalClusterBatchOp pcaEval = new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("KMeans + PCA");
    pcaClusters.link(pcaEval);
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) KMeans(com.alibaba.alink.pipeline.clustering.KMeans) PcaPredictBatchOp(com.alibaba.alink.operator.batch.feature.PcaPredictBatchOp) PcaTrainBatchOp(com.alibaba.alink.operator.batch.feature.PcaTrainBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Aggregations

AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 66; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 20; EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 18; File (java.io.File): 16; EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp): 10; Pipeline (com.alibaba.alink.pipeline.Pipeline): 10; LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 10; EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp): 9; Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 8; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 7; Row (org.apache.flink.types.Row): 6; Test (org.junit.Test): 6; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 5; CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 5; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 5; ArrayList (java.util.ArrayList): 4; PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader): 3; RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey): 3; LogisticRegressionPredictBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp): 3; LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp): 3