Search in sources :

Example 56 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap12 method c_3.

/**
 * Trains a naive Bayes model on the training file, applies it to the test file, and
 * registers lazy printouts for the model info, one prediction row and the multi-class
 * evaluation metrics before running the batch job.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_3() throws Exception {
    AkSourceBatchOp trainSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    NaiveBayesTrainBatchOp nbTrainer = new NaiveBayesTrainBatchOp()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME);
    NaiveBayesPredictBatchOp nbPredictor = new NaiveBayesPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // Wire the graph: train source -> trainer, then (model, test source) -> predictor.
    trainSource.link(nbTrainer);
    nbPredictor.linkFrom(nbTrainer, testSource);

    // Lazy outputs are only registered here; they are produced by execute() below.
    nbTrainer.lazyPrintModelInfo();
    nbPredictor.lazyPrint(1, "< Prediction >");

    EvalMultiClassBatchOp evaluator = new EvalMultiClassBatchOp()
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    nbPredictor.link(evaluator.lazyPrintMetrics("NaiveBayes"));

    BatchOperator.execute();
}
Also used : EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) NaiveBayesPredictBatchOp(com.alibaba.alink.operator.batch.classification.NaiveBayesPredictBatchOp) NaiveBayesTrainBatchOp(com.alibaba.alink.operator.batch.classification.NaiveBayesTrainBatchOp)

Example 57 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap12 method c_4.

/**
 * Evaluates three one-vs-rest classifiers (logistic regression, GBDT, linear SVM) on the
 * same train/test split. Each variant is fitted on the training data, applied to the test
 * data and linked to a multi-class evaluation whose metrics are printed lazily under a
 * variant-specific title.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_4() throws Exception {
    AkSourceBatchOp trainSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // --- logistic regression as the base classifier ---
    LogisticRegression lrBase = new LogisticRegression()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);
    new OneVsRest()
        .setClassifier(lrBase)
        .setNumClass(3)
        .fit(trainSource)
        .transform(testSource)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("OneVsRest_LogisticRegression")
        );

    // --- GBDT as the base classifier ---
    GbdtClassifier gbdtBase = new GbdtClassifier()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);
    new OneVsRest()
        .setClassifier(gbdtBase)
        .setNumClass(3)
        .fit(trainSource)
        .transform(testSource)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("OneVsRest_GBDT")
        );

    // --- linear SVM as the base classifier ---
    LinearSvm svmBase = new LinearSvm()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);
    new OneVsRest()
        .setClassifier(svmBase)
        .setNumClass(3)
        .fit(trainSource)
        .transform(testSource)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("OneVsRest_LinearSvm")
        );

    // A single execute() runs all three evaluation branches.
    BatchOperator.execute();
}
Also used : EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) OneVsRest(com.alibaba.alink.pipeline.classification.OneVsRest) GbdtClassifier(com.alibaba.alink.pipeline.classification.GbdtClassifier) LinearSvm(com.alibaba.alink.pipeline.classification.LinearSvm) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression)

Example 58 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap13 method c_5.

/**
 * Times decision-tree and random-forest classifiers on the column ("table") form of the data.
 *
 * <p>The table files are derived from the sparse-vector files by expanding the vector column
 * into 784 double columns named {@code c_0 .. c_783}, keeping the label column. Each table
 * file is probed on its own and only the missing one(s) are regenerated, so a run that left
 * just one of the two files behind is repaired instead of being skipped.
 *
 * <p>After that, one decision tree per {@link TreeType} and random forests with 2..128 trees
 * are fitted on the train table and evaluated on the test table. Every configuration is run
 * as its own job and its elapsed time is printed to standard output.
 *
 * @throws Exception if building or executing any of the batch jobs fails
 */
static void c_5() throws Exception {
    BatchOperator.setParallelism(4);

    // Probe both derived files: both are read below, so checking only one is not enough.
    final boolean trainTableMissing = !new File(DATA_DIR + TABLE_TRAIN_FILE).exists();
    final boolean testTableMissing = !new File(DATA_DIR + TABLE_TEST_FILE).exists();

    if (trainTableMissing || testTableMissing) {
        // Build the schema string "c_0 double, c_1 double, ..., c_783 double".
        StringBuilder sbd = new StringBuilder();
        sbd.append("c_0 double");
        for (int i = 1; i < 784; i++) {
            sbd.append(", c_").append(i).append(" double");
        }
        final String schemaStr = sbd.toString();

        if (trainTableMissing) {
            AkSourceBatchOp train_sparse = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
            new VectorToColumns()
                .setVectorCol(VECTOR_COL_NAME)
                .setSchemaStr(schemaStr)
                .setReservedCols(LABEL_COL_NAME)
                .transform(train_sparse)
                .link(new AkSinkBatchOp().setFilePath(DATA_DIR + TABLE_TRAIN_FILE));
        }
        if (testTableMissing) {
            AkSourceBatchOp test_sparse = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE);
            new VectorToColumns()
                .setVectorCol(VECTOR_COL_NAME)
                .setSchemaStr(schemaStr)
                .setReservedCols(LABEL_COL_NAME)
                .transform(test_sparse)
                .link(new AkSinkBatchOp().setFilePath(DATA_DIR + TABLE_TEST_FILE));
        }
        // At least one sink was linked above, so there is something to execute.
        BatchOperator.execute();
    }

    AkSourceBatchOp train_data = new AkSourceBatchOp().setFilePath(DATA_DIR + TABLE_TRAIN_FILE);
    AkSourceBatchOp test_data = new AkSourceBatchOp().setFilePath(DATA_DIR + TABLE_TEST_FILE);

    // Every column except the label is used as a feature.
    final String[] featureColNames = ArrayUtils.removeElement(train_data.getColNames(), LABEL_COL_NAME);
    train_data.lazyPrint(5);

    Stopwatch sw = new Stopwatch();

    // One timed job per split criterion.
    for (TreeType treeType : new TreeType[] { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO }) {
        sw.reset();
        sw.start();
        new DecisionTreeClassifier()
            .setTreeType(treeType)
            .setFeatureCols(featureColNames)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo()
            .fit(train_data)
            .transform(test_data)
            .link(
                new EvalMultiClassBatchOp()
                    .setLabelCol(LABEL_COL_NAME)
                    .setPredictionCol(PREDICTION_COL_NAME)
                    .lazyPrintMetrics("DecisionTreeClassifier " + treeType.toString())
            );
        BatchOperator.execute();
        sw.stop();
        System.out.println(sw.getElapsedTimeSpan());
    }

    // One timed job per forest size.
    for (int numTrees : new int[] { 2, 4, 8, 16, 32, 64, 128 }) {
        sw.reset();
        sw.start();
        new RandomForestClassifier()
            .setSubsamplingRatio(0.6)
            .setNumTreesOfInfoGain(numTrees)
            .setFeatureCols(featureColNames)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo()
            .fit(train_data)
            .transform(test_data)
            .link(
                new EvalMultiClassBatchOp()
                    .setLabelCol(LABEL_COL_NAME)
                    .setPredictionCol(PREDICTION_COL_NAME)
                    .lazyPrintMetrics("RandomForestClassifier : " + numTrees)
            );
        BatchOperator.execute();
        sw.stop();
        System.out.println(sw.getElapsedTimeSpan());
    }
}
Also used : TreeType(com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType) EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) RandomForestClassifier(com.alibaba.alink.pipeline.classification.RandomForestClassifier) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) VectorToColumns(com.alibaba.alink.pipeline.dataproc.format.VectorToColumns) DecisionTreeClassifier(com.alibaba.alink.pipeline.classification.DecisionTreeClassifier) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File)

Example 59 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap13 method c_2.

/**
 * Fits a Softmax classifier on the sparse training data, applies it to the sparse test
 * data and lazily prints the training info, the model info and the multi-class metrics.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_2() throws Exception {
    AkSourceBatchOp trainSource = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp testSource = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE);

    // The classifier reads its features from a single vector column.
    Softmax softmax = new Softmax()
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintTrainInfo()
        .enableLazyPrintModelInfo();

    softmax
        .fit(trainSource)
        .transform(testSource)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("Softmax")
        );

    BatchOperator.execute();
}
Also used : EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Softmax(com.alibaba.alink.pipeline.classification.Softmax)

Example 60 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap14 method c_5.

/**
 * Runs an FTRL online-learning job on a CSV stream.
 *
 * <p>A saved feature pipeline model and an initial model are loaded from {@code DATA_DIR}.
 * The CSV stream is split in two with fraction 0.5; both parts go through the feature
 * pipeline. The main output feeds the FTRL trainer, and the side output is scored by the
 * FTRL predictor, which is linked to the trainer's output. A sample of the predictions and
 * the binary-classification metrics (Accuracy, AUC, ConfusionMatrix extracted from the
 * evaluation JSON) are printed, each tagged with an {@code out_type} column.
 *
 * @throws Exception if loading the models or building/executing the stream job fails
 */
static void c_5() throws Exception {
    // load pipeline model
    PipelineModel feature_pipelineModel = PipelineModel.load(DATA_DIR + FEATURE_MODEL_FILE);
    // Wildcard-parameterized instead of raw: the concrete operator type is irrelevant here.
    BatchOperator<?> initModel = new AkSourceBatchOp().setFilePath(DATA_DIR + INIT_MODEL_FILE);

    // prepare stream train data
    CsvSourceStreamOp data = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(SCHEMA_STRING)
        .setIgnoreFirstLine(true);

    // split stream to train and eval data
    SplitStreamOp spliter = new SplitStreamOp().setFraction(0.5).linkFrom(data);
    StreamOperator<?> train_stream_data = feature_pipelineModel.transform(spliter);
    StreamOperator<?> test_stream_data = feature_pipelineModel.transform(spliter.getSideOutput(0));

    // ftrl train
    FtrlTrainStreamOp model = new FtrlTrainStreamOp(initModel)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(NUM_HASH_FEATURES)
        .linkFrom(train_stream_data);

    // ftrl predict
    FtrlPredictStreamOp predResult = new FtrlPredictStreamOp(initModel)
        .setVectorCol(VEC_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setReservedCols(new String[] { LABEL_COL_NAME })
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .linkFrom(model, test_stream_data);

    // Print only a small sample of the predictions.
    predResult
        .sample(0.0001)
        .select("'Pred Sample' AS out_type, *")
        .print();

    // ftrl eval
    predResult
        .link(
            new EvalBinaryClassStreamOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .setTimeInterval(10)
        )
        .link(
            new JsonValueStreamOp()
                .setSelectedCol("Data")
                .setReservedCols(new String[] { "Statistics" })
                .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
                .setJsonPath(new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" })
        )
        .select("'Eval Metric' AS out_type, *")
        .print();

    StreamOperator.execute();
}
Also used : JsonValueStreamOp(com.alibaba.alink.operator.stream.dataproc.JsonValueStreamOp) SplitStreamOp(com.alibaba.alink.operator.stream.dataproc.SplitStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) FtrlTrainStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp) FtrlPredictStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlPredictStreamOp) EvalBinaryClassStreamOp(com.alibaba.alink.operator.stream.evaluation.EvalBinaryClassStreamOp) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) PipelineModel(com.alibaba.alink.pipeline.PipelineModel)

Aggregations

AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp)66 AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)20 EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)18 File (java.io.File)16 EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp)10 Pipeline (com.alibaba.alink.pipeline.Pipeline)10 LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression)10 EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)9 Stopwatch (com.alibaba.alink.common.utils.Stopwatch)8 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)7 Row (org.apache.flink.types.Row)6 Test (org.junit.Test)6 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)5 CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)5 PipelineModel (com.alibaba.alink.pipeline.PipelineModel)5 ArrayList (java.util.ArrayList)4 PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader)3 RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey)3 LogisticRegressionPredictBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp)3 LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp)3