Search in sources:

Example 51 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap10 method c_1.

/**
 * Trains a logistic regression model on the derived feature columns and
 * evaluates its predictions on the test set.
 *
 * <p>Both data sets are read from {@code DATA_DIR} and projected through
 * {@code CLAUSE_CREATE_FEATURES}. Every resulting column except
 * {@code LABEL_COL_NAME} is used as a feature. The collected training info is
 * handed to {@code printImportance}, and binary-classification metrics are
 * requested with {@code "2"} as the positive label value.
 *
 * @throws Exception if any of the invoked operators or the final
 *                   {@code BatchOperator.execute()} call fails
 */
static void c_1() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TRAIN_FILE)
        .select(CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TEST_FILE)
        .select(CLAUSE_CREATE_FEATURES);

    // Everything produced by the select clause, minus the label, is a feature.
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);
    trainSet.lazyPrint(5, "< new features >");

    LogisticRegressionTrainBatchOp lrTrain = new LogisticRegressionTrainBatchOp()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME);
    LogisticRegressionPredictBatchOp lrPredict = new LogisticRegressionPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // The trainer is linked to its input before the predictor consumes its model output.
    trainSet.link(lrTrain);
    lrPredict.linkFrom(lrTrain, testSet);

    Consumer<LinearModelTrainInfo> importancePrinter =
        info -> printImportance(info.getColNames(), info.getImportance());
    lrTrain.lazyPrintTrainInfo().lazyCollectTrainInfo(importancePrinter);

    EvalBinaryClassBatchOp evaluator = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("2")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics();
    lrPredict.link(evaluator);

    BatchOperator.execute();
}
Also used : LogisticRegressionPredictBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp) LogisticRegressionTrainBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp) LinearModelTrainInfo(com.alibaba.alink.operator.common.linear.LinearModelTrainInfo) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 52 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap10 method c_2.

/**
 * Trains a logistic regression model with the trainer's L1 parameter set to
 * {@code 0.01} and evaluates its predictions on the test set.
 *
 * <p>Both data sets are read from {@code DATA_DIR} and projected through
 * {@code CLAUSE_CREATE_FEATURES}. Every resulting column except
 * {@code LABEL_COL_NAME} is used as a feature. The collected training info is
 * handed to {@code printImportance}, and binary-classification metrics are
 * requested with {@code "2"} as the positive label value.
 *
 * @throws Exception if any of the invoked operators or the final
 *                   {@code BatchOperator.execute()} call fails
 */
static void c_2() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TRAIN_FILE)
        .select(CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TEST_FILE)
        .select(CLAUSE_CREATE_FEATURES);

    // Everything produced by the select clause, minus the label, is a feature.
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);
    trainSet.lazyPrint(5, "< new features >");

    LogisticRegressionTrainBatchOp lrTrain = new LogisticRegressionTrainBatchOp()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setL1(0.01);
    LogisticRegressionPredictBatchOp lrPredict = new LogisticRegressionPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // The trainer is linked to its input before the predictor consumes its model output.
    trainSet.link(lrTrain);
    lrPredict.linkFrom(lrTrain, testSet);

    Consumer<LinearModelTrainInfo> importancePrinter =
        info -> printImportance(info.getColNames(), info.getImportance());
    lrTrain.lazyPrintTrainInfo().lazyCollectTrainInfo(importancePrinter);

    EvalBinaryClassBatchOp evaluator = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("2")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics();
    lrPredict.link(evaluator);

    BatchOperator.execute();
}
Also used : LogisticRegressionPredictBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp) LogisticRegressionTrainBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp) LinearModelTrainInfo(com.alibaba.alink.operator.common.linear.LinearModelTrainInfo) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 53 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap10 method c_3_1.

/**
 * Builds a three-stage pipeline (one-hot encoding, vector assembly, logistic
 * regression), fits it on the training file and evaluates the transformed
 * test file.
 *
 * <p>The encoder works on {@code CATEGORY_FEATURE_COL_NAMES} with
 * {@code Encode.VECTOR}; the assembler combines {@code FEATURE_COL_NAMES} into
 * {@code VEC_COL_NAME}, which the classifier then reads as its vector column.
 * Binary-classification metrics are requested with {@code "2"} as the positive
 * label value.
 *
 * @throws Exception if any of the invoked operators or the final
 *                   {@code BatchOperator.execute()} call fails
 */
static void c_3_1() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    OneHotEncoder encoder = new OneHotEncoder()
        .setSelectedCols(CATEGORY_FEATURE_COL_NAMES)
        .setEncode(Encode.VECTOR);
    VectorAssembler assembler = new VectorAssembler()
        .setSelectedCols(FEATURE_COL_NAMES)
        .setOutputCol(VEC_COL_NAME);
    LogisticRegression classifier = new LogisticRegression()
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // Stage order matters: encode, then assemble, then classify.
    Pipeline pipeline = new Pipeline()
        .add(encoder)
        .add(assembler)
        .add(classifier);

    BatchOperator<?> predictions = pipeline.fit(trainSet).transform(testSet);

    EvalBinaryClassBatchOp evaluator = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("2")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics();
    predictions.link(evaluator);

    BatchOperator.execute();
}
Also used : OneHotEncoder(com.alibaba.alink.pipeline.feature.OneHotEncoder) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) VectorAssembler(com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) Pipeline(com.alibaba.alink.pipeline.Pipeline) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 54 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap11 method c_8.

/**
 * Fits a GBDT classifier (100 trees, max depth 5, 256 bins) on the training
 * sample file and evaluates its predictions on the test file.
 *
 * <p>Feature columns are all columns of the test data except
 * {@code LABEL_COL_NAME}. Metrics are printed under the title
 * {@code "GBDT with Stratified Sample"}.
 *
 * @throws Exception if any of the invoked operators or the final
 *                   {@code BatchOperator.execute()} call fails
 */
static void c_8() throws Exception {
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    AkSourceBatchOp sampledTrainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_SAMPLE_FILE);

    // The feature list is taken from the test data's schema, minus the label.
    String[] featureCols = ArrayUtils.removeElement(testSet.getColNames(), LABEL_COL_NAME);

    GbdtClassifier gbdt = new GbdtClassifier()
        .setNumTrees(100)
        .setMaxDepth(5)
        .setMaxBins(256)
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    BatchOperator<?> predictions = gbdt.fit(sampledTrainSet).transform(testSet);

    EvalBinaryClassBatchOp evaluator = new EvalBinaryClassBatchOp()
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics("GBDT with Stratified Sample");
    predictions.link(evaluator);

    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) GbdtClassifier(com.alibaba.alink.pipeline.classification.GbdtClassifier) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 55 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap11 method c_4.

/**
 * Compares logistic regression trained on the full training split with
 * logistic regression trained on a stratified sample of it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Print statistics and per-label counts of the full data set.</li>
 *   <li>Call {@code Utils.splitTrainTestIfNotExist} with ratio {@code 0.8} to
 *       obtain the train and test files.</li>
 *   <li>Fit and evaluate logistic regression on the full training file.</li>
 *   <li>If the sample file does not exist yet, write a stratified sample of
 *       the training data (strata ratios {@code "0:0.05,1:1.0"} on the label
 *       column) to it.</li>
 *   <li>Fit and evaluate logistic regression on the sample file.</li>
 * </ol>
 *
 * @throws Exception if any of the invoked operators or a
 *                   {@code BatchOperator.execute()} call fails
 */
static void c_4() throws Exception {
    String trainPath = DATA_DIR + TRAIN_FILE;
    String testPath = DATA_DIR + TEST_FILE;
    String samplePath = DATA_DIR + TRAIN_SAMPLE_FILE;

    AkSourceBatchOp fullSet = new AkSourceBatchOp().setFilePath(DATA_DIR + FEATURE_LABEL_FILE);
    fullSet
        .lazyPrintStatistics()
        .groupBy("label", "label, COUNT(*) AS cnt")
        .print();

    Utils.splitTrainTestIfNotExist(fullSet, trainPath, testPath, 0.8);

    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(testPath);
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);

    // Baseline: train on the complete training split.
    LogisticRegression baselineLr = new LogisticRegression()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    BatchOperator<?> baselinePredictions = baselineLr.fit(trainSet).transform(testSet);
    EvalBinaryClassBatchOp baselineEval = new EvalBinaryClassBatchOp()
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics("LogisticRegression");
    baselinePredictions.link(baselineEval);
    BatchOperator.execute();

    // Create the stratified sample only once; later runs reuse the file.
    boolean sampleMissing = !new File(samplePath).exists();
    if (sampleMissing) {
        StratifiedSampleBatchOp sampler = new StratifiedSampleBatchOp()
            .setStrataRatios("0:0.05,1:1.0")
            .setStrataCol(LABEL_COL_NAME);
        trainSet
            .link(sampler)
            .link(new AkSinkBatchOp().setFilePath(samplePath));
        BatchOperator.execute();
    }

    // Same model configuration, trained on the stratified sample instead.
    AkSourceBatchOp sampledTrainSet = new AkSourceBatchOp().setFilePath(samplePath);
    LogisticRegression sampledLr = new LogisticRegression()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    BatchOperator<?> sampledPredictions = sampledLr.fit(sampledTrainSet).transform(testSet);
    EvalBinaryClassBatchOp sampledEval = new EvalBinaryClassBatchOp()
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics("LogisticRegression with Stratified Sample");
    sampledPredictions.link(sampledEval);
    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) StratifiedSampleBatchOp(com.alibaba.alink.operator.batch.dataproc.StratifiedSampleBatchOp) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Aggregations

AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 66
AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 20
EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 18
File (java.io.File): 16
EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp): 10
Pipeline (com.alibaba.alink.pipeline.Pipeline): 10
LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 10
EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp): 9
Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 8
MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 7
Row (org.apache.flink.types.Row): 6
Test (org.junit.Test): 6
BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 5
CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 5
PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 5
ArrayList (java.util.ArrayList): 4
PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader): 3
RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey): 3
LogisticRegressionPredictBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp): 3
LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp): 3