Search in sources:

Example 16 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap13 method c_3.

/**
 * Trains and evaluates two one-vs-rest classifiers on the sparse data set:
 * one wrapping {@code LogisticRegression} and one wrapping {@code LinearSvm}.
 *
 * <p>Both are fitted on {@code DATA_DIR + SPARSE_TRAIN_FILE}, applied to
 * {@code DATA_DIR + SPARSE_TEST_FILE}, and linked to an
 * {@code EvalMultiClassBatchOp} whose metrics are requested lazily. A single
 * {@code BatchOperator.execute()} call at the end runs everything.
 *
 * @throws Exception propagated from the Alink operators or the job execution
 */
static void c_3() throws Exception {
    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE);

    // Parallelism is set globally before any model is built.
    BatchOperator.setParallelism(1);

    // One-vs-rest over logistic regression, 10 classes.
    BatchOperator<?> logisticPredictions = new OneVsRest()
        .setClassifier(
            new LogisticRegression()
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))
        .setNumClass(10)
        .fit(trainSet)
        .transform(testSet);
    logisticPredictions.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("OneVsRest - LogisticRegression"));

    // One-vs-rest over linear SVM, 10 classes.
    BatchOperator<?> svmPredictions = new OneVsRest()
        .setClassifier(
            new LinearSvm()
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))
        .setNumClass(10)
        .fit(trainSet)
        .transform(testSet);
    svmPredictions.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("OneVsRest - LinearSvm"));

    BatchOperator.execute();
}
Also used : EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) OneVsRest(com.alibaba.alink.pipeline.classification.OneVsRest) LinearSvm(com.alibaba.alink.pipeline.classification.LinearSvm) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression)

Example 17 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap14 method c_6.

/**
 * Wires up a streaming FTRL workflow: train online, filter the emitted models,
 * predict with the surviving models, and evaluate the predictions.
 *
 * <p>Three streams are printed, each tagged through an {@code out_type} column:
 * {@code 'Model'}, {@code 'Pred Sample'} and {@code 'Eval Metric'}.
 *
 * @throws Exception propagated from the Alink operators or the job execution
 */
static void c_6() throws Exception {
    // Raw CSV stream; the first line of the file is skipped.
    CsvSourceStreamOp rawStream = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(SCHEMA_STRING)
        .setIgnoreFirstLine(true);

    // Previously saved feature pipeline model.
    PipelineModel featureModel = PipelineModel.load(DATA_DIR + FEATURE_MODEL_FILE);

    // Split with fraction 0.5: main output feeds training, side output 0 feeds testing.
    SplitStreamOp splitter = new SplitStreamOp().setFraction(0.5).linkFrom(rawStream);
    StreamOperator<?> trainStream = featureModel.transform(splitter);
    StreamOperator<?> testStream = featureModel.transform(splitter.getSideOutput(0));

    // Batch model handed to both the FTRL trainer and the FTRL predictor.
    AkSourceBatchOp initialModel = new AkSourceBatchOp().setFilePath(DATA_DIR + INIT_MODEL_FILE);

    // Online FTRL training on the training branch.
    FtrlTrainStreamOp ftrlModel = new FtrlTrainStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(NUM_HASH_FEATURES)
        .linkFrom(trainStream);

    // Model filter configured with accuracy threshold 0.83 and AUC threshold 0.71.
    FtrlModelFilterStreamOp filteredModel = new FtrlModelFilterStreamOp()
        .setPositiveLabelValueString("1")
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setAccuracyThreshold(0.83)
        .setAucThreshold(0.71)
        .linkFrom(ftrlModel, trainStream);
    filteredModel
        .select("'Model' AS out_type, *")
        .print();

    // Prediction on the test branch, driven by the filtered model stream.
    FtrlPredictStreamOp predictions = new FtrlPredictStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setReservedCols(new String[] { LABEL_COL_NAME })
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .linkFrom(filteredModel, testStream);
    predictions
        .sample(0.0001)
        .select("'Pred Sample' AS out_type, *")
        .print();

    // Binary-class evaluation, then pull three metrics out of the JSON "Data" column.
    predictions
        .link(
            new EvalBinaryClassStreamOp()
                .setPositiveLabelValueString("1")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .setTimeInterval(10))
        .link(
            new JsonValueStreamOp()
                .setSelectedCol("Data")
                .setReservedCols(new String[] { "Statistics" })
                .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
                .setJsonPath(new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" }))
        .select("'Eval Metric' AS out_type, *")
        .print();

    StreamOperator.execute();
}
Also used : JsonValueStreamOp(com.alibaba.alink.operator.stream.dataproc.JsonValueStreamOp) SplitStreamOp(com.alibaba.alink.operator.stream.dataproc.SplitStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) FtrlTrainStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp) FtrlPredictStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlPredictStreamOp) EvalBinaryClassStreamOp(com.alibaba.alink.operator.stream.evaluation.EvalBinaryClassStreamOp) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) FtrlModelFilterStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlModelFilterStreamOp) PipelineModel(com.alibaba.alink.pipeline.PipelineModel)

Example 18 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap16 method c_3.

/**
 * Fits a {@code LinearRegression} and a {@code LassoRegression} (lambda 0.05)
 * on {@code DATA_DIR + TRAIN_FILE} and evaluates both on
 * {@code DATA_DIR + TEST_FILE} with {@code EvalRegressionBatchOp}.
 *
 * <p>Train info, model info and metrics are all requested lazily; one
 * {@code BatchOperator.execute()} call at the end runs both models.
 *
 * @throws Exception propagated from the Alink operators or the job execution
 */
static void c_3() throws Exception {
    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Plain linear regression.
    BatchOperator<?> linearPredictions = new LinearRegression()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintTrainInfo()
        .enableLazyPrintModelInfo()
        .fit(trainSet)
        .transform(testSet);
    linearPredictions.link(
        new EvalRegressionBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("LinearRegression"));

    // Lasso regression; its model info is printed under an explicit title.
    BatchOperator<?> lassoPredictions = new LassoRegression()
        .setLambda(0.05)
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintTrainInfo()
        .enableLazyPrintModelInfo("< LASSO model >")
        .fit(trainSet)
        .transform(testSet);
    lassoPredictions.link(
        new EvalRegressionBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("LassoRegression"));

    BatchOperator.execute();
}
Also used : LassoRegression(com.alibaba.alink.pipeline.regression.LassoRegression) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) EvalRegressionBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalRegressionBatchOp) LinearRegression(com.alibaba.alink.pipeline.regression.LinearRegression)

Example 19 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap16 method c_5.

/**
 * Evaluates a {@code GbdtRegressor} for a series of tree counts
 * (16, 32, 64, 128, 256, 512), keeping every other setting fixed.
 *
 * <p>Each configuration is fitted on {@code DATA_DIR + TRAIN_FILE}, applied to
 * {@code DATA_DIR + TEST_FILE}, and executed on its own, so
 * {@code BatchOperator.execute()} is called once per tree count.
 *
 * @throws Exception propagated from the Alink operators or the job execution
 */
static void c_5() throws Exception {
    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    int[] treeCounts = { 16, 32, 64, 128, 256, 512 };
    for (int numTrees : treeCounts) {
        BatchOperator<?> predictions = new GbdtRegressor()
            .setLearningRate(0.05)
            .setMaxLeaves(256)
            .setFeatureSubsamplingRatio(0.3)
            .setMinSamplesPerLeaf(2)
            .setMaxDepth(100)
            .setNumTrees(numTrees)
            .setFeatureCols(FEATURE_COL_NAMES)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .fit(trainSet)
            .transform(testSet);
        predictions.link(
            new EvalRegressionBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("GbdtRegressor - " + numTrees));

        // One execution per configuration.
        BatchOperator.execute();
    }
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) EvalRegressionBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalRegressionBatchOp) GbdtRegressor(com.alibaba.alink.pipeline.regression.GbdtRegressor)

Example 20 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap16 method c_4.

/**
 * Evaluates a {@code DecisionTreeRegressor}, then a {@code RandomForestRegressor}
 * for tree counts 2, 4, 8, 16, 32, 64 and 128.
 *
 * <p>Every model is fitted on {@code DATA_DIR + TRAIN_FILE}, applied to
 * {@code DATA_DIR + TEST_FILE}, and linked to an {@code EvalRegressionBatchOp}.
 * {@code BatchOperator.execute()} is called after the decision tree and again
 * after each random-forest configuration.
 *
 * @throws Exception propagated from the Alink operators or the job execution
 */
static void c_4() throws Exception {
    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Single decision tree, run first on its own.
    BatchOperator<?> treePredictions = new DecisionTreeRegressor()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .fit(trainSet)
        .transform(testSet);
    treePredictions.link(
        new EvalRegressionBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("DecisionTreeRegressor"));
    BatchOperator.execute();

    // Random forests of increasing size, one execution each.
    int[] treeCounts = { 2, 4, 8, 16, 32, 64, 128 };
    for (int numTrees : treeCounts) {
        BatchOperator<?> forestPredictions = new RandomForestRegressor()
            .setNumTrees(numTrees)
            .setFeatureCols(FEATURE_COL_NAMES)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .fit(trainSet)
            .transform(testSet);
        forestPredictions.link(
            new EvalRegressionBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("RandomForestRegressor - " + numTrees));
        BatchOperator.execute();
    }
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) DecisionTreeRegressor(com.alibaba.alink.pipeline.regression.DecisionTreeRegressor) EvalRegressionBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalRegressionBatchOp) RandomForestRegressor(com.alibaba.alink.pipeline.regression.RandomForestRegressor)

Aggregations

AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 66; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 20; EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 18; File (java.io.File): 16; EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp): 10; Pipeline (com.alibaba.alink.pipeline.Pipeline): 10; LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 10; EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp): 9; Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 8; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 7; Row (org.apache.flink.types.Row): 6; Test (org.junit.Test): 6; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 5; CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 5; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 5; ArrayList (java.util.ArrayList): 4; PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader): 3; RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey): 3; LogisticRegressionPredictBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp): 3; LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp): 3