Search in sources:

Example 1 with PipelineModel

use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.

The class Chap14, method c_6.

/**
 * Builds and runs a streaming FTRL job: reads the CSV source, applies a previously saved feature
 * pipeline to both halves of a 50/50 split, trains FTRL on one half starting from a stored initial
 * model, passes the resulting model stream through an FTRL model filter, predicts on the other
 * half, and prints the filtered models, a prediction sample and evaluation metrics.
 *
 * @throws Exception propagated from the Alink calls made here (model loading, stream execution)
 */
static void c_6() throws Exception {
    // Streaming CSV source (first line ignored).
    CsvSourceStreamOp source = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(SCHEMA_STRING)
        .setIgnoreFirstLine(true);

    // Saved feature-engineering pipeline.
    PipelineModel featureModel = PipelineModel.load(DATA_DIR + FEATURE_MODEL_FILE);

    // Main output of the splitter feeds training; side output 0 feeds prediction/evaluation.
    SplitStreamOp splitter = new SplitStreamOp()
        .setFraction(0.5)
        .linkFrom(source);
    StreamOperator<?> trainStream = featureModel.transform(splitter);
    StreamOperator<?> testStream = featureModel.transform(splitter.getSideOutput(0));

    // Batch model used to initialise both the trainer and the predictor.
    AkSourceBatchOp initialModel = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + INIT_MODEL_FILE);

    // Online FTRL training.
    FtrlTrainStreamOp trainer = new FtrlTrainStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(NUM_HASH_FEATURES)
        .linkFrom(trainStream);

    // Model filter configured with accuracy and AUC thresholds, fed with the training stream.
    FtrlModelFilterStreamOp filteredModels = new FtrlModelFilterStreamOp()
        .setPositiveLabelValueString("1")
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setAccuracyThreshold(0.83)
        .setAucThreshold(0.71)
        .linkFrom(trainer, trainStream);
    filteredModels
        .select("'Model' AS out_type, *")
        .print();

    // Prediction on the held-out half using the filtered model stream.
    String[] keptCols = new String[] { LABEL_COL_NAME };
    FtrlPredictStreamOp predictions = new FtrlPredictStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setReservedCols(keptCols)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .linkFrom(filteredModels, testStream);
    predictions
        .sample(0.0001)
        .select("'Pred Sample' AS out_type, *")
        .print();

    // Binary-classification evaluation, then metric extraction from the JSON "Data" column.
    EvalBinaryClassStreamOp evaluated = predictions.link(
        new EvalBinaryClassStreamOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .setTimeInterval(10));
    String[] statCols = new String[] { "Statistics" };
    String[] metricCols = new String[] { "Accuracy", "AUC", "ConfusionMatrix" };
    String[] metricPaths = new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" };
    JsonValueStreamOp metrics = evaluated.link(
        new JsonValueStreamOp()
            .setSelectedCol("Data")
            .setReservedCols(statCols)
            .setOutputCols(metricCols)
            .setJsonPath(metricPaths));
    metrics
        .select("'Eval Metric' AS out_type, *")
        .print();

    StreamOperator.execute();
}
Also used : JsonValueStreamOp(com.alibaba.alink.operator.stream.dataproc.JsonValueStreamOp) SplitStreamOp(com.alibaba.alink.operator.stream.dataproc.SplitStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) FtrlTrainStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp) FtrlPredictStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlPredictStreamOp) EvalBinaryClassStreamOp(com.alibaba.alink.operator.stream.evaluation.EvalBinaryClassStreamOp) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) FtrlModelFilterStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlModelFilterStreamOp) PipelineModel(com.alibaba.alink.pipeline.PipelineModel)

Example 2 with PipelineModel

use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.

The class LogisticRegTest, method pipelineTestBatch.

/**
 * Fits a pipeline of three logistic-regression stages (feature columns, the "vec" column and the
 * "svec" column) on the test data, transforms the same data, and asserts that every prediction
 * column equals the label column on every row.
 */
@Test
public void pipelineTestBatch() {
    String[] xVars = new String[] { "f0", "f1", "f2", "f3" };
    String yVar = "labels";
    String vectorName = "vec";
    String svectorName = "svec";
    // Column 0 is the label; every following column is one stage's prediction.
    String[] checkedCols = new String[] { "labels", "lrpred", "vlrpred", "svlrpred" };

    LogisticRegression lr = new LogisticRegression()
        .setLabelCol(yVar)
        .setFeatureCols(xVars)
        .setPredictionCol("lrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();
    LogisticRegression vectorLr = new LogisticRegression()
        .setLabelCol(yVar)
        .setVectorCol(vectorName)
        .setPredictionCol("vlrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();
    LogisticRegression sparseVectorLr = new LogisticRegression()
        .setLabelCol(yVar)
        .setVectorCol(svectorName)
        .setPredictionCol("svlrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();

    Pipeline plLr = new Pipeline().add(lr).add(vectorLr).add(sparseVectorLr);
    BatchOperator<?> trainData = getData();
    PipelineModel model = plLr.fit(trainData);
    BatchOperator<?> result = model.transform(trainData).select(checkedCols);

    List<Row> data = result.collect();
    for (Row row : data) {
        // Bound comes from the selected columns so the last prediction column is checked too.
        // NOTE(review): the previous bound (i < 3) never compared "svlrpred"; confirm the
        // sparse-vector model is expected to reproduce the label on every row.
        for (int i = 1; i < checkedCols.length; ++i) {
            Assert.assertEquals(row.getField(0), row.getField(i));
        }
    }
}
Also used : Row(org.apache.flink.types.Row) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Test(org.junit.Test)

Example 3 with PipelineModel

use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.

The class SoftmaxTest, method pipelineTest.

/**
 * Fits the four-stage softmax pipeline on an in-memory batch source, then checks the selected
 * prediction columns against the label column for both the batch transform and the stream
 * transform of the same rows.
 *
 * @throws Exception propagated from the stream job execution
 */
@Test
public void pipelineTest() throws Exception {
    // Column 0 is the label; every following column is a prediction to compare against it.
    String[] checkedCols = new String[] { "label", "predLr", "vpredLr", "svpredLr" };

    BatchOperator<?> vecdata = new MemSourceBatchOp(Arrays.asList(vecrows), veccolNames);
    StreamOperator<?> svecdata = new MemSourceStreamOp(Arrays.asList(vecrows), veccolNames);
    Pipeline pl = new Pipeline().add(softmax).add(vsoftmax).add(svsoftmax).add(vssoftmax);
    PipelineModel model = pl.fit(vecdata);

    // Batch path.
    BatchOperator<?> result = model.transform(vecdata).select(checkedCols);
    List<Row> data = result.lazyPrint(100).collect();
    for (Row row : data) {
        // Bound comes from the selected columns so the last prediction column is checked too.
        // NOTE(review): the previous bound (i < 3) never compared "svpredLr"; confirm that
        // column is expected to match the label on every row.
        for (int i = 1; i < checkedCols.length; ++i) {
            Assert.assertEquals(row.getField(0), row.getField(i));
        }
    }

    // Stream path: collect the transformed rows through a sink, then run the job.
    CollectSinkStreamOp sop = model.transform(svecdata).select(checkedCols).link(new CollectSinkStreamOp());
    StreamOperator.execute();
    List<Row> rows = sop.getAndRemoveValues();
    for (Row row : rows) {
        for (int i = 1; i < checkedCols.length; ++i) {
            Assert.assertEquals(row.getField(0), row.getField(i));
        }
    }
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Test(org.junit.Test)

Example 4 with PipelineModel

use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.

The class SoftmaxTest, method pipelineTest1.

/**
 * Fits the four-stage softmax pipeline on an in-memory batch source and asserts that each
 * selected prediction column equals the label column on every transformed row.
 */
@Test
public void pipelineTest1() {
    // Column 0 is the label; every following column is a prediction to compare against it.
    String[] checkedCols = new String[] { "label", "predLr", "vpredLr", "svpredLr" };

    BatchOperator<?> vecmdata = new MemSourceBatchOp(Arrays.asList(vecrows), veccolNames);
    Pipeline pl = new Pipeline().add(softmax).add(vsoftmax).add(svsoftmax).add(vssoftmax);
    PipelineModel modelm = pl.fit(vecmdata);

    List<Row> data = modelm.transform(vecmdata).select(checkedCols).collect();
    for (Row row : data) {
        // Bound comes from the selected columns so the last prediction column is checked too.
        // NOTE(review): the previous bound (i < 3) never compared "svpredLr"; confirm that
        // column is expected to match the label on every row.
        for (int i = 1; i < checkedCols.length; ++i) {
            Assert.assertEquals(row.getField(0), row.getField(i));
        }
    }
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) Row(org.apache.flink.types.Row) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Test(org.junit.Test)

Example 5 with PipelineModel

use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.

The class LogisticRegressionMixVecTest, method batchMixVecTest5.

/**
 * Smoke test: fits a single-stage logistic-regression pipeline on the "svec" column (no
 * intercept, no standardization) and collects the transformed output. It makes no assertions;
 * it only requires that fit, transform and collect complete.
 */
@Test
public void batchMixVecTest5() {
    BatchOperator<?> input = (BatchOperator<?>) getData();

    LogisticRegression sparseLr = new LogisticRegression()
        .setVectorCol("svec")
        .setWithIntercept(false)
        .setStandardization(false)
        .setLabelCol("labels")
        .setPredictionCol("pred");
    Pipeline singleStage = new Pipeline().add(sparseLr);

    // Train and score on the same data; the collected rows are intentionally discarded.
    singleStage.fit(input).transform(input).collect();
}
Also used : LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Test(org.junit.Test)

Aggregations

PipelineModel (com.alibaba.alink.pipeline.PipelineModel)43 Pipeline (com.alibaba.alink.pipeline.Pipeline)34 Test (org.junit.Test)30 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)17 Row (org.apache.flink.types.Row)15 LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression)14 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)10 CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)9 VectorAssembler (com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler)7 AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp)6 MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp)6 CsvSourceStreamOp (com.alibaba.alink.operator.stream.source.CsvSourceStreamOp)4 File (java.io.File)4 AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)3 TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp)3 JsonValueStreamOp (com.alibaba.alink.operator.stream.dataproc.JsonValueStreamOp)3 SplitStreamOp (com.alibaba.alink.operator.stream.dataproc.SplitStreamOp)3 EvalBinaryClassStreamOp (com.alibaba.alink.operator.stream.evaluation.EvalBinaryClassStreamOp)3 FtrlPredictStreamOp (com.alibaba.alink.operator.stream.onlinelearning.FtrlPredictStreamOp)3 FtrlTrainStreamOp (com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp)3