use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.
the class Chap14 method c_6.
/**
 * Assembles and runs a streaming job around FTRL: train on one part of a CSV stream, pass the
 * emitted models through a model filter, then predict and evaluate on the other part.
 *
 * <p>Three result streams are printed, each tagged through an {@code out_type} column:
 * {@code 'Model'}, {@code 'Pred Sample'} and {@code 'Eval Metric'}.
 *
 * @throws Exception whatever the operators or {@code StreamOperator.execute()} raise
 */
static void c_6() throws Exception {
    // Raw click-through records, read as a stream; the header line is skipped.
    CsvSourceStreamOp rawStream = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(SCHEMA_STRING)
        .setIgnoreFirstLine(true);

    // Previously saved feature-engineering pipeline model.
    PipelineModel featureModel = PipelineModel.load(DATA_DIR + FEATURE_MODEL_FILE);

    // Split with fraction 0.5: the main output feeds training, side output 0 feeds testing.
    SplitStreamOp splitter = new SplitStreamOp()
        .setFraction(0.5)
        .linkFrom(rawStream);
    StreamOperator<?> trainStream = featureModel.transform(splitter);
    StreamOperator<?> testStream = featureModel.transform(splitter.getSideOutput(0));

    // Batch-loaded initial model, handed to both the trainer and the predictor below.
    AkSourceBatchOp initialModel = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + INIT_MODEL_FILE);

    // Online FTRL training on the training stream.
    FtrlTrainStreamOp modelStream = new FtrlTrainStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(NUM_HASH_FEATURES)
        .linkFrom(trainStream);

    // Model filter configured with accuracy/AUC thresholds, fed by the models and the training stream.
    // NOTE(review): presumably only models meeting both thresholds are forwarded - confirm in the operator docs.
    FtrlModelFilterStreamOp filteredModels = new FtrlModelFilterStreamOp()
        .setPositiveLabelValueString("1")
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setAccuracyThreshold(0.83)
        .setAucThreshold(0.71)
        .linkFrom(modelStream, trainStream);
    filteredModels
        .select("'Model' AS out_type, *")
        .print();

    // Prediction on the test stream, driven by the filtered model stream.
    FtrlPredictStreamOp predictions = new FtrlPredictStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setReservedCols(new String[] { LABEL_COL_NAME })
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .linkFrom(filteredModels, testStream);
    predictions
        .sample(0.0001)
        .select("'Pred Sample' AS out_type, *")
        .print();

    // Binary-classification evaluation; selected metrics are extracted from the JSON "Data" column.
    predictions
        .link(
            new EvalBinaryClassStreamOp()
                .setPositiveLabelValueString("1")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .setTimeInterval(10)
        )
        .link(
            new JsonValueStreamOp()
                .setSelectedCol("Data")
                .setReservedCols(new String[] { "Statistics" })
                .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
                .setJsonPath(new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" })
        )
        .select("'Eval Metric' AS out_type, *")
        .print();

    // Nothing runs until the stream job is executed.
    StreamOperator.execute();
}
use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.
the class LogisticRegTest method pipelineTestBatch.
/**
 * Fits a pipeline of three logistic regressions (feature columns, the "vec" column and the
 * "svec" column) on the batch data from {@code getData()} and compares predictions with labels.
 */
@Test
public void pipelineTestBatch() {
    final String labelCol = "labels";
    final String[] featureCols = { "f0", "f1", "f2", "f3" };
    final String denseVecCol = "vec";
    final String sparseVecCol = "svec";

    LogisticRegression byFeatures = new LogisticRegression()
        .setLabelCol(labelCol)
        .setFeatureCols(featureCols)
        .setPredictionCol("lrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();
    LogisticRegression byVector = new LogisticRegression()
        .setLabelCol(labelCol)
        .setVectorCol(denseVecCol)
        .setPredictionCol("vlrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();
    LogisticRegression bySparseVector = new LogisticRegression()
        .setLabelCol(labelCol)
        .setVectorCol(sparseVecCol)
        .setPredictionCol("svlrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();

    Pipeline pipeline = new Pipeline()
        .add(byFeatures)
        .add(byVector)
        .add(bySparseVector);

    BatchOperator<?> source = getData();
    PipelineModel fitted = pipeline.fit(source);
    BatchOperator<?> predicted = fitted
        .transform(source)
        .select(new String[] { "labels", "lrpred", "vlrpred", "svlrpred" });

    List<Row> rows = predicted.collect();
    for (Row row : rows) {
        // NOTE(review): only fields 1 and 2 ("lrpred", "vlrpred") are compared with the label;
        // "svlrpred" at index 3 is selected but never asserted - confirm this is intentional.
        Assert.assertEquals(row.getField(0), row.getField(1));
        Assert.assertEquals(row.getField(0), row.getField(2));
    }
}
use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.
the class SoftmaxTest method pipelineTest.
/**
 * Fits the four-stage softmax pipeline on in-memory batch data, then checks predictions against
 * the label in both batch mode and stream mode.
 *
 * @throws Exception whatever the operators or {@code StreamOperator.execute()} raise
 */
@Test
public void pipelineTest() throws Exception {
    BatchOperator<?> batchSource = new MemSourceBatchOp(Arrays.asList(vecrows), veccolNames);
    StreamOperator<?> streamSource = new MemSourceStreamOp(Arrays.asList(vecrows), veccolNames);

    Pipeline pipeline = new Pipeline()
        .add(softmax)
        .add(vsoftmax)
        .add(svsoftmax)
        .add(vssoftmax);
    PipelineModel fitted = pipeline.fit(batchSource);

    // Batch check.
    List<Row> batchRows = fitted
        .transform(batchSource)
        .select(new String[] { "label", "predLr", "vpredLr", "svpredLr" })
        .lazyPrint(100)
        .collect();
    for (Row row : batchRows) {
        // NOTE(review): only fields 1 and 2 are compared with the label; "svpredLr" at index 3
        // is selected but never asserted - confirm this is intentional.
        Assert.assertEquals(row.getField(0), row.getField(1));
        Assert.assertEquals(row.getField(0), row.getField(2));
    }

    // Stream check: same model, results gathered through a collecting sink.
    CollectSinkStreamOp sink = fitted
        .transform(streamSource)
        .select(new String[] { "label", "predLr", "vpredLr", "svpredLr" })
        .link(new CollectSinkStreamOp());
    StreamOperator.execute();

    List<Row> streamRows = sink.getAndRemoveValues();
    for (Row row : streamRows) {
        // Same two-column comparison as in the batch check above.
        Assert.assertEquals(row.getField(0), row.getField(1));
        Assert.assertEquals(row.getField(0), row.getField(2));
    }
}
use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.
the class SoftmaxTest method pipelineTest1.
/**
 * Batch-only check: fits the four-stage softmax pipeline on in-memory data and compares
 * predictions with the label.
 */
@Test
public void pipelineTest1() {
    BatchOperator<?> source = new MemSourceBatchOp(Arrays.asList(vecrows), veccolNames);

    Pipeline pipeline = new Pipeline()
        .add(softmax)
        .add(vsoftmax)
        .add(svsoftmax)
        .add(vssoftmax);
    PipelineModel fitted = pipeline.fit(source);

    List<Row> rows = fitted
        .transform(source)
        .select(new String[] { "label", "predLr", "vpredLr", "svpredLr" })
        .collect();
    for (Row row : rows) {
        // NOTE(review): only fields 1 and 2 are compared with the label; "svpredLr" at index 3
        // is selected but never asserted - confirm this is intentional.
        Assert.assertEquals(row.getField(0), row.getField(1));
        Assert.assertEquals(row.getField(0), row.getField(2));
    }
}
use of com.alibaba.alink.pipeline.PipelineModel in project Alink by alibaba.
the class LogisticRegressionMixVecTest method batchMixVecTest5.
/**
 * Smoke test: fits a single logistic regression on the "svec" column (intercept and
 * standardization switched off) and runs the fitted pipeline over the same data.
 * There are no assertions; the test passes as long as nothing throws.
 */
@Test
public void batchMixVecTest5() {
    BatchOperator<?> source = (BatchOperator<?>) getData();

    Pipeline singleStage = new Pipeline()
        .add(
            new LogisticRegression()
                .setVectorCol("svec")
                .setWithIntercept(false)
                .setStandardization(false)
                .setLabelCol("labels")
                .setPredictionCol("pred")
        );

    PipelineModel fitted = singleStage.fit(source);
    // collect() is what triggers the transform; its result is intentionally discarded.
    fitted.transform(source).collect();
}
Aggregations