Search in sources :

Example 1 with LogisticRegressionTrainBatchOp

use of com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp in project Alink by alibaba.

the class Chap08 method c_5.

/**
 * Wires a logistic regression trainer and predictor between the train/test Ak sources,
 * registers lazy printing of the train info, model info and a prediction sample,
 * sinks the predictions to an Ak file, and then calls {@code BatchOperator.execute()}.
 *
 * @throws Exception propagated from the Alink operators or from the execute call
 */
static void c_5() throws Exception {
    AkSourceBatchOp trainSource = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSource = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TEST_FILE);

    LogisticRegressionTrainBatchOp trainer = new LogisticRegressionTrainBatchOp()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME);
    LogisticRegressionPredictBatchOp scorer = new LogisticRegressionPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // The trainer consumes the training source; the predictor consumes (model, test data).
    trainSource.link(trainer);
    scorer.linkFrom(trainer, testSource);

    trainer
        .lazyPrintTrainInfo()
        .lazyPrintModelInfo();

    // Print the first 5 predictions under a title, then write the predictions to LR_PRED_FILE.
    scorer
        .lazyPrint(5, "< Prediction >")
        .link(
            new AkSinkBatchOp()
                .setFilePath(DATA_DIR + LR_PRED_FILE)
                .setOverwriteSink(true)
        );

    BatchOperator.execute();
}
Also used : LogisticRegressionPredictBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp) LogisticRegressionTrainBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)

Example 2 with LogisticRegressionTrainBatchOp

use of com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp in project Alink by alibaba.

the class FTRLExample method main.

/**
 * FTRL online-learning example on the Avazu click data: fits a feature pipeline and an
 * initial logistic regression model on a small batch file, then feeds a split stream into
 * FTRL training, FTRL prediction and binary-classification evaluation.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception propagated from the Alink operators
 */
public static void main(String[] args) throws Exception {
    String avazuSchema = "id string, click string, dt string, C1 string, banner_pos int, site_id string, site_domain string, "
        + "site_category string, app_id string, app_domain string, app_category string, device_id string, "
        + "device_ip string, device_model string, device_type string, device_conn_type string, C14 int, C15 int, "
        + "C16 int, C17 int, C18 int, C19 int, C20 int, C21 int";

    CsvSourceBatchOp batchSample = new CsvSourceBatchOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-small.csv")
        .setSchemaStr(avazuSchema);
    batchSample.firstN(10).print();

    String labelCol = "click";
    String[] hashedCols = {
        "C1", "banner_pos", "site_category", "app_domain", "app_category", "device_type", "device_conn_type",
        "C14", "C15", "C16", "C17", "C18", "C19", "C20", "C21",
        "site_id", "site_domain", "device_id", "device_model"
    };
    String[] categoricalCols = {
        "C1", "banner_pos", "site_category", "app_domain", "app_category", "device_type", "device_conn_type",
        "site_id", "site_domain", "device_id", "device_model"
    };
    String[] numericCols = { "C14", "C15", "C16", "C17", "C18", "C19", "C20", "C21" };

    // Output column of the feature engineering stage and the hashing dimension.
    String vectorCol = "vec";
    int hashDim = 30000;

    // Feature engineering: scale the numeric columns, then hash the selected columns into one vector.
    Pipeline featureStages = new Pipeline()
        .add(
            new StandardScaler()
                .setSelectedCols(numericCols)
        )
        .add(
            new FeatureHasher()
                .setSelectedCols(hashedCols)
                .setCategoricalCols(categoricalCols)
                .setOutputCol(vectorCol)
                .setNumFeatures(hashDim)
        );

    // Fit the feature pipeline on the batch sample.
    PipelineModel featureModel = featureStages.fit(batchSample);

    // Streaming source used for online training and evaluation.
    CsvSourceStreamOp clickStream = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(avazuSchema)
        .setIgnoreFirstLine(true);

    // Split the stream: the main output feeds training, side output 0 feeds prediction/evaluation.
    SplitStreamOp halves = new SplitStreamOp()
        .setFraction(0.5)
        .linkFrom(clickStream);

    // Train the initial batch model that both FTRL operators are constructed with.
    LogisticRegressionTrainBatchOp lrTrainer = new LogisticRegressionTrainBatchOp()
        .setVectorCol(vectorCol)
        .setLabelCol(labelCol)
        .setWithIntercept(true)
        .setMaxIter(10);
    BatchOperator<?> warmStartModel = featureModel.transform(batchSample).link(lrTrainer);

    // FTRL training on the transformed main output of the splitter.
    FtrlTrainStreamOp ftrlModel = new FtrlTrainStreamOp(warmStartModel)
        .setVectorCol(vectorCol)
        .setLabelCol(labelCol)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(hashDim)
        .linkFrom(featureModel.transform(halves));

    // FTRL prediction on the transformed side output, driven by the model stream.
    FtrlPredictStreamOp predictions = new FtrlPredictStreamOp(warmStartModel)
        .setVectorCol(vectorCol)
        .setPredictionCol("pred")
        .setReservedCols(new String[] { labelCol })
        .setPredictionDetailCol("details")
        .linkFrom(ftrlModel, featureModel.transform(halves.getSideOutput(0)));

    // Evaluate the predictions, extract selected metrics from the "Data" JSON column, and print.
    // NOTE(review): no explicit stream execute call is visible in this method — confirm
    // whether print() alone is enough to start the streaming job.
    predictions
        .link(
            new EvalBinaryClassStreamOp()
                .setLabelCol(labelCol)
                .setPredictionCol("pred")
                .setPredictionDetailCol("details")
                .setTimeInterval(10)
        )
        .link(
            new JsonValueStreamOp()
                .setSelectedCol("Data")
                .setReservedCols(new String[] { "Statistics" })
                .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
                .setJsonPath(new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" })
        )
        .print();
}
Also used : JsonValueStreamOp(com.alibaba.alink.operator.stream.dataproc.JsonValueStreamOp) LogisticRegressionTrainBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp) FtrlPredictStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlPredictStreamOp) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) SplitStreamOp(com.alibaba.alink.operator.stream.dataproc.SplitStreamOp) FeatureHasher(com.alibaba.alink.pipeline.feature.FeatureHasher) FtrlTrainStreamOp(com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp) StandardScaler(com.alibaba.alink.pipeline.dataproc.StandardScaler) EvalBinaryClassStreamOp(com.alibaba.alink.operator.stream.evaluation.EvalBinaryClassStreamOp) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp)

Example 3 with LogisticRegressionTrainBatchOp

use of com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp in project Alink by alibaba.

the class OnlineLearningTest method Test.

/**
 * Exercises the FTRL operators' constructors and drives {@code FtrlLearningKernel}
 * directly with hand-made inputs, then checks the coefficient array that was passed
 * to {@code updateModel}.
 *
 * @throws Exception propagated from the Alink operators under test
 */
@Test
public void Test() throws Exception {
    String[] xVars = new String[] { "f0", "f1", "f2", "f3" };
    String yVar = "labels";
    // Wildcard instead of a raw type: avoids raw-type warnings without changing behavior.
    BatchOperator<?> trainData = (BatchOperator<?>) getData(true);
    LogisticRegressionTrainBatchOp lr = new LogisticRegressionTrainBatchOp()
        .setLabelCol(yVar)
        .setFeatureCols(xVars)
        .setOptimMethod("lbfgs")
        .linkFrom(trainData);
    FtrlTrainStreamOp ftrl = new FtrlTrainStreamOp(lr)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.1)
        .setL2(0.1)
        .setFeatureCols(xVars)
        .setLabelCol(yVar)
        .setTimeInterval(1)
        .setWithIntercept(false);

    // Drive the learning kernel by hand, once with a dense and once with a sparse vector.
    FtrlLearningKernel kernel = new FtrlLearningKernel();
    kernel.setModelParams(new Params(), 2, new Object[] { 1, 0 });
    kernel.calcLocalWx(new double[] { 1, 2 }, new DenseVector(2), 0);
    kernel.getFeedbackVar(new double[] { 1, 2 });
    // coef is handed to updateModel and asserted afterwards, so it is expected to be updated in place.
    double[] coef = new double[] { 2.0, 3.0 };
    kernel.updateModel(coef, new DenseVector(2), new double[] { 1, 1 }, 1L, 0, 0);
    SparseVector svec = new SparseVector(2);
    svec.add(0, 1);
    svec.add(1, 1);
    kernel.updateModel(coef, svec, new double[] { 1, 1 }, 1L, 0, 0);
    ftrl.setLearningKernel(kernel);

    // JUnit's signature is assertEquals(expected, actual, delta); the expected constant goes
    // first so that a failure message reports the two values the right way round.
    Assert.assertEquals(-0.08761006569007045, coef[0], 0.0001);
    Assert.assertEquals(-0.08761006569007045, coef[1], 0.0001);

    // The remaining locals are never read: they only exercise the other constructor overloads.
    FtrlTrainStreamOp ftrlw = new FtrlTrainStreamOp(lr, new Params())
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.1)
        .setL2(0.1)
        .setFeatureCols(xVars)
        .setLabelCol(yVar)
        .setTimeInterval(1)
        .setWithIntercept(false);
    FtrlPredictStreamOp pred = new FtrlPredictStreamOp(lr)
        .setPredictionCol("pred")
        .setVectorCol("vec");
    FtrlPredictStreamOp predp = new FtrlPredictStreamOp(lr, new Params())
        .setPredictionCol("pred")
        .setVectorCol("vec");
}
Also used : LogisticRegressionTrainBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp) FtrlLearningKernel(com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp.FtrlLearningKernel) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) DenseVector(com.alibaba.alink.common.linalg.DenseVector) Test(org.junit.Test)

Example 4 with LogisticRegressionTrainBatchOp

use of com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp in project Alink by alibaba.

the class Chap10 method c_1.

/**
 * Trains and evaluates a logistic regression model on the features produced by
 * {@code CLAUSE_CREATE_FEATURES}, printing the train info, the importance values
 * reported in the train info, and the binary-classification metrics.
 *
 * @throws Exception propagated from the Alink operators or from the execute call
 */
static void c_1() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TRAIN_FILE)
        .select(CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TEST_FILE)
        .select(CLAUSE_CREATE_FEATURES);

    // Every column of the derived training table except the label is used as a feature.
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);
    trainSet.lazyPrint(5, "< new features >");

    LogisticRegressionTrainBatchOp lrTrainer = new LogisticRegressionTrainBatchOp()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME);
    LogisticRegressionPredictBatchOp lrPredictor = new LogisticRegressionPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    trainSet.link(lrTrainer);
    lrPredictor.linkFrom(lrTrainer, testSet);

    // Callback handed to lazyCollectTrainInfo: forwards column names and importance to printImportance.
    Consumer<LinearModelTrainInfo> importanceReporter = new Consumer<LinearModelTrainInfo>() {

        @Override
        public void accept(LinearModelTrainInfo info) {
            printImportance(info.getColNames(), info.getImportance());
        }
    };
    lrTrainer
        .lazyPrintTrainInfo()
        .lazyCollectTrainInfo(importanceReporter);

    lrPredictor.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("2")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics()
    );

    BatchOperator.execute();
}
Also used : LogisticRegressionPredictBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp) LogisticRegressionTrainBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp) LinearModelTrainInfo(com.alibaba.alink.operator.common.linear.LinearModelTrainInfo) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 5 with LogisticRegressionTrainBatchOp

use of com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp in project Alink by alibaba.

the class Chap10 method c_2.

/**
 * Same flow as a plain logistic regression train/predict/evaluate run on the features
 * produced by {@code CLAUSE_CREATE_FEATURES}, but with the trainer's L1 parameter set
 * to 0.01. Prints the train info, the importance values reported in the train info,
 * and the binary-classification metrics.
 *
 * @throws Exception propagated from the Alink operators or from the execute call
 */
static void c_2() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TRAIN_FILE)
        .select(CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TEST_FILE)
        .select(CLAUSE_CREATE_FEATURES);

    // Every column of the derived training table except the label is used as a feature.
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);
    trainSet.lazyPrint(5, "< new features >");

    LogisticRegressionTrainBatchOp l1Trainer = new LogisticRegressionTrainBatchOp()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setL1(0.01);
    LogisticRegressionPredictBatchOp lrPredictor = new LogisticRegressionPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    trainSet.link(l1Trainer);
    lrPredictor.linkFrom(l1Trainer, testSet);

    // Callback handed to lazyCollectTrainInfo: forwards column names and importance to printImportance.
    Consumer<LinearModelTrainInfo> importanceReporter = new Consumer<LinearModelTrainInfo>() {

        @Override
        public void accept(LinearModelTrainInfo info) {
            printImportance(info.getColNames(), info.getImportance());
        }
    };
    l1Trainer
        .lazyPrintTrainInfo()
        .lazyCollectTrainInfo(importanceReporter);

    lrPredictor.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("2")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics()
    );

    BatchOperator.execute();
}
Also used : LogisticRegressionPredictBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp) LogisticRegressionTrainBatchOp(com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp) LinearModelTrainInfo(com.alibaba.alink.operator.common.linear.LinearModelTrainInfo) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Aggregations

LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp): 6; LogisticRegressionPredictBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp): 3; AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 3; EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 2; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 2; CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 2; LinearModelTrainInfo (com.alibaba.alink.operator.common.linear.LinearModelTrainInfo): 2; CsvSourceStreamOp (com.alibaba.alink.operator.stream.source.CsvSourceStreamOp): 2; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 2; DenseVector (com.alibaba.alink.common.linalg.DenseVector): 1; SparseVector (com.alibaba.alink.common.linalg.SparseVector): 1; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 1; JsonValueStreamOp (com.alibaba.alink.operator.stream.dataproc.JsonValueStreamOp): 1; SplitStreamOp (com.alibaba.alink.operator.stream.dataproc.SplitStreamOp): 1; EvalBinaryClassStreamOp (com.alibaba.alink.operator.stream.evaluation.EvalBinaryClassStreamOp): 1; FtrlPredictStreamOp (com.alibaba.alink.operator.stream.onlinelearning.FtrlPredictStreamOp): 1; FtrlTrainStreamOp (com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp): 1; FtrlLearningKernel (com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp.FtrlLearningKernel): 1; Pipeline (com.alibaba.alink.pipeline.Pipeline): 1; StandardScaler (com.alibaba.alink.pipeline.dataproc.StandardScaler): 1