Search in sources:

Example 1 with Select

use of com.alibaba.alink.pipeline.sql.Select in project Alink by alibaba.

From the class LocalPredictorTest, method getPipeline.

/**
 * Assembles the pipeline returned to the caller.
 *
 * <p>The returned pipeline contains, in order: the binarizer, the vector
 * assembler, the quantile discretizer and the generalized linear regression.
 * The {@code Lda} and {@code Select} stages are configured here but are not
 * added to the returned pipeline.
 *
 * @return a new pipeline holding the four stages listed above
 */
protected Pipeline getPipeline() {
    // Model mapper: discretizes "sepal_length" into two buckets.
    QuantileDiscretizer sepalLengthDiscretizer = new QuantileDiscretizer()
        .setNumBuckets(2)
        .setSelectedCols("sepal_length");

    // SISO mapper: binarizes "petal_width" into "bina" with threshold 1.0.
    Binarizer petalWidthBinarizer = new Binarizer()
        .setSelectedCol("petal_width")
        .setOutputCol("bina")
        .setReservedCols("sepal_length", "petal_width", "petal_length", "category")
        .setThreshold(1.0);

    // MISO mapper: assembles two columns into the "assem" output column.
    VectorAssembler featureAssembler = new VectorAssembler()
        .setSelectedCols("sepal_length", "petal_width")
        .setOutputCol("assem")
        .setReservedCols("sepal_length", "petal_width", "petal_length", "category");

    // Lda: configured only; not part of the returned pipeline.
    Lda topicModel = new Lda()
        .setPredictionCol("lda_pred")
        .setPredictionDetailCol("lda_pred_detail")
        .setSelectedCol("category")
        .setTopicNum(2)
        .setRandomSeed(0);

    // Select: configured only; not part of the returned pipeline.
    Select castToDouble = new Select()
        .setClause("cast(sepal_length as double) as sepal_length, "
            + "cast(petal_width as double) as petal_width, "
            + "cast(petal_length as double) as petal_length, "
            + "category");

    // Glm: regresses "petal_length" on the two feature columns.
    GeneralizedLinearRegression regression = new GeneralizedLinearRegression()
        .setFeatureCols("sepal_length", "petal_width")
        .setLabelCol("petal_length")
        .setPredictionCol("glm_pred");

    Pipeline assembled = new Pipeline();
    return assembled
        .add(petalWidthBinarizer)
        .add(featureAssembler)
        .add(sepalLengthDiscretizer)
        .add(regression);
}
Also used : VectorAssembler(com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler) GeneralizedLinearRegression(com.alibaba.alink.pipeline.regression.GeneralizedLinearRegression) Lda(com.alibaba.alink.pipeline.clustering.Lda) Select(com.alibaba.alink.pipeline.sql.Select) Binarizer(com.alibaba.alink.pipeline.feature.Binarizer) QuantileDiscretizer(com.alibaba.alink.pipeline.feature.QuantileDiscretizer)

Example 2 with Select

use of com.alibaba.alink.pipeline.sql.Select in project Alink by alibaba.

From the class Chap01, method c_5_4.

/**
 * Fits a two-stage pipeline on an in-memory (x, gmv) data set, saves the
 * fitted model to disk, reloads it, and prints its predictions for the
 * inputs 2018 and 2019 in both batch and stream mode.
 *
 * <p>The pipeline first derives {@code x2 = x*x} with a {@code Select} stage
 * and then fits a {@code LinearRegression} on {@code x} and {@code x2}
 * against {@code gmv}, writing predictions to the {@code pred} column.
 *
 * @throws Exception if a stale model file cannot be removed, or if any of the
 *                   fit, save, load, transform or execute calls fails
 */
static void c_5_4() throws Exception {
    // Single definition of the model location, used for cleanup, save and load.
    final String modelPath = DATA_DIR + "gmv_pipeline.model";

    MemSourceBatchOp train_set = new MemSourceBatchOp(
        new Row[] {
            Row.of(2009, 0.5),
            Row.of(2010, 9.36),
            Row.of(2011, 52.0),
            Row.of(2012, 191.0),
            Row.of(2013, 350.0),
            Row.of(2014, 571.0),
            Row.of(2015, 912.0),
            Row.of(2016, 1207.0),
            Row.of(2017, 1682.0)
        },
        new String[] { "x", "gmv" });

    Pipeline pipeline = new Pipeline()
        .add(new Select().setClause("*, x*x AS x2"))
        .add(new LinearRegression()
            .setFeatureCols("x", "x2")
            .setLabelCol("gmv")
            .setPredictionCol("pred"));

    // Remove a model left over from a previous run. File.delete() reports
    // failure through its return value, so check it instead of ignoring it.
    File file = new File(modelPath);
    if (file.exists() && !file.delete()) {
        throw new java.io.IOException("Unable to delete existing model file: " + modelPath);
    }

    pipeline.fit(train_set).save(modelPath);
    BatchOperator.execute();

    PipelineModel pipelineModel = PipelineModel.load(modelPath);

    // Batch prediction.
    BatchOperator<?> pred_batch = new MemSourceBatchOp(new Integer[] { 2018, 2019 }, "x");
    pipelineModel.transform(pred_batch).print();

    // Stream prediction with the same inputs.
    MemSourceStreamOp pred_stream = new MemSourceStreamOp(new Integer[] { 2018, 2019 }, "x");
    pipelineModel.transform(pred_stream).print();
    StreamOperator.execute();
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) Select(com.alibaba.alink.pipeline.sql.Select) LinearRegression(com.alibaba.alink.pipeline.regression.LinearRegression) File(java.io.File) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel)

Example 3 with Select

use of com.alibaba.alink.pipeline.sql.Select in project Alink by alibaba.

From the class Chap02, method c_6.

/**
 * Runs two model-inspection examples back to back.
 *
 * <p>First, a C45 decision tree is trained on an in-memory weather data set
 * and written to {@code DATA_DIR + TREE_MODEL_FILE}; the stored model is then
 * read back, its model info is printed lazily, and the tree is saved as
 * {@code tree_model.png} under {@code DATA_DIR}.
 *
 * <p>Second, a Select + LinearRegression pipeline is fitted on an (x, gmv)
 * data set, saved to {@code DATA_DIR + PIPELINE_MODEL_FILE}, reloaded, its
 * stages are printed with their index, and the model info of the stage at
 * index 1 is printed lazily.
 *
 * @throws Exception if any of the train, save, load or execute calls fails
 */
static void c_6() throws Exception {
    final String treeModelPath = DATA_DIR + TREE_MODEL_FILE;
    final String pipelineModelPath = DATA_DIR + PIPELINE_MODEL_FILE;

    // ---- Part 1: decision tree ----
    Row[] weatherRows = new Row[] {
        Row.of("sunny", 85.0, 85.0, false, "no"),
        Row.of("sunny", 80.0, 90.0, true, "no"),
        Row.of("overcast", 83.0, 78.0, false, "yes"),
        Row.of("rainy", 70.0, 96.0, false, "yes"),
        Row.of("rainy", 68.0, 80.0, false, "yes"),
        Row.of("rainy", 65.0, 70.0, true, "no"),
        Row.of("overcast", 64.0, 65.0, true, "yes"),
        Row.of("sunny", 72.0, 95.0, false, "no"),
        Row.of("sunny", 69.0, 70.0, false, "yes"),
        Row.of("rainy", 75.0, 80.0, false, "yes"),
        Row.of("sunny", 75.0, 70.0, true, "yes"),
        Row.of("overcast", 72.0, 90.0, true, "yes"),
        Row.of("overcast", 81.0, 75.0, false, "yes"),
        Row.of("rainy", 71.0, 80.0, true, "no")
    };
    String[] weatherCols = new String[] { "outlook", "Temperature", "Humidity", "Windy", "play" };
    MemSourceBatchOp weatherSource = new MemSourceBatchOp(weatherRows, weatherCols);

    // Train the tree and sink the model, overwriting any previous file.
    weatherSource
        .link(new C45TrainBatchOp()
            .setFeatureCols("outlook", "Temperature", "Humidity", "Windy")
            .setCategoricalCols("outlook", "Windy")
            .setLabelCol("play"))
        .link(new AkSinkBatchOp()
            .setFilePath(treeModelPath)
            .setOverwriteSink(true));
    BatchOperator.execute();

    // Callback that renders the collected tree to an image; an I/O failure is
    // only reported on stderr and does not abort the run.
    Consumer<DecisionTreeModelInfo> treeImageWriter = (DecisionTreeModelInfo treeInfo) -> {
        try {
            treeInfo.saveTreeAsImage(DATA_DIR + "tree_model.png", true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    };

    new AkSourceBatchOp()
        .setFilePath(treeModelPath)
        .link(new DecisionTreeModelInfoBatchOp()
            .lazyPrintModelInfo()
            .lazyCollectModelInfo(treeImageWriter));
    BatchOperator.execute();

    // ---- Part 2: linear regression pipeline ----
    Row[] gmvRows = new Row[] {
        Row.of(2009, 0.5),
        Row.of(2010, 9.36),
        Row.of(2011, 52.0),
        Row.of(2012, 191.0),
        Row.of(2013, 350.0),
        Row.of(2014, 571.0),
        Row.of(2015, 912.0),
        Row.of(2016, 1207.0),
        Row.of(2017, 1682.0)
    };
    MemSourceBatchOp gmvTrainSet = new MemSourceBatchOp(gmvRows, new String[] { "x", "gmv" });

    Pipeline gmvPipeline = new Pipeline()
        .add(new Select().setClause("*, x*x AS x2"))
        .add(new LinearRegression()
            .setFeatureCols("x", "x2")
            .setLabelCol("gmv")
            .setPredictionCol("pred"));

    gmvPipeline.fit(gmvTrainSet).save(pipelineModelPath, true);
    BatchOperator.execute();

    PipelineModel loadedModel = PipelineModel.load(pipelineModelPath);
    TransformerBase<?>[] stages = loadedModel.getTransformers();

    // List every stage as "<index>\t<stage>".
    for (int idx = 0; idx < stages.length; idx++) {
        System.out.println(idx + "\t" + stages[idx]);
    }

    // The stage at index 1 is cast to the fitted linear regression model.
    LinearRegressionModel regressionStage = (LinearRegressionModel) stages[1];
    regressionStage.getModelData().link(new LinearRegModelInfoBatchOp().lazyPrintModelInfo());
    BatchOperator.execute();
}
Also used : C45TrainBatchOp(com.alibaba.alink.operator.batch.classification.C45TrainBatchOp) LinearRegModelInfoBatchOp(com.alibaba.alink.operator.batch.regression.LinearRegModelInfoBatchOp) IOException(java.io.IOException) DecisionTreeModelInfoBatchOp(com.alibaba.alink.operator.batch.classification.DecisionTreeModelInfoBatchOp) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Consumer(java.util.function.Consumer) DecisionTreeModelInfo(com.alibaba.alink.operator.common.tree.TreeModelInfo.DecisionTreeModelInfo) Select(com.alibaba.alink.pipeline.sql.Select) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) LinearRegression(com.alibaba.alink.pipeline.regression.LinearRegression) TransformerBase(com.alibaba.alink.pipeline.TransformerBase)

Aggregations

Select (com.alibaba.alink.pipeline.sql.Select): 3; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 2; Pipeline (com.alibaba.alink.pipeline.Pipeline): 2; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 2; LinearRegression (com.alibaba.alink.pipeline.regression.LinearRegression): 2; C45TrainBatchOp (com.alibaba.alink.operator.batch.classification.C45TrainBatchOp): 1; DecisionTreeModelInfoBatchOp (com.alibaba.alink.operator.batch.classification.DecisionTreeModelInfoBatchOp): 1; LinearRegModelInfoBatchOp (com.alibaba.alink.operator.batch.regression.LinearRegModelInfoBatchOp): 1; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 1; AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 1; DecisionTreeModelInfo (com.alibaba.alink.operator.common.tree.TreeModelInfo.DecisionTreeModelInfo): 1; MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp): 1; TransformerBase (com.alibaba.alink.pipeline.TransformerBase): 1; Lda (com.alibaba.alink.pipeline.clustering.Lda): 1; VectorAssembler (com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler): 1; Binarizer (com.alibaba.alink.pipeline.feature.Binarizer): 1; QuantileDiscretizer (com.alibaba.alink.pipeline.feature.QuantileDiscretizer): 1; GeneralizedLinearRegression (com.alibaba.alink.pipeline.regression.GeneralizedLinearRegression): 1; File (java.io.File): 1; IOException (java.io.IOException): 1