Search in sources :

Example 1 with MemSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.MemSourceBatchOp in project Alink by alibaba.

In class Chap01, method c_5_2.

/**
 * Trains a linear regression on an in-memory (x, gmv) table using {@code x} and {@code x*x} as
 * features, links the trainer to an Ak sink at {@code DATA_DIR + "gmv_reg.model"}, then reads that
 * file back as the model and prints predictions (column {@code pred}) for x = 2018 and x = 2019.
 *
 * @throws Exception as declared; raised by whichever of the operator calls below fails
 */
static void c_5_2() throws Exception {
    // Same path is used for writing the model and for reading it back.
    String modelPath = DATA_DIR + "gmv_reg.model";

    Row[] observations = new Row[] {
        Row.of(2009, 0.5),
        Row.of(2010, 9.36),
        Row.of(2011, 52.0),
        Row.of(2012, 191.0),
        Row.of(2013, 350.0),
        Row.of(2014, 571.0),
        Row.of(2015, 912.0),
        Row.of(2016, 1207.0),
        Row.of(2017, 1682.0)
    };
    BatchOperator<?> rawTrain = new MemSourceBatchOp(observations, new String[] { "x", "gmv" });
    BatchOperator<?> rawPred = new MemSourceBatchOp(new Integer[] { 2018, 2019 }, "x");

    // Add the squared term so the linear model can fit a quadratic trend.
    BatchOperator<?> trainFeatures = rawTrain.select("x, x*x AS x2, gmv");
    LinearRegTrainBatchOp trainer = new LinearRegTrainBatchOp()
        .setFeatureCols("x", "x2")
        .setLabelCol("gmv");
    trainFeatures.link(trainer);

    AkSinkBatchOp modelSink = new AkSinkBatchOp()
        .setFilePath(modelPath)
        .setOverwriteSink(true);
    trainer.link(modelSink);
    BatchOperator.execute();

    // Reload the model from the file written above and score the prediction rows.
    BatchOperator<?> savedModel = new AkSourceBatchOp().setFilePath(modelPath);
    BatchOperator<?> predFeatures = rawPred.select("x, x*x AS x2");
    LinearRegPredictBatchOp predictor = new LinearRegPredictBatchOp().setPredictionCol("pred");
    predictor.linkFrom(savedModel, predFeatures).print();
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) LinearRegTrainBatchOp(com.alibaba.alink.operator.batch.regression.LinearRegTrainBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) LinearRegPredictBatchOp(com.alibaba.alink.operator.batch.regression.LinearRegPredictBatchOp)

Example 2 with MemSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.MemSourceBatchOp in project Alink by alibaba.

In class Chap07, method c_5.

/**
 * Builds a five-row in-memory table (the last row is all nulls), prints it lazily, then runs a
 * two-stage imputation pipeline: {@code col1} is filled with the fixed value {@code "e"}, and
 * {@code col2}/{@code col3} use the {@code MEAN} strategy. The transformed table is printed,
 * followed by the value of {@code 210 / 4}.
 *
 * @throws Exception as declared; raised by whichever of the operator calls below fails
 */
static void c_5() throws Exception {
    Row[] samples = new Row[] {
        Row.of("a", 10.0, 100),
        Row.of("b", -2.5, 9),
        Row.of("c", 100.2, 1),
        Row.of("d", -99.9, 100),
        Row.of(null, null, null)
    };
    String[] columnNames = new String[] { "col1", "col2", "col3" };
    MemSourceBatchOp table = new MemSourceBatchOp(samples, columnNames);
    table.lazyPrint(-1, "< origin data >");

    Imputer fillText = new Imputer()
        .setSelectedCols("col1")
        .setStrategy(Strategy.VALUE)
        .setFillValue("e");
    Imputer fillNumbers = new Imputer()
        .setSelectedCols("col2", "col3")
        .setStrategy(Strategy.MEAN);
    Pipeline imputation = new Pipeline().add(fillText).add(fillNumbers);

    imputation.fit(table).transform(table).print();

    // Integer division: prints 52. NOTE(review): presumably shown for comparison with the
    // imputed col3 value, since 100 + 9 + 1 + 100 = 210 over 4 non-null rows — confirm.
    System.out.println(210 / 4);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) Imputer(com.alibaba.alink.pipeline.dataproc.Imputer) Row(org.apache.flink.types.Row) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 3 with MemSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.MemSourceBatchOp in project Alink by alibaba.

In class Chap09, method c_2_5.

/**
 * Loads the 14-row weather table into memory, prints it lazily, and links it to a C4.5 trainer
 * that predicts {@code Play} from {@code Outlook}, {@code Temperature}, {@code Humidity} and
 * {@code Windy}. The trainer is asked to print its model info and to hand it to a callback that
 * saves the tree as {@code DATA_DIR + "weather_tree_model.png"}.
 *
 * @throws Exception as declared; raised by whichever of the operator calls below fails
 */
static void c_2_5() throws Exception {
    Row[] weatherRows = new Row[] {
        Row.of("sunny", 85.0, 85.0, false, "no"),
        Row.of("sunny", 80.0, 90.0, true, "no"),
        Row.of("overcast", 83.0, 78.0, false, "yes"),
        Row.of("rainy", 70.0, 96.0, false, "yes"),
        Row.of("rainy", 68.0, 80.0, false, "yes"),
        Row.of("rainy", 65.0, 70.0, true, "no"),
        Row.of("overcast", 64.0, 65.0, true, "yes"),
        Row.of("sunny", 72.0, 95.0, false, "no"),
        Row.of("sunny", 69.0, 70.0, false, "yes"),
        Row.of("rainy", 75.0, 80.0, false, "yes"),
        Row.of("sunny", 75.0, 70.0, true, "yes"),
        Row.of("overcast", 72.0, 90.0, true, "yes"),
        Row.of("overcast", 81.0, 75.0, false, "yes"),
        Row.of("rainy", 71.0, 80.0, true, "no")
    };
    String[] header = new String[] { "Outlook", "Temperature", "Humidity", "Windy", "Play" };
    MemSourceBatchOp weather = new MemSourceBatchOp(weatherRows, header);
    weather.lazyPrint(-1);

    // Callback for the collected model info: write the tree image; an I/O failure is only
    // reported via its stack trace and does not propagate.
    Consumer<DecisionTreeModelInfo> saveTreeImage = modelInfo -> {
        try {
            modelInfo.saveTreeAsImage(DATA_DIR + "weather_tree_model.png", true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    };

    weather.link(
        new C45TrainBatchOp()
            .setFeatureCols("Outlook", "Temperature", "Humidity", "Windy")
            .setCategoricalCols("Outlook", "Windy")
            .setLabelCol("Play")
            .lazyPrintModelInfo()
            .lazyCollectModelInfo(saveTreeImage));
    BatchOperator.execute();
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) C45TrainBatchOp(com.alibaba.alink.operator.batch.classification.C45TrainBatchOp) Consumer(java.util.function.Consumer) DecisionTreeModelInfo(com.alibaba.alink.operator.common.tree.TreeModelInfo.DecisionTreeModelInfo) IOException(java.io.IOException)

Example 4 with MemSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.MemSourceBatchOp in project Alink by alibaba.

In class Chap17, method c_5.

/**
 * Loads {@code ROWS_DATA} as a table with columns State, Region, Division, longitude and
 * latitude, registers several lazy prints (first 5 rows, distinct Regions, distinct Divisions,
 * and per-(Region, Division) counts), then for k = 2 and k = 4 fits a {@code GeoKMeans} on the
 * longitude/latitude columns and evaluates the predicted clusters against both the Region and
 * the Division labels. {@code BatchOperator.execute()} is called once per k.
 *
 * @throws Exception as declared; raised by whichever of the operator calls below fails
 */
static void c_5() throws Exception {
    BatchOperator.setParallelism(1);

    String[] columns = new String[] { "State", "Region", "Division", "longitude", "latitude" };
    MemSourceBatchOp states = new MemSourceBatchOp(ROWS_DATA, columns);

    states.lazyPrint(5);
    states.select("Region").distinct().lazyPrint(-1);
    states.select("Division").distinct().lazyPrint(-1);
    states
        .groupBy("Region, Division", "Region, Division, COUNT(*) AS numStates")
        .orderBy("Region, Division", 100)
        .lazyPrint(-1);

    int[] clusterCounts = { 2, 4 };
    for (int i = 0; i < clusterCounts.length; i++) {
        int k = clusterCounts[i];

        BatchOperator<?> clustered = new GeoKMeans()
            .setLongitudeCol("longitude")
            .setLatitudeCol("latitude")
            .setPredictionCol(PREDICTION_COL_NAME)
            .setK(k)
            .fit(states)
            .transform(states);

        // Compare the cluster assignment with the Region label.
        BatchOperator<?> regionEval = new EvalClusterBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol("Region")
            .lazyPrintMetrics(k + " with Region");
        clustered.link(regionEval);

        // Compare the same assignment with the Division label.
        BatchOperator<?> divisionEval = new EvalClusterBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol("Division")
            .lazyPrintMetrics(k + " with Division");
        clustered.link(divisionEval);

        BatchOperator.execute();
    }
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) GeoKMeans(com.alibaba.alink.pipeline.clustering.GeoKMeans) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 5 with MemSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.MemSourceBatchOp in project Alink by alibaba.

In class DirectReaderTest, method setUp.

/**
 * Test fixture: stores the three strings "a", "b", "c" in {@code inputArray} and wraps that
 * array in a {@code MemSourceBatchOp} with the single column name {@code col0}.
 *
 * @throws Exception as declared
 */
@Before
public void setUp() throws Exception {
    this.inputArray = new String[] { "a", "b", "c" };
    // Pass the field itself so the source is built from the same array the test keeps.
    this.input = new MemSourceBatchOp(this.inputArray, "col0");
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) Before(org.junit.Before)

Aggregations

MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 316; Row (org.apache.flink.types.Row): 259; Test (org.junit.Test): 259; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 87; MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp): 55; HashMap (java.util.HashMap): 46; TableSchema (org.apache.flink.table.api.TableSchema): 38; ArrayList (java.util.ArrayList): 36; CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 30; StreamOperator (com.alibaba.alink.operator.stream.StreamOperator): 17; Params (org.apache.flink.ml.api.misc.param.Params): 17; Pipeline (com.alibaba.alink.pipeline.Pipeline): 16; Timestamp (java.sql.Timestamp): 16; StringNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest): 12; TextApproxNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest): 12; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 12; GroupByBatchOp (com.alibaba.alink.operator.batch.sql.GroupByBatchOp): 11; BaseGbdtTrainBatchOp (com.alibaba.alink.operator.common.tree.parallelcart.BaseGbdtTrainBatchOp): 11; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 10; File (java.io.File): 9