Search in sources :

Example 26 with TableSourceBatchOp

use of com.alibaba.alink.operator.batch.source.TableSourceBatchOp in project Alink by alibaba.

In class Preprocessing, method select.

/**
 * Builds a new batch operator that contains only the requested columns of {@code in}.
 *
 * <p>The column positions and types are looked up through
 * {@code TableUtil.findColIndicesWithAssertAndHint} / {@code findColTypesWithAssertAndHint}
 * (presumably these fail for unknown column names; confirm against TableUtil). Every input row is
 * then copied field by field into a narrower row, in the order given by {@code selectCols}.
 *
 * @param in         operator whose rows are projected
 * @param selectCols names of the columns to keep, in output order
 * @return a {@code TableSourceBatchOp} over the projected rows, bound to the ML environment id of {@code in}
 */
public static BatchOperator<?> select(BatchOperator<?> in, String... selectCols) {
    final String[] inputColNames = in.getColNames();
    final int[] colIndices = TableUtil.findColIndicesWithAssertAndHint(inputColNames, selectCols);
    final TypeInformation<?>[] colTypes = TableUtil.findColTypesWithAssertAndHint(in.getSchema(), selectCols);

    // Row-wise projection; open/close only log the task name.
    final RichMapFunction<Row, Row> projector = new RichMapFunction<Row, Row>() {

        private static final long serialVersionUID = 9119490369706910594L;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            LOG.info("{} open.", getRuntimeContext().getTaskName());
        }

        @Override
        public void close() throws Exception {
            super.close();
            LOG.info("{} close.", getRuntimeContext().getTaskName());
        }

        @Override
        public Row map(Row value) throws Exception {
            final Row projected = new Row(colIndices.length);
            int target = 0;
            for (int sourceIndex : colIndices) {
                projected.setField(target++, value.getField(sourceIndex));
            }
            return projected;
        }
    };

    return new TableSourceBatchOp(
        DataSetConversionUtil.toTable(in.getMLEnvironmentId(), in.getDataSet().map(projector), selectCols, colTypes)
    ).setMLEnvironmentId(in.getMLEnvironmentId());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Row(org.apache.flink.types.Row) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation)

Example 27 with TableSourceBatchOp

use of com.alibaba.alink.operator.batch.source.TableSourceBatchOp in project Alink by alibaba.

In class TreeModelInfoBatchOp, method combinedTreeModelFeatureImportance.

/**
 * Embeds the feature-importance table into the metadata of a serialized tree model.
 *
 * <p>The rows of {@code featureImportance} are folded into a single JSON object (column 0 as the
 * key, column 1 as a numeric value) which is broadcast to the reducer that reloads the model rows,
 * stores the JSON under {@code TreeModelInfo.FEATURE_IMPORTANCE} and re-serializes the model.
 *
 * @param model             operator holding the serialized tree model rows
 * @param featureImportance operator whose rows are (feature name, importance) pairs
 * @return an operator with the same column names and types as {@code model}, bound to the same
 *         ML environment id as {@code model}
 */
private static BatchOperator<?> combinedTreeModelFeatureImportance(BatchOperator<?> model, BatchOperator<?> featureImportance) {
    // Collapse all importance rows into one JSON string.
    DataSet<String> importanceJson = featureImportance.getDataSet().reduceGroup(new GroupReduceFunction<Row, String>() {

        private static final long serialVersionUID = -1576541700351312745L;

        @Override
        public void reduce(Iterable<Row> values, Collector<String> out) throws Exception {
            Map<String, Double> importance = new HashMap<>();
            for (Row val : values) {
                importance.put(String.valueOf(val.getField(0)), ((Number) val.getField(1)).doubleValue());
            }
            out.collect(JsonConverter.toJson(importance));
        }
    });
    DataSet<Row> combined = model.getDataSet().reduceGroup(new RichGroupReduceFunction<Row, Row>() {

        private static final long serialVersionUID = -1576541700351312745L;

        private transient String featureImportanceJson;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            featureImportanceJson = getRuntimeContext().getBroadcastVariableWithInitializer("importanceJson", new BroadcastVariableInitializer<String, String>() {

                @Override
                public String initializeBroadcastVariable(Iterable<String> data) {
                    // NOTE(review): throws NoSuchElementException if the broadcast set is empty;
                    // confirm that the importance data set can never be empty here.
                    return data.iterator().next();
                }
            });
        }

        @Override
        public void reduce(Iterable<Row> values, Collector<Row> out) throws Exception {
            List<Row> modelRows = new ArrayList<>();
            for (Row val : values) {
                modelRows.add(val);
            }
            // Named "converter" so it no longer shadows the enclosing method's "model" parameter.
            TreeModelDataConverter converter = new TreeModelDataConverter().load(modelRows);
            converter.meta.set(TreeModelInfo.FEATURE_IMPORTANCE, featureImportanceJson);
            converter.save(converter, out);
        }
    }).withBroadcastSet(importanceJson, "importanceJson");
    // The table is created in the model's ML environment, so the wrapping operator must carry the
    // same environment id; otherwise it would report the default id.
    return new TableSourceBatchOp(DataSetConversionUtil.toTable(model.getMLEnvironmentId(), combined, model.getColNames(), model.getColTypes())).setMLEnvironmentId(model.getMLEnvironmentId());
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) Configuration(org.apache.flink.configuration.Configuration) BroadcastVariableInitializer(org.apache.flink.api.common.functions.BroadcastVariableInitializer) ArrayList(java.util.ArrayList) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) Collector(org.apache.flink.util.Collector) Row(org.apache.flink.types.Row) HashMap(java.util.HashMap) Map(java.util.Map)

Example 28 with TableSourceBatchOp

use of com.alibaba.alink.operator.batch.source.TableSourceBatchOp in project Alink by alibaba.

In class SampleWithSizeBatchOpTest, method test.

/**
 * Samples 5 rows (second constructor argument {@code true}) from the batch table and checks
 * that exactly 5 rows come back.
 */
@Test
public void test() throws Exception {
    TableSourceBatchOp tableSourceBatchOp = new TableSourceBatchOp(getBatchTable());
    long cnt = tableSourceBatchOp.link(new SampleWithSizeBatchOp(5, true)).count();
    // A plain Java `assert` is skipped unless the JVM runs with -ea, which would let this test
    // pass without checking anything. Throw explicitly so the check is always enforced.
    if (cnt != 5) {
        throw new AssertionError("expected 5 sampled rows but got " + cnt);
    }
}
Also used : TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) Test(org.junit.Test)

Example 29 with TableSourceBatchOp

use of com.alibaba.alink.operator.batch.source.TableSourceBatchOp in project Alink by alibaba.

In class StandardScalerTest, method testModelInfo.

/**
 * Smoke test for the model-info path of the standard scaler: trains on column {@code f0} with
 * mean and std enabled, collects the model info and prints its contents. It makes no assertions;
 * it only verifies that the calls complete without throwing.
 */
@Test
public void testModelInfo() {
    // Wildcard type instead of the raw BatchOperator to avoid unchecked warnings.
    BatchOperator<?> batchData = new TableSourceBatchOp(GenerateData.getBatchTable());
    StandardScalerTrainBatchOp trainOp = new StandardScalerTrainBatchOp()
        .setWithMean(true)
        .setWithStd(true)
        .setSelectedCols("f0")
        .linkFrom(batchData);
    StandardScalerModelInfo modelInfo = trainOp.getModelInfoBatchOp().collectModelInfo();
    System.out.println(modelInfo.getMeans().length);
    System.out.println(modelInfo.getStdDevs().length);
    System.out.println(modelInfo.isWithMeans());
    System.out.println(modelInfo.isWithStdDevs());
    // println(Object) already calls toString() on a non-null argument.
    System.out.println(modelInfo);
}
Also used : StandardScalerModelInfo(com.alibaba.alink.operator.common.dataproc.StandardScalerModelInfo) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Test(org.junit.Test)

Example 30 with TableSourceBatchOp

use of com.alibaba.alink.operator.batch.source.TableSourceBatchOp in project Alink by alibaba.

In class StandardScalerTest, method test.

/**
 * Exercises the standard scaler for all four combinations of {@code withMean} / {@code withStd}
 * on columns {@code f0} and {@code f1}, through both the batch and the stream prediction paths.
 *
 * <p>Batch results are checked in lazy-collect callbacks: rows are sorted with {@code compare},
 * the first row is expected to be all {@code null}, and the remaining three rows are compared
 * with {@code assertRow}. Stream results are only printed.
 */
@Test
public void test() throws Exception {
    // Wildcard types instead of raw BatchOperator / StreamOperator.
    BatchOperator<?> batchData = new TableSourceBatchOp(GenerateData.getBatchTable());
    StreamOperator<?> streamData = new TableSourceStreamOp(GenerateData.getStreamTable());

    // withMean = true, withStd = true (operator API).
    StandardScalerTrainBatchOp op = new StandardScalerTrainBatchOp().setWithMean(true).setWithStd(true).setSelectedCols("f0", "f1").linkFrom(batchData);
    new StandardScalerPredictBatchOp().setOutputCols("f0_1", "f1_1").linkFrom(op, batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            // JUnit order is (expected, actual).
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -0.9272, -1.1547));
            assertRow(rows.get(2), Row.of(1., 2., -0.1325, 0.5774));
            assertRow(rows.get(3), Row.of(4., 2., 1.0596, 0.5774));
        }
    });
    new StandardScalerPredictStreamOp(op).setOutputCols("f0_1", "f1_1").linkFrom(streamData).print();

    // withMean = true, withStd = false (pipeline API).
    StandardScalerModel model1 = new StandardScaler().setWithMean(true).setWithStd(false).setSelectedCols("f0", "f1").setOutputCols("f0_1", "f1_1").fit(batchData);
    model1.transform(batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -2.3333, -3.3333));
            assertRow(rows.get(2), Row.of(1., 2., -0.3333, 1.6666));
            assertRow(rows.get(3), Row.of(4., 2., 2.6666, 1.6666));
        }
    });
    model1.transform(streamData).print();

    // withMean = false, withStd = true.
    StandardScalerModel model2 = new StandardScaler().setWithMean(false).setWithStd(true).setSelectedCols("f0", "f1").setOutputCols("f0_1", "f1_1").fit(batchData);
    model2.transform(batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -0.3974, -1.0392));
            assertRow(rows.get(2), Row.of(1., 2., 0.3974, 0.6928));
            assertRow(rows.get(3), Row.of(4., 2., 1.5894, 0.6928));
        }
    });
    model2.transform(streamData).print();

    // withMean = false, withStd = false: the expected output columns equal the input columns.
    StandardScalerModel model3 = new StandardScaler().setWithMean(false).setWithStd(false).setSelectedCols("f0", "f1").setOutputCols("f0_1", "f1_1").fit(batchData);
    model3.transform(batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -1., -3.));
            assertRow(rows.get(2), Row.of(1., 2., 1., 2.));
            assertRow(rows.get(3), Row.of(4., 2., 4., 2.));
        }
    });
    model3.transform(streamData).print();

    // NOTE(review): only the stream job is executed explicitly here; confirm that the lazy batch
    // callbacks above are triggered by the model collection done for the stream predictors.
    StreamOperator.execute();
}
Also used : StandardScalerModel(com.alibaba.alink.pipeline.dataproc.StandardScalerModel) StandardScalerPredictStreamOp(com.alibaba.alink.operator.stream.dataproc.StandardScalerPredictStreamOp) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) StandardScaler(com.alibaba.alink.pipeline.dataproc.StandardScaler) List(java.util.List) TableSourceStreamOp(com.alibaba.alink.operator.stream.source.TableSourceStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Aggregations

TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp): 39, Row (org.apache.flink.types.Row): 29, BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 22, Test (org.junit.Test): 18, TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 12, TableSourceStreamOp (com.alibaba.alink.operator.stream.source.TableSourceStreamOp): 10, Params (org.apache.flink.ml.api.misc.param.Params): 10, List (java.util.List): 8, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 8, StreamOperator (com.alibaba.alink.operator.stream.StreamOperator): 6, ArrayList (java.util.ArrayList): 6, TableSchema (org.apache.flink.table.api.TableSchema): 6, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 5, Comparator (java.util.Comparator): 4, HashMap (java.util.HashMap): 4, MapFunction (org.apache.flink.api.common.functions.MapFunction): 4, DataSet (org.apache.flink.api.java.DataSet): 4, Mapper (com.alibaba.alink.common.mapper.Mapper): 3, ModelMapper (com.alibaba.alink.common.mapper.ModelMapper): 3, PipelineModelMapper (com.alibaba.alink.common.mapper.PipelineModelMapper): 3