Search in sources :

Example 1 with StandardScalerModel

use of com.alibaba.alink.pipeline.dataproc.StandardScalerModel in project Alink by alibaba.

From the class StandardScalerTest, method test.

/**
 * Runs standard scaling over columns {@code f0} and {@code f1} and checks the scaled
 * output columns {@code f0_1} and {@code f1_1} for each combination of
 * {@code withMean} / {@code withStd}:
 * <ul>
 *   <li>mean + std via the train/predict batch operators,</li>
 *   <li>mean only, std only, and neither via the {@link StandardScaler} pipeline stage.</li>
 * </ul>
 * Each configuration is also applied to the stream source and printed; the stream job
 * is started once at the end.
 *
 * @throws Exception if building or executing any of the jobs fails
 */
@Test
public void test() throws Exception {
    BatchOperator<?> batchData = new TableSourceBatchOp(GenerateData.getBatchTable());
    StreamOperator<?> streamData = new TableSourceStreamOp(GenerateData.getStreamTable());

    // withMean = true, withStd = true: operator-level API.
    StandardScalerTrainBatchOp op = new StandardScalerTrainBatchOp()
        .setWithMean(true)
        .setWithStd(true)
        .setSelectedCols("f0", "f1")
        .linkFrom(batchData);
    new StandardScalerPredictBatchOp()
        .setOutputCols("f0_1", "f1_1")
        .linkFrom(op, batchData)
        .lazyCollect(expectSortedRows(
            Row.of(-1., -3., -0.9272, -1.1547),
            Row.of(1., 2., -0.1325, 0.5774),
            Row.of(4., 2., 1.0596, 0.5774)));
    new StandardScalerPredictStreamOp(op).setOutputCols("f0_1", "f1_1").linkFrom(streamData).print();

    // withMean = true, withStd = false: values are only centered.
    StandardScalerModel model1 = new StandardScaler()
        .setWithMean(true)
        .setWithStd(false)
        .setSelectedCols("f0", "f1")
        .setOutputCols("f0_1", "f1_1")
        .fit(batchData);
    model1.transform(batchData).lazyCollect(expectSortedRows(
        Row.of(-1., -3., -2.3333, -3.3333),
        Row.of(1., 2., -0.3333, 1.6666),
        Row.of(4., 2., 2.6666, 1.6666)));
    model1.transform(streamData).print();

    // withMean = false, withStd = true: values are only rescaled.
    StandardScalerModel model2 = new StandardScaler()
        .setWithMean(false)
        .setWithStd(true)
        .setSelectedCols("f0", "f1")
        .setOutputCols("f0_1", "f1_1")
        .fit(batchData);
    model2.transform(batchData).lazyCollect(expectSortedRows(
        Row.of(-1., -3., -0.3974, -1.0392),
        Row.of(1., 2., 0.3974, 0.6928),
        Row.of(4., 2., 1.5894, 0.6928)));
    model2.transform(streamData).print();

    // withMean = false, withStd = false: output columns equal the input columns.
    StandardScalerModel model3 = new StandardScaler()
        .setWithMean(false)
        .setWithStd(false)
        .setSelectedCols("f0", "f1")
        .setOutputCols("f0_1", "f1_1")
        .fit(batchData);
    model3.transform(batchData).lazyCollect(expectSortedRows(
        Row.of(-1., -3., -1., -3.),
        Row.of(1., 2., 1., 2.),
        Row.of(4., 2., 4., 2.)));
    model3.transform(streamData).print();

    // NOTE(review): only the stream job is executed here. Presumably the lazyCollect
    // callbacks above need a batch execution to fire - confirm they actually run.
    StreamOperator.execute();
}

/**
 * Builds a collect callback that sorts the collected rows with {@code compare} and then
 * verifies them: the first row must be the all-null row, and every following row is
 * checked against the matching entry of {@code expectedAfterNullRow} via {@code assertRow}.
 *
 * @param expectedAfterNullRow expected rows at sorted positions 1..n, in order
 * @return the callback to hand to {@code lazyCollect}
 */
private Consumer<List<Row>> expectSortedRows(final Row... expectedAfterNullRow) {
    return rows -> {
        rows.sort(compare);
        // JUnit convention: expected value first, actual value second.
        assertEquals(Row.of(null, null, null, null), rows.get(0));
        for (int i = 0; i < expectedAfterNullRow.length; i++) {
            assertRow(rows.get(i + 1), expectedAfterNullRow[i]);
        }
    };
}
Also used : StandardScalerModel(com.alibaba.alink.pipeline.dataproc.StandardScalerModel) StandardScalerPredictStreamOp(com.alibaba.alink.operator.stream.dataproc.StandardScalerPredictStreamOp) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) StandardScaler(com.alibaba.alink.pipeline.dataproc.StandardScaler) List(java.util.List) TableSourceStreamOp(com.alibaba.alink.operator.stream.source.TableSourceStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Aggregations

BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 1, TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp): 1, StreamOperator (com.alibaba.alink.operator.stream.StreamOperator): 1, StandardScalerPredictStreamOp (com.alibaba.alink.operator.stream.dataproc.StandardScalerPredictStreamOp): 1, TableSourceStreamOp (com.alibaba.alink.operator.stream.source.TableSourceStreamOp): 1, StandardScaler (com.alibaba.alink.pipeline.dataproc.StandardScaler): 1, StandardScalerModel (com.alibaba.alink.pipeline.dataproc.StandardScalerModel): 1, List (java.util.List): 1, Row (org.apache.flink.types.Row): 1, Test (org.junit.Test): 1