Search in sources :

Example 6 with TableSourceStreamOp

use of com.alibaba.alink.operator.stream.source.TableSourceStreamOp in project Alink by alibaba.

Defined in class GeoKMeansTest, method before().

@Before
public void before() {
    // Six 2-D integer points keyed by id; the same data feeds both sources.
    String[] colNames = new String[] { "id", "f0", "f1" };
    Row[] data = new Row[] {
        Row.of(0, 0, 0),
        Row.of(1, 8, 8),
        Row.of(2, 1, 2),
        Row.of(3, 9, 10),
        Row.of(4, 3, 1),
        Row.of(5, 10, 7)
    };
    inputBatchOp = new TableSourceBatchOp(
        MLEnvironmentFactory.getDefault().createBatchTable(data, colNames));
    inputStreamOp = new TableSourceStreamOp(
        MLEnvironmentFactory.getDefault().createStreamTable(data, colNames));
    // Reference predictions the GeoKMeans tests compare against.
    expectedPrediction = new double[] { 185.31, 117.08, 117.18, 183.04, 185.32, 183.70 };
}
Also used : Row(org.apache.flink.types.Row) TableSourceStreamOp(com.alibaba.alink.operator.stream.source.TableSourceStreamOp) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) Before(org.junit.Before)

Example 7 with TableSourceStreamOp

use of com.alibaba.alink.operator.stream.source.TableSourceStreamOp in project Alink by alibaba.

Defined in class SqliteCatalogTest, method sinkStream().

@Test
public void sinkStream() throws Exception {
    // A single row covering every column type the SQLite catalog maps.
    Row[] data = new Row[] {
        Row.of(new byte[] { 0, 1 }, new BigDecimal("0.00"), (byte) 0, (short) 0, 0, 0.0f, 0.0, 0,
            new Date(0), new Time(0), new Timestamp(0), "string", "string", new byte[] { 0, 1 },
            "s", new byte[] { 0, 1 }, false, 0L)
    };
    String[] colNames = new String[] {
        "col_bit", "col_decimal", "col_tinyint", "col_smallint", "col_int", "col_float",
        "col_double", "col_mediumint", "col_date", "col_time", "col_timestamp", "col_text",
        "col_varchar", "col_varbinary", "col_char", "col_binary", "col_boolean", "col_long"
    };
    TypeInformation<?>[] colTypes = new TypeInformation<?>[] {
        Types.PRIMITIVE_ARRAY(Types.BYTE), Types.BIG_DEC, Types.BYTE, Types.SHORT, Types.INT,
        Types.FLOAT, Types.DOUBLE, Types.INT, Types.SQL_DATE, Types.SQL_TIME, Types.SQL_TIMESTAMP,
        Types.STRING, Types.STRING, Types.PRIMITIVE_ARRAY(Types.BYTE), Types.STRING,
        Types.PRIMITIVE_ARRAY(Types.BYTE), Types.BOOLEAN, Types.LONG
    };
    MemSourceStreamOp source = new MemSourceStreamOp(Arrays.asList(data), new TableSchema(colNames, colTypes));
    ObjectPath tablePath = new ObjectPath(SQLITE_DB, SQLITE_DB_TABLE_1);
    // Write the stream into SQLite, overwriting any existing table content.
    sqlite.sinkStream(tablePath, source.getOutputTable(),
        new Params().set(HasOverwriteSink.OVERWRITE_SINK, true), source.getMLEnvironmentId());
    StreamOperator.execute();
    // Read the table back through the catalog and print it as a round-trip check.
    new TableSourceStreamOp(sqlite.sourceStream(tablePath, new Params(),
        MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID)).print();
    StreamOperator.execute();
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) ObjectPath(org.apache.flink.table.catalog.ObjectPath) TableSchema(org.apache.flink.table.api.TableSchema) Params(org.apache.flink.ml.api.misc.param.Params) Time(java.sql.Time) Timestamp(java.sql.Timestamp) BigDecimal(java.math.BigDecimal) Date(java.sql.Date) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Row(org.apache.flink.types.Row) TableSourceStreamOp(com.alibaba.alink.operator.stream.source.TableSourceStreamOp) Test(org.junit.Test)

Example 8 with TableSourceStreamOp

use of com.alibaba.alink.operator.stream.source.TableSourceStreamOp in project Alink by alibaba.

Defined in class StandardScalerTest, method test().

@Test
public void test() throws Exception {
    // Exercise StandardScaler through the batch and stream paths for all four
    // withMean/withStd combinations. Fix: assertEquals arguments were passed as
    // (actual, expected); JUnit's convention is (expected, actual), so failures
    // previously reported the values swapped. assertRow is a project helper and
    // its argument order is left untouched.
    BatchOperator batchData = new TableSourceBatchOp(GenerateData.getBatchTable());
    StreamOperator streamData = new TableSourceStreamOp(GenerateData.getStreamTable());
    // Case 1: center on the mean and scale by the standard deviation.
    StandardScalerTrainBatchOp op = new StandardScalerTrainBatchOp().setWithMean(true).setWithStd(true).setSelectedCols("f0", "f1").linkFrom(batchData);
    new StandardScalerPredictBatchOp().setOutputCols("f0_1", "f1_1").linkFrom(op, batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            // Null input row passes through unchanged.
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -0.9272, -1.1547));
            assertRow(rows.get(2), Row.of(1., 2., -0.1325, 0.5774));
            assertRow(rows.get(3), Row.of(4., 2., 1.0596, 0.5774));
        }
    });
    new StandardScalerPredictStreamOp(op).setOutputCols("f0_1", "f1_1").linkFrom(streamData).print();
    // Case 2: center only (withMean=true, withStd=false), via the pipeline API.
    StandardScalerModel model1 = new StandardScaler().setWithMean(true).setWithStd(false).setSelectedCols("f0", "f1").setOutputCols("f0_1", "f1_1").fit(batchData);
    model1.transform(batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -2.3333, -3.3333));
            assertRow(rows.get(2), Row.of(1., 2., -0.3333, 1.6666));
            assertRow(rows.get(3), Row.of(4., 2., 2.6666, 1.6666));
        }
    });
    model1.transform(streamData).print();
    // Case 3: scale only (withMean=false, withStd=true).
    StandardScalerModel model2 = new StandardScaler().setWithMean(false).setWithStd(true).setSelectedCols("f0", "f1").setOutputCols("f0_1", "f1_1").fit(batchData);
    model2.transform(batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -0.3974, -1.0392));
            assertRow(rows.get(2), Row.of(1., 2., 0.3974, 0.6928));
            assertRow(rows.get(3), Row.of(4., 2., 1.5894, 0.6928));
        }
    });
    model2.transform(streamData).print();
    // Case 4: both disabled — the transform must be the identity.
    StandardScalerModel model3 = new StandardScaler().setWithMean(false).setWithStd(false).setSelectedCols("f0", "f1").setOutputCols("f0_1", "f1_1").fit(batchData);
    model3.transform(batchData).lazyCollect(new Consumer<List<Row>>() {

        @Override
        public void accept(List<Row> rows) {
            rows.sort(compare);
            assertEquals(Row.of(null, null, null, null), rows.get(0));
            assertRow(rows.get(1), Row.of(-1., -3., -1., -3.));
            assertRow(rows.get(2), Row.of(1., 2., 1., 2.));
            assertRow(rows.get(3), Row.of(4., 2., 4., 2.));
        }
    });
    model3.transform(streamData).print();
    StreamOperator.execute();
}
Also used : StandardScalerModel(com.alibaba.alink.pipeline.dataproc.StandardScalerModel) StandardScalerPredictStreamOp(com.alibaba.alink.operator.stream.dataproc.StandardScalerPredictStreamOp) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) StandardScaler(com.alibaba.alink.pipeline.dataproc.StandardScaler) List(java.util.List) TableSourceStreamOp(com.alibaba.alink.operator.stream.source.TableSourceStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Example 9 with TableSourceStreamOp

use of com.alibaba.alink.operator.stream.source.TableSourceStreamOp in project Alink by alibaba.

Defined in class VectorStandardScalerTest, method test().

@Test
public void test() throws Exception {
    // Batch input gets an appended "id" column (field 1) so results can be
    // re-ordered deterministically after collect(); field 2 holds the scaled vector.
    BatchOperator batchData = new TableSourceBatchOp(GenerateData.getDenseBatch()).link(new AppendIdBatchOp().setIdCol("id"));
    StreamOperator streamData = new TableSourceStreamOp(GenerateData.getDenseStream());
    // Single comparator shared by every collect-and-sort below (orders rows by id).
    Comparator<Row> byId = new Comparator<Row>() {

        @Override
        public int compare(Row left, Row right) {
            return Long.compare((long) left.getField(1), (long) right.getField(1));
        }
    };
    // Case 1: center and scale.
    VectorStandardScalerTrainBatchOp op = new VectorStandardScalerTrainBatchOp().setWithMean(true).setWithStd(true).setSelectedCol("vec").linkFrom(batchData);
    BatchOperator res = new VectorStandardScalerPredictBatchOp().setOutputCol("vec_1").linkFrom(op, batchData);
    List<Row> list = res.collect();
    list.sort(byId);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -0.9272, -1.1547 }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { -0.1325, 0.5774 }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 1.0596, 0.5774 }));
    new VectorStandardScalerPredictStreamOp(op).setOutputCol("vec_1").linkFrom(streamData).print();
    // Case 2: center only.
    VectorStandardScalerModel model1 = new VectorStandardScaler().setWithMean(true).setWithStd(false).setSelectedCol("vec").setOutputCol("vec_1").fit(batchData);
    list = model1.transform(batchData).collect();
    list.sort(byId);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -2.3333, -3.3333 }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { -0.3333, 1.6666 }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 2.6666, 1.6666 }));
    model1.transform(streamData).print();
    // Case 3: scale only.
    VectorStandardScalerModel model2 = new VectorStandardScaler().setWithMean(false).setWithStd(true).setSelectedCol("vec").setOutputCol("vec_1").fit(batchData);
    list = model2.transform(batchData).collect();
    list.sort(byId);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -0.3974, -1.0392 }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { 0.3974, 0.6928 }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 1.5894, 0.6928 }));
    model2.transform(streamData).print();
    // Case 4: both disabled — identity transform.
    VectorStandardScalerModel model3 = new VectorStandardScaler().setWithMean(false).setWithStd(false).setSelectedCol("vec").setOutputCol("vec_1").fit(batchData);
    list = model3.transform(batchData).collect();
    list.sort(byId);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -1., -3. }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { 1., 2. }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 4., 2. }));
    model3.transform(streamData).print();
    StreamOperator.execute();
}
Also used : VectorStandardScalerPredictStreamOp(com.alibaba.alink.operator.stream.dataproc.vector.VectorStandardScalerPredictStreamOp) VectorStandardScalerModel(com.alibaba.alink.pipeline.dataproc.vector.VectorStandardScalerModel) VectorStandardScaler(com.alibaba.alink.pipeline.dataproc.vector.VectorStandardScaler) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) AppendIdBatchOp(com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) TableSourceStreamOp(com.alibaba.alink.operator.stream.source.TableSourceStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) DenseVector(com.alibaba.alink.common.linalg.DenseVector) Test(org.junit.Test)

Example 10 with TableSourceStreamOp

use of com.alibaba.alink.operator.stream.source.TableSourceStreamOp in project Alink by alibaba.

Defined in class MinMaxTest, method test().

@Test
public void test() throws Exception {
    // Exercise MinMaxScaler through batch-op, stream-op, and pipeline APIs.
    BatchOperator batchData = new TableSourceBatchOp(GenerateData.getBatchTable());
    StreamOperator streamData = new TableSourceStreamOp(GenerateData.getStreamTable());
    MinMaxScalerTrainBatchOp op = new MinMaxScalerTrainBatchOp().setSelectedCols("f0", "f1").linkFrom(batchData);
    new MinMaxScalerPredictBatchOp().linkFrom(op, batchData).lazyCollect();
    new MinMaxScalerPredictStreamOp(op).linkFrom(streamData).print();
    MinMaxScalerModel model = new MinMaxScaler().setSelectedCols("f0", "f1").setOutputCols("f0_1", "f1_1").fit(batchData);
    List<Row> rows = model.transform(batchData).collect();
    // Sort nulls first, then ascending by f0. Fix: the original comparator
    // violated the Comparator contract — it returned -1 when BOTH fields were
    // null (so compare(a,b) == compare(b,a) == -1), and its manual >/< checks
    // returned 0 for any NaN operand; either can make TimSort throw
    // "Comparison method violates its general contract". Double.compare gives
    // a total order over doubles.
    rows.sort(new Comparator<Row>() {

        @Override
        public int compare(Row o1, Row o2) {
            Object a = o1.getField(0);
            Object b = o2.getField(0);
            if (a == null && b == null) {
                return 0;
            }
            if (a == null) {
                return -1;
            }
            if (b == null) {
                return 1;
            }
            return Double.compare((double) a, (double) b);
        }
    });
    // JUnit convention: expected value first, actual second (the original had
    // the arguments reversed, which swaps the values in failure messages).
    assertEquals(Row.of(null, null, null, null), rows.get(0));
    assertEquals(Row.of(-1., -3., 0., 0.), rows.get(1));
    assertEquals(Row.of(1., 2., 0.4, 1.), rows.get(2));
    assertEquals(Row.of(4., 2., 1., 1.), rows.get(3));
    model.transform(streamData).print();
    StreamOperator.execute();
}
Also used : TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MinMaxScalerModel(com.alibaba.alink.pipeline.dataproc.MinMaxScalerModel) MinMaxScalerPredictStreamOp(com.alibaba.alink.operator.stream.dataproc.MinMaxScalerPredictStreamOp) MinMaxScaler(com.alibaba.alink.pipeline.dataproc.MinMaxScaler) TableSourceStreamOp(com.alibaba.alink.operator.stream.source.TableSourceStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Aggregations

TableSourceStreamOp (com.alibaba.alink.operator.stream.source.TableSourceStreamOp)12 Row (org.apache.flink.types.Row)11 TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp)10 Test (org.junit.Test)8 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)6 StreamOperator (com.alibaba.alink.operator.stream.StreamOperator)6 DenseVector (com.alibaba.alink.common.linalg.DenseVector)3 Before (org.junit.Before)3 MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp)2 BigDecimal (java.math.BigDecimal)2 Date (java.sql.Date)2 Time (java.sql.Time)2 Timestamp (java.sql.Timestamp)2 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)2 Params (org.apache.flink.ml.api.misc.param.Params)2 TableSchema (org.apache.flink.table.api.TableSchema)2 ObjectPath (org.apache.flink.table.catalog.ObjectPath)2 AppendIdBatchOp (com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp)1 MaxAbsScalerPredictStreamOp (com.alibaba.alink.operator.stream.dataproc.MaxAbsScalerPredictStreamOp)1 MinMaxScalerPredictStreamOp (com.alibaba.alink.operator.stream.dataproc.MinMaxScalerPredictStreamOp)1