Search in sources:

Example 31 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class ImputerTest method testPipeline.

/**
 * Fits an {@code Imputer} (strategy {@code "value"}, fill value {@code "1"}) on the batch data
 * and verifies the transformed output of both the batch and the stream input.
 *
 * <p>The same expectations are applied to both outputs via {@link #assertImputedRows(List)}.
 *
 * @param selectedColNames column names handed to {@code Imputer#setSelectedCols}
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
public void testPipeline(String[] selectedColNames) throws Exception {
    BatchOperator batchData = (BatchOperator) getData(true);
    Imputer fillMissingValue = new Imputer().setSelectedCols(selectedColNames).setStrategy("value").setFillValue("1");
    ImputerModel model = fillMissingValue.fit(batchData);

    // Batch path: transform the training data itself and collect the result.
    BatchOperator res = model.transform(batchData);
    List<Row> rows = res.getDataSet().collect();
    assertImputedRows(rows);

    // Stream path: reuse the fitted model and gather the output through a collect sink.
    StreamOperator streamData = (StreamOperator) getData(false);
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(model.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    assertImputedRows(result);
}

/**
 * Sorts {@code rows} in place with {@code RowComparator(0)} and checks fields 2, 3 and 4 of the
 * first four rows against the expected imputed values.
 *
 * <p>Expected values are passed first to {@code assertEquals}, so a failure message reports
 * "expected" and "actual" the right way round.
 *
 * @param rows transformed rows; reordered by this call
 */
private static void assertImputedRows(List<Row> rows) {
    rows.sort(new RowComparator(0));

    long[] expectedField2 = {1L, 2L, 1L, 0L};
    int[] expectedField3 = {1, 2, 1, 0};
    double[] expectedField4 = {2.0, -3.0, 2.0, 1.0};

    // Column-by-column, in the same order the checks were originally written.
    for (int i = 0; i < expectedField2.length; i++) {
        assertEquals(expectedField2[i], rows.get(i).getField(2));
    }
    for (int i = 0; i < expectedField3.length; i++) {
        assertEquals(expectedField3[i], rows.get(i).getField(3));
    }
    for (int i = 0; i < expectedField4.length; i++) {
        assertEquals(expectedField4[i], (Double) rows.get(i).getField(4), 0.001);
    }
}
Also used : RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator)

Example 32 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class StringIndexerTest method testAlphabetDesc.

/**
 * Exercises {@code StringIndexer} with string order type {@code "alphabet_desc"} on column
 * {@code f0}, first against a batch source and then against a stream source built from the
 * same {@code rows}. Both outputs are handed to {@code checkResult} with the same expected order.
 */
@Test
public void testAlphabetDesc() throws Exception {
    // Batch: fit and transform the same in-memory source, then check the collected rows.
    BatchOperator batchSource = new MemSourceBatchOp(Arrays.asList(rows), new String[] { "f0" });
    StringIndexer indexer = new StringIndexer()
            .setSelectedCol("f0")
            .setOutputCol("f0_indexed")
            .setStringOrderType("alphabet_desc");
    List<Row> batchRows = indexer.fit(batchSource).transform(batchSource).collect();
    checkResult(batchRows, new String[] { "tennis", "football", "basketball" });

    // Stream: fit again on the batch source, transform the stream and drain it via a collect sink.
    StreamOperator streamSource = new MemSourceStreamOp(Arrays.asList(rows), new String[] { "f0" });
    StreamOperator indexedStream = indexer.fit(batchSource).transform(streamSource);
    CollectSinkStreamOp sink = new CollectSinkStreamOp().linkFrom(indexedStream);
    StreamOperator.execute();
    List<Row> streamRows = sink.getAndRemoveValues();
    checkResult(streamRows, new String[] { "tennis", "football", "basketball" });
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Test(org.junit.Test)

Example 33 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class VectorEleWiseProdTest method pipelineStreamTest.

/**
 * Applies {@code VectorElementwiseProduct} (selected column {@code c1}, scaling vector
 * {@code "3.0 2.0 3.0"}, output column {@code product_result}) to the stream data and checks
 * field 4 of the first three collected rows after sorting with {@code RowComparator(0)}.
 *
 * <p>Expected values are passed first to {@code assertEquals} so failure messages label
 * "expected" and "actual" correctly.
 *
 * @throws Exception if executing the stream job fails
 */
@Test
public void pipelineStreamTest() throws Exception {
    StreamOperator streamOperator = new VectorElementwiseProduct()
            .setSelectedCol("c1")
            .setScalingVector("3.0 2.0 3.0")
            .setOutputCol("product_result")
            .transform((StreamOperator) getData(false));
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(streamOperator);
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));

    // Rows 0 and 1 share the same expected product.
    String scaledDense = "9.0 4.0 9.0";
    assertEquals(VectorUtil.getVector(scaledDense), VectorUtil.getVector(result.get(0).getField(4)));
    assertEquals(VectorUtil.getVector(scaledDense), VectorUtil.getVector(result.get(1).getField(4)));
    assertEquals(VectorUtil.getVector("6.0 6.0"), VectorUtil.getVector(result.get(2).getField(4)));
}
Also used : CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Example 34 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class VectorImputerTest method testPipelineFillValue.

/**
 * Fits a {@code VectorImputer} (column {@code vec}, strategy {@code "value"}, fill value
 * {@code -7.0}) on the batch data and verifies the transformed output of both the batch and
 * the stream input against the same expected vectors.
 *
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
@Test
public void testPipelineFillValue() throws Exception {
    String selectedColName = "vec";
    String strategy = "value";
    double fillValue = -7.0;
    BatchOperator batchData = (BatchOperator) getData(true);
    StreamOperator streamData = (StreamOperator) getData(false);
    VectorImputer fillMissingValue = new VectorImputer().setSelectedCol(selectedColName).setStrategy(strategy).setFillValue(fillValue);
    VectorImputerModel model = fillMissingValue.fit(batchData);

    // Batch path.
    BatchOperator res = model.transform(batchData);
    List<Row> rows = res.getDataSet().collect();
    assertFilledVectors(rows);

    // Stream path: same fitted model, output gathered through a collect sink.
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(model.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    assertFilledVectors(result);
}

/**
 * Sorts {@code rows} in place with {@code RowComparator(0)} and checks field 1 of the first
 * three rows against the expected vectors.
 *
 * <p>Expected values are passed first to {@code assertEquals}, so a failure message reports
 * "expected" and "actual" the right way round.
 *
 * @param rows transformed rows; reordered by this call
 */
private static void assertFilledVectors(List<Row> rows) {
    rows.sort(new RowComparator(0));
    String[] expectedVectors = {"1.0 3.0 -7.0", "0:-1.0 1:-3.0", "0:4.0 1:2.0"};
    for (int i = 0; i < expectedVectors.length; i++) {
        assertEquals(VectorUtil.getVector(expectedVectors[i]), VectorUtil.getVector(rows.get(i).getField(1)));
    }
}
Also used : RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Test(org.junit.Test)

Example 35 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class VectorInteractionTest method pipelineStreamTest.

/**
 * Applies {@code VectorInteraction} (selected columns {@code c0} and {@code c3}, output column
 * {@code product_result}) to the stream data and checks field 5 of the first three collected
 * rows after sorting with {@code RowComparator(0)}.
 *
 * <p>Expected values are passed first to {@code assertEquals} so failure messages label
 * "expected" and "actual" correctly.
 *
 * @throws Exception if executing the stream job fails
 */
@Test
public void pipelineStreamTest() throws Exception {
    StreamOperator streamOperator = new VectorInteraction()
            .setSelectedCols(new String[] { "c0", "c3" })
            .setOutputCol("product_result")
            .transform((StreamOperator) getData(false));
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(streamOperator);
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));

    String expectedRow0 = "$36$7:4.0 8:6.0 11:8.6 13:6.0 14:9.0 17:12.899999999999999 31:8.6 32:12.899999999999999 35:18.49";
    // Rows 1 and 2 share the same expected interaction vector.
    String expectedRows1And2 = "$48$9:4.0 10:6.0 15:8.6 17:6.0 18:9.0 23:12.899999999999999 41:8.6 42:12.899999999999999 47:18.49";
    assertEquals(VectorUtil.getVector(expectedRow0), VectorUtil.getVector(result.get(0).getField(5)));
    assertEquals(VectorUtil.getVector(expectedRows1And2), VectorUtil.getVector(result.get(1).getField(5)));
    assertEquals(VectorUtil.getVector(expectedRows1And2), VectorUtil.getVector(result.get(2).getField(5)));
}
Also used : CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Aggregations

CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 80; Test (org.junit.Test): 76; Row (org.apache.flink.types.Row): 72; MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp): 60; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 30; RowComparator (com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator): 25; StreamOperator (com.alibaba.alink.operator.stream.StreamOperator): 25; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 20; Pipeline (com.alibaba.alink.pipeline.Pipeline): 9; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 9; Timestamp (java.sql.Timestamp): 8; SparseVector (com.alibaba.alink.common.linalg.SparseVector): 6; StringNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest): 6; TextApproxNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest): 6; OverCountWindowStreamOp (com.alibaba.alink.operator.stream.feature.OverCountWindowStreamOp): 6; ArrayList (java.util.ArrayList): 6; HashMap (java.util.HashMap): 6; MTable (com.alibaba.alink.common.MTable): 3; DenseVector (com.alibaba.alink.common.linalg.DenseVector): 3; TableSchema (org.apache.flink.table.api.TableSchema): 3