Search in sources:

Example 36 with CollectSinkStreamOp

Use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by Alibaba.

From the class VectorMaxAbsScalerTest, method testPipelineI.

/**
 * Fits a {@code VectorMaxAbsScaler} on a three-row in-memory table and checks that
 * transforming the same rows through the batch path and through the stream path
 * both yield the expected scaled vectors.
 *
 * <p>Rows are sorted on column 0 (the id) before comparison, so the assertions do
 * not depend on the order in which results are collected.
 *
 * @throws Exception propagated from collecting the batch result or running the stream job
 */
private static void testPipelineI() throws Exception {
    Row[] rowData = new Row[] { Row.of("0", "1.0 2.0"), Row.of("1", "-1.0 -4.0"), Row.of("2", "-4.0 3.0") };
    TableSchema schema = new TableSchema(new String[] { "id", "vec" }, new TypeInformation<?>[] { Types.STRING, Types.STRING });
    // Wildcard-parameterized operators instead of raw types, so getDataSet().collect()
    // is typed and no unchecked conversion is needed.
    BatchOperator<?> batchData = new MemSourceBatchOp(Arrays.asList(rowData), schema);
    StreamOperator<?> streamData = new MemSourceStreamOp(Arrays.asList(rowData), schema);
    String selectedColName = "vec";
    VectorMaxAbsScaler scaler = new VectorMaxAbsScaler().setSelectedCol(selectedColName);
    VectorMaxAbsScalerModel model = scaler.fit(batchData);

    // Batch path.
    BatchOperator<?> batchRes = model.transform(batchData);
    List<Row> batchList = batchRes.getDataSet().collect();
    batchList.sort(new RowComparator(0));
    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(VectorUtil.getVector("0.25 0.5"), batchList.get(0).getField(1));
    assertEquals(VectorUtil.getVector("-0.25 -1.0"), batchList.get(1).getField(1));
    assertEquals(VectorUtil.getVector("-1.0 0.75"), batchList.get(2).getField(1));

    // Stream path: the sink only holds rows after the stream job has been executed.
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(model.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));
    assertEquals(VectorUtil.getVector("0.25 0.5"), VectorUtil.getVector(result.get(0).getField(1)));
    assertEquals(VectorUtil.getVector("-0.25 -1.0"), VectorUtil.getVector(result.get(1).getField(1)));
    assertEquals(VectorUtil.getVector("-1.0 0.75"), VectorUtil.getVector(result.get(2).getField(1)));
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) TableSchema(org.apache.flink.table.api.TableSchema) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator)

Example 37 with CollectSinkStreamOp

Use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by Alibaba.

From the class VectorMinMaxScalerTest, method testPipeline.

/**
 * Fits a {@code VectorMinMaxScaler} configured with max 2.0 and min -3.0 on a
 * three-row in-memory table and checks that the batch and the stream transform
 * both yield the expected rescaled vectors.
 *
 * <p>Rows are sorted on column 0 (the id) before comparison, so the assertions do
 * not depend on the order in which results are collected.
 *
 * @throws Exception propagated from collecting the batch result or running the stream job
 */
@Test
public void testPipeline() throws Exception {
    Row[] rowData = new Row[] { Row.of("0", "1.0 2.0"), Row.of("1", "-1.0 -3.0"), Row.of("2", "4.0 2.0") };
    TableSchema schema = new TableSchema(new String[] { "id", "vec" }, new TypeInformation<?>[] { Types.STRING, Types.STRING });
    // Wildcard-parameterized operators instead of raw types, so getDataSet().collect()
    // is typed and no unchecked conversion is needed.
    BatchOperator<?> batchData = new MemSourceBatchOp(Arrays.asList(rowData), schema);
    StreamOperator<?> streamData = new MemSourceStreamOp(Arrays.asList(rowData), schema);
    String selectedColName = "vec";
    VectorMinMaxScaler scaler = new VectorMinMaxScaler().setSelectedCol(selectedColName);
    scaler.setMax(2.0);
    scaler.setMin(-3.0);
    VectorMinMaxScalerModel model = scaler.fit(batchData);

    // Batch path.
    BatchOperator<?> res = model.transform(batchData);
    List<Row> rows = res.getDataSet().collect();
    rows.sort(new RowComparator(0));
    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(VectorUtil.getVector("-1.0 2.0"), rows.get(0).getField(1));
    assertEquals(VectorUtil.getVector("-3.0 -3.0"), rows.get(1).getField(1));
    assertEquals(VectorUtil.getVector("2.0 2.0"), rows.get(2).getField(1));

    // Stream path: the sink only holds rows after the stream job has been executed.
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(model.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));
    assertEquals(VectorUtil.getVector("-1.0 2.0"), VectorUtil.getVector(result.get(0).getField(1)));
    assertEquals(VectorUtil.getVector("-3.0 -3.0"), VectorUtil.getVector(result.get(1).getField(1)));
    assertEquals(VectorUtil.getVector("2.0 2.0"), VectorUtil.getVector(result.get(2).getField(1)));
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) TableSchema(org.apache.flink.table.api.TableSchema) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Example 38 with CollectSinkStreamOp

Use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by Alibaba.

From the class DocCountVectorizerTest, method testDefault.

/**
 * Fits a {@code DocCountVectorizer} (feature type "TF", reading "sentence" and
 * writing "features") on the shared {@code rows} fixture, then checks that the
 * batch transform and the stream transform both match {@code expected}.
 *
 * @throws Exception propagated from collecting the batch result or running the stream job
 */
@Test
public void testDefault() throws Exception {
    BatchOperator<?> batchSource = new MemSourceBatchOp(rows, new String[] { "id", "sentence", "label" });
    StreamOperator<?> streamSource = new MemSourceStreamOp(rows, new String[] { "id", "sentence", "label" });

    DocCountVectorizer vectorizer = new DocCountVectorizer()
        .setSelectedCol("sentence")
        .setOutputCol("features")
        .setFeatureType("TF");
    DocCountVectorizerModel fitted = vectorizer.fit(batchSource);

    // Batch path.
    BatchOperator<?> batchOutput = fitted.transform(batchSource);
    assertListRowEqual(expected, batchOutput.collect(), 0);

    // Stream path: the sink is read only after the stream job has been executed.
    StreamOperator<?> streamOutput = fitted.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamOutput);
    StreamOperator.execute();
    assertListRowEqual(expected, collector.getAndRemoveValues(), 0);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Test(org.junit.Test)

Example 39 with CollectSinkStreamOp

Use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by Alibaba.

From the class DocHashCountVectorizerTest, method testDefault.

/**
 * Fits a {@code DocHashCountVectorizer} (10 features, reading "sentence" and
 * writing "res") on the shared {@code rows} fixture, then checks that the batch
 * transform and the stream transform both match {@code expected}, ignoring row order.
 *
 * @throws Exception propagated from collecting the batch result or running the stream job
 */
@Test
public void testDefault() throws Exception {
    BatchOperator<?> batchSource = new MemSourceBatchOp(rows, new String[] { "id", "sentence", "label" });
    StreamOperator<?> streamSource = new MemSourceStreamOp(rows, new String[] { "id", "sentence", "label" });

    DocHashCountVectorizer vectorizer = new DocHashCountVectorizer()
        .setSelectedCol("sentence")
        .setNumFeatures(10)
        .setOutputCol("res");
    DocHashCountVectorizerModel fitted = vectorizer.fit(batchSource);

    // Batch path.
    BatchOperator<?> batchOutput = fitted.transform(batchSource);
    assertListRowEqualWithoutOrder(expected, batchOutput.collect());

    // Stream path: the sink is read only after the stream job has been executed.
    StreamOperator<?> streamOutput = fitted.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamOutput);
    StreamOperator.execute();
    assertListRowEqualWithoutOrder(expected, collector.getAndRemoveValues());
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Test(org.junit.Test)

Example 40 with CollectSinkStreamOp

Use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by Alibaba.

From the class AlsTrainBatchOpTest, method testStreamPredict.

/**
 * Runs the five ALS stream recommenders against a model produced by {@code train()}
 * and checks the number of rows gathered from four of them.
 *
 * <p>The rating predictor's output is only printed. The outputs of the other four
 * recommenders are each tagged with a {@code rec_type} column, unioned, and
 * collected; the test expects 24 rows in total.
 *
 * @throws Exception propagated from training or from running the stream job
 */
@Test
public void testStreamPredict() throws Exception {
    Long envId = MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID;
    StreamOperator<?> streamsamples = new MemSourceStreamOp(rows1, new String[] { "uid", "iid", "label" }).setMLEnvironmentId(envId);
    BatchOperator<?> model = train();

    // One predictor per recommendation flavour, all writing to column "p".
    AlsRateRecommStreamOp predictor1 = new AlsRateRecommStreamOp(model).setUserCol("uid").setItemCol("iid").setRecommCol("p");
    AlsItemsPerUserRecommStreamOp predictor2 = new AlsItemsPerUserRecommStreamOp(model).setUserCol("uid").setRecommCol("p");
    AlsUsersPerItemRecommStreamOp predictor3 = new AlsUsersPerItemRecommStreamOp(model).setItemCol("iid").setRecommCol("p");
    AlsSimilarUsersRecommStreamOp predictor4 = new AlsSimilarUsersRecommStreamOp(model).setUserCol("uid").setRecommCol("p");
    AlsSimilarItemsRecommStreamOp predictor5 = new AlsSimilarItemsRecommStreamOp(model).setItemCol("iid").setRecommCol("p");

    StreamOperator<?> result1 = predictor1.linkFrom(streamsamples);
    StreamOperator<?> result2 = predictor2.linkFrom(streamsamples);
    StreamOperator<?> result3 = predictor3.linkFrom(streamsamples);
    StreamOperator<?> result4 = predictor4.linkFrom(streamsamples);
    StreamOperator<?> result5 = predictor5.linkFrom(streamsamples);

    // Tag each stream with the operator that produced it before unioning.
    result2 = result2.select("*, 'AlsItemsPerUserRecommStreamOp' as rec_type");
    result3 = result3.select("*, 'AlsUsersPerItemRecommStreamOp' as rec_type");
    result4 = result4.select("*, 'AlsSimilarUsersRecommStreamOp' as rec_type");
    result5 = result5.select("*, 'AlsSimilarItemsRecommStreamOp' as rec_type");

    // result1 is not part of the union below, so it is printed rather than asserted on.
    result1.print();
    CollectSinkStreamOp sop = new UnionAllStreamOp().linkFrom(result2, result3, result4, result5).link(new CollectSinkStreamOp());
    StreamOperator.execute();
    List<Row> res = sop.getAndRemoveValues();
    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.assertEquals(24, res.size());
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) AlsRateRecommStreamOp(com.alibaba.alink.operator.stream.recommendation.AlsRateRecommStreamOp) AlsSimilarItemsRecommStreamOp(com.alibaba.alink.operator.stream.recommendation.AlsSimilarItemsRecommStreamOp) UnionAllStreamOp(com.alibaba.alink.operator.stream.sql.UnionAllStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) AlsItemsPerUserRecommStreamOp(com.alibaba.alink.operator.stream.recommendation.AlsItemsPerUserRecommStreamOp) AlsSimilarUsersRecommStreamOp(com.alibaba.alink.operator.stream.recommendation.AlsSimilarUsersRecommStreamOp) AlsUsersPerItemRecommStreamOp(com.alibaba.alink.operator.stream.recommendation.AlsUsersPerItemRecommStreamOp) Test(org.junit.Test)

Aggregations

CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)80 Test (org.junit.Test)76 Row (org.apache.flink.types.Row)72 MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp)60 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)30 RowComparator (com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator)25 StreamOperator (com.alibaba.alink.operator.stream.StreamOperator)25 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)20 Pipeline (com.alibaba.alink.pipeline.Pipeline)9 PipelineModel (com.alibaba.alink.pipeline.PipelineModel)9 Timestamp (java.sql.Timestamp)8 SparseVector (com.alibaba.alink.common.linalg.SparseVector)6 StringNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest)6 TextApproxNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest)6 OverCountWindowStreamOp (com.alibaba.alink.operator.stream.feature.OverCountWindowStreamOp)6 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)6 MTable (com.alibaba.alink.common.MTable)3 DenseVector (com.alibaba.alink.common.linalg.DenseVector)3 TableSchema (org.apache.flink.table.api.TableSchema)3