Search in sources :

Example 26 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class VectorSizeHintTest method pipelineStreamTest.

/**
 * Runs {@code VectorSizeHint} (selected column "c0", output column "filter_result", size 8)
 * on the stream returned by {@code getData(false)} and checks that field 4 of the first two
 * collected rows holds the expected 8-dimensional sparse vector.
 *
 * @throws Exception if building or executing the stream job fails
 */
@Test
public void pipelineStreamTest() throws Exception {
    StreamOperator<?> hinted = new VectorSizeHint()
        .setSelectedCol("c0")
        .setOutputCol("filter_result")
        .setSize(8)
        .transform((StreamOperator<?>) getData(false));
    CollectSinkStreamOp sink = new CollectSinkStreamOp().linkFrom(hinted);
    // The sink only has values after the stream job has been executed.
    StreamOperator.execute();

    List<Row> result = sink.getAndRemoveValues();
    // Collected order is not relied upon; sort first (presumably by field 0 - confirm RowComparator semantics).
    result.sort(new RowComparator(0));

    // Both checked rows are expected to carry the same vector.
    SparseVector expected = new SparseVector(8, new int[] { 1, 2, 7 }, new double[] { 2.0, 3.0, 4.3 });
    // JUnit contract: expected value first, actual value second, so failure messages read correctly.
    assertEquals(expected, VectorUtil.getSparseVector(result.get(0).getField(4)));
    assertEquals(expected, VectorUtil.getSparseVector(result.get(1).getField(4)));
}
Also used : CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Example 27 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class VectorStandardScalerTest method testPipeline.

/**
 * Fits a {@code VectorStandardScaler} on a three-row in-memory table and verifies the scaled
 * "vec" column, first through the batch path and then through the stream path.
 *
 * <p>NOTE(review): the expected vectors are fixed literals and do not vary with
 * {@code withMean} / {@code withStdv}; presumably they are only valid for one flag
 * combination - confirm against the callers.
 *
 * @param withMean value forwarded to {@code setWithMean}
 * @param withStdv value forwarded to {@code setWithStd}
 * @throws Exception if fitting, collecting, or executing the stream job fails
 */
public static void testPipeline(boolean withMean, boolean withStdv) throws Exception {
    Row[] rowData = new Row[] { Row.of("0", "1.0 2.0"), Row.of("1", "-1.0 -3.0"), Row.of("2", "4.0 2.0") };
    TableSchema schema = new TableSchema(new String[] { "id", "vec" }, new TypeInformation<?>[] { Types.STRING, Types.STRING });
    BatchOperator<?> batchData = new MemSourceBatchOp(Arrays.asList(rowData), schema);
    StreamOperator<?> streamData = new MemSourceStreamOp(Arrays.asList(rowData), schema);

    // Expected scaled vectors, indexed by row position after sorting; shared by both paths.
    String[] expectedVectors = {
        "-0.13245323570650433 0.5773502691896257",
        "-0.9271726499455304 -1.1547005383792515",
        "1.059625885652035 0.5773502691896257"
    };

    VectorStandardScaler scaler = new VectorStandardScaler()
        .setSelectedCol("vec")
        .setWithMean(withMean)
        .setWithStd(withStdv);
    VectorStandardScalerModel denseModel = scaler.fit(batchData);

    // Batch path: field 1 of each row is compared directly against the parsed expected vector.
    BatchOperator<?> res = denseModel.transform(batchData);
    List<Row> rows = res.getDataSet().collect();
    rows.sort(new RowComparator(0));
    for (int i = 0; i < expectedVectors.length; i++) {
        // JUnit contract: expected value first, actual value second.
        assertEquals(VectorUtil.getVector(expectedVectors[i]), rows.get(i).getField(1));
    }

    // Stream path: results are only available from the sink after the job has been executed.
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(denseModel.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));
    for (int i = 0; i < expectedVectors.length; i++) {
        assertEquals(VectorUtil.getVector(expectedVectors[i]), VectorUtil.getVector(result.get(i).getField(1)));
    }
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) TableSchema(org.apache.flink.table.api.TableSchema) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator)

Example 28 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class BucketizerTest method testBucketizer.

/**
 * Applies a {@code Bucketizer} to six in-memory rows and checks that the "bucket1" output
 * matches the expected bucket indices, once via the batch source and once via the stream source.
 *
 * @throws Exception if the batch collect or the stream job execution fails
 */
@Test
public void testBucketizer() throws Exception {
    Row[] inputRows = new Row[] {
        Row.of(1, -999.9, -999.9),
        Row.of(2, -0.5, -0.2),
        Row.of(3, -0.3, -0.1),
        Row.of(4, 0.0, 0.0),
        Row.of(5, 0.2, 0.4),
        Row.of(6, 999.9, 999.9)
    };
    // Two cut arrays are supplied alongside two selected columns (presumably one per column).
    double[][] cutsArray = new double[][] { { -0.5, 0.0, 0.5 }, { -0.3, 0.0, 0.3, 0.4 } };
    // Only (id, bucket1) is verified; bucket2 is produced but not asserted on.
    List<Row> expected = Arrays.asList(
        Row.of(1, 0L),
        Row.of(2, 0L),
        Row.of(3, 1L),
        Row.of(4, 1L),
        Row.of(5, 2L),
        Row.of(6, 3L));

    BatchOperator<?> batchInput = new MemSourceBatchOp(inputRows, new String[] { "id", "features1", "features2" });
    StreamOperator<?> streamInput = new MemSourceStreamOp(inputRows, new String[] { "id", "features1", "features2" });

    Bucketizer bucketizer = new Bucketizer()
        .setSelectedCols(new String[] { "features1", "features2" })
        .setOutputCols(new String[] { "bucket1", "bucket2" })
        .setCutsArray(cutsArray);

    // Batch path.
    BatchOperator<?> batchOutput = bucketizer.transform(batchInput).select("id, bucket1");
    List<Row> batchRows = batchOutput.collect();
    assertListRowEqual(expected, batchRows, 0);

    // Stream path: the sink is read only after the stream job has been executed.
    StreamOperator<?> streamOutput = bucketizer.transform(streamInput).select("id, bucket1");
    CollectSinkStreamOp sink = streamOutput.link(new CollectSinkStreamOp());
    StreamOperator.execute();
    assertListRowEqual(expected, sink.getAndRemoveValues(), 0);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 29 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class FeatureHasherTest method test.

/**
 * Hashes four mixed-type columns into a 100-dimensional "features" vector with
 * {@code FeatureHasher} and checks the output against fixed sparse vectors, once via the
 * batch source and once via the stream source.
 *
 * @throws Exception if the batch collect or the stream job execution fails
 */
@Test
public void test() throws Exception {
    Row[] inputRows = new Row[] {
        Row.of(1, 1.1, true, "2", "A"),
        Row.of(2, 1.1, false, "2", "B"),
        Row.of(3, 1.1, true, "1", "B"),
        Row.of(4, 2.2, true, "1", "A")
    };
    // Expected (id, features) pairs.
    List<Row> expected = Arrays.asList(
        Row.of(1, new SparseVector(100, new int[] { 9, 38, 45, 95 }, new double[] { 1.0, 1.1, 1.0, 1.0 })),
        Row.of(2, new SparseVector(100, new int[] { 9, 30, 38, 76 }, new double[] { 1.0, 1.0, 1.1, 1.0 })),
        Row.of(3, new SparseVector(100, new int[] { 11, 38, 76, 95 }, new double[] { 1.0, 1.1, 1.0, 1.0 })),
        Row.of(4, new SparseVector(100, new int[] { 11, 38, 45, 95 }, new double[] { 1.0, 2.2, 1.0, 1.0 })));

    BatchOperator<?> batchInput = new MemSourceBatchOp(inputRows, new String[] { "id", "double", "bool", "number", "str" });
    StreamOperator<?> streamInput = new MemSourceStreamOp(inputRows, new String[] { "id", "double", "bool", "number", "str" });

    FeatureHasher hasher = new FeatureHasher()
        .setSelectedCols(new String[] { "double", "bool", "number", "str" })
        .setNumFeatures(100)
        .setOutputCol("features");

    // Batch path.
    BatchOperator<?> batchOutput = hasher.transform(batchInput).select("id, features");
    List<Row> batchRows = batchOutput.collect();
    assertListRowEqual(expected, batchRows, 0);

    // Stream path: the sink is read only after the stream job has been executed.
    StreamOperator<?> streamOutput = hasher.transform(streamInput).select("id, features");
    CollectSinkStreamOp sink = streamOutput.link(new CollectSinkStreamOp());
    StreamOperator.execute();
    assertListRowEqual(expected, sink.getAndRemoveValues(), 0);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 30 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

the class KMeansTest method testKmeans.

/**
 * Fits a single-stage pipeline containing a {@code KMeans} estimator (k = 2) on
 * {@code inputBatchOp}, then passes the ("id", "distance") predictions from both the batch
 * and the stream input to {@code verifyPredResult}.
 *
 * @throws Exception if fitting, collecting, or executing the stream job fails
 */
@Test
public void testKmeans() throws Exception {
    PipelineModel fittedModel = new Pipeline()
        .add(new KMeans()
            .setVectorCol("vector")
            .setPredictionCol("pred")
            .setPredictionDistanceCol("distance")
            .setK(2))
        .fit(inputBatchOp);

    // Batch predictions.
    BatchOperator<?> batchPrediction = fittedModel
        .transform(inputBatchOp)
        .select(new String[] { "id", "distance" });
    verifyPredResult(batchPrediction.collect());

    // Stream predictions: the sink is read only after the stream job has been executed.
    CollectSinkStreamOp sink = fittedModel
        .transform(inputStreamOp)
        .select(new String[] { "id", "distance" })
        .link(new CollectSinkStreamOp());
    StreamOperator.execute();
    verifyPredResult(sink.getAndRemoveValues());
}
Also used : CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Pipeline(com.alibaba.alink.pipeline.Pipeline) Test(org.junit.Test)

Aggregations

CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)80 Test (org.junit.Test)76 Row (org.apache.flink.types.Row)72 MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp)60 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)30 RowComparator (com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator)25 StreamOperator (com.alibaba.alink.operator.stream.StreamOperator)25 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)20 Pipeline (com.alibaba.alink.pipeline.Pipeline)9 PipelineModel (com.alibaba.alink.pipeline.PipelineModel)9 Timestamp (java.sql.Timestamp)8 SparseVector (com.alibaba.alink.common.linalg.SparseVector)6 StringNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest)6 TextApproxNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest)6 OverCountWindowStreamOp (com.alibaba.alink.operator.stream.feature.OverCountWindowStreamOp)6 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)6 MTable (com.alibaba.alink.common.MTable)3 DenseVector (com.alibaba.alink.common.linalg.DenseVector)3 TableSchema (org.apache.flink.table.api.TableSchema)3