Search in sources :

Example 21 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

From the class StringNearestNeighborTest, method testVectorApproxStream:

/**
 * Fits a {@link VectorApproxNearestNeighbor} model (top 3, id column "id", selected column "vec")
 * on the in-memory dictionary rows, streams the query rows through it, and compares the scores
 * extracted from field 2 of every collected row with the values expected for that row's id
 * (field 0), using an absolute tolerance of 0.01.
 *
 * @throws Exception if building or executing the stream job fails
 */
@Test
public void testVectorApproxStream() throws Exception {
    BatchOperator<?> dict = new MemSourceBatchOp(Arrays.asList(dictRows), new String[] { "id", "vec" });
    StreamOperator<?> query = new MemSourceStreamOp(Arrays.asList(queryRows), new String[] { "id", "vec" });
    VectorApproxNearestNeighborModel nearestNeighbor = new VectorApproxNearestNeighbor()
        .setIdCol("id")
        .setSelectedCol("vec")
        .setTopN(3)
        .setOutputCol("output")
        .fit(dict);
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(nearestNeighbor.transform(query));
    StreamOperator.execute();
    List<Row> res = collectSinkStreamOp.getAndRemoveValues();
    // Without this guard the verification loop below would pass vacuously on an empty result.
    Assert.assertFalse("stream job produced no rows", res.isEmpty());
    Map<Object, Double[]> score = new HashMap<>();
    score.put(1, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(2, new Double[] { 0.0, 0.17320508075688773, 0.17320508075688776 });
    score.put(3, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(4, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });
    score.put(5, new Double[] { 0.0, 0.17320508075680896, 0.17320508075680896 });
    score.put(6, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });
    for (Row row : res) {
        Double[] actual = StringNearestNeighborBatchOpTest.extractScore((String) row.getField(2));
        Double[] expect = score.get(row.getField(0));
        // Fail with a readable message instead of a NullPointerException on an unknown id.
        Assert.assertNotNull("no expected scores for id " + row.getField(0), expect);
        for (int i = 0; i < actual.length; i++) {
            // JUnit's contract is (expected, actual, delta); the order matters for failure messages.
            Assert.assertEquals(expect[i], actual[i], 0.01);
        }
    }
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) HashMap(java.util.HashMap) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test) StringNearestNeighborBatchOpTest(com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest) TextApproxNearestNeighborBatchOpTest(com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest)

Example 22 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

From the class StringNearestNeighborTest, method testVectorStream:

/**
 * Fits a {@link VectorNearestNeighbor} model (top 3, id column "id", selected column "vec") on the
 * in-memory dictionary rows, streams the query rows through it, and compares the scores extracted
 * from field 2 of every collected row with the values expected for that row's id (field 0), using
 * an absolute tolerance of 0.01.
 *
 * @throws Exception if building or executing the stream job fails
 */
@Test
public void testVectorStream() throws Exception {
    BatchOperator<?> dict = new MemSourceBatchOp(Arrays.asList(dictRows), new String[] { "id", "vec" });
    StreamOperator<?> query = new MemSourceStreamOp(Arrays.asList(queryRows), new String[] { "id", "vec" });
    VectorNearestNeighborModel nearestNeighbor = new VectorNearestNeighbor()
        .setIdCol("id")
        .setSelectedCol("vec")
        .setTopN(3)
        .setOutputCol("output")
        .fit(dict);
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(nearestNeighbor.transform(query));
    StreamOperator.execute();
    List<Row> res = collectSinkStreamOp.getAndRemoveValues();
    // Without this guard the verification loop below would pass vacuously on an empty result.
    Assert.assertFalse("stream job produced no rows", res.isEmpty());
    Map<Object, Double[]> score = new HashMap<>();
    score.put(1, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(2, new Double[] { 0.0, 0.17320508075688773, 0.17320508075688776 });
    score.put(3, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(4, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });
    score.put(5, new Double[] { 0.0, 0.17320508075680896, 0.17320508075680896 });
    score.put(6, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });
    for (Row row : res) {
        Double[] actual = StringNearestNeighborBatchOpTest.extractScore((String) row.getField(2));
        Double[] expect = score.get(row.getField(0));
        // Fail with a readable message instead of a NullPointerException on an unknown id.
        Assert.assertNotNull("no expected scores for id " + row.getField(0), expect);
        for (int i = 0; i < actual.length; i++) {
            // JUnit's contract is (expected, actual, delta); the order matters for failure messages.
            Assert.assertEquals(expect[i], actual[i], 0.01);
        }
    }
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) HashMap(java.util.HashMap) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test) StringNearestNeighborBatchOpTest(com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest) TextApproxNearestNeighborBatchOpTest(com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest)

Example 23 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

From the class StringNearestNeighborTest, method testTextApproxStream:

/**
 * Fits a {@link TextApproxNearestNeighbor} model (top 3, id column "id", selected column "str") on
 * the dictionary rows borrowed from {@link TextApproxNearestNeighborBatchOpTest}, streams that
 * test's query rows through it, and compares the scores extracted from field 2 of every collected
 * row with the values expected for that row's id (field 0), using an absolute tolerance of 0.01.
 *
 * @throws Exception if building or executing the stream job fails
 */
@Test
public void testTextApproxStream() throws Exception {
    BatchOperator<?> dict = new MemSourceBatchOp(Arrays.asList(TextApproxNearestNeighborBatchOpTest.dictRows), new String[] { "id", "str" });
    StreamOperator<?> query = new MemSourceStreamOp(Arrays.asList(TextApproxNearestNeighborBatchOpTest.queryRows), new String[] { "id", "str" });
    StringApproxNearestNeighborModel nearestNeighbor = new TextApproxNearestNeighbor()
        .setIdCol("id")
        .setSelectedCol("str")
        .setTopN(3)
        .setOutputCol("output")
        .fit(dict);
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(nearestNeighbor.transform(query));
    StreamOperator.execute();
    List<Row> res = collectSinkStreamOp.getAndRemoveValues();
    // Without this guard the verification loop below would pass vacuously on an empty result.
    Assert.assertFalse("stream job produced no rows", res.isEmpty());
    Map<Object, Double[]> score = new HashMap<>();
    score.put(1, new Double[] { 0.953125, 0.9375, 0.921875 });
    score.put(2, new Double[] { 0.953125, 0.9375, 0.921875 });
    score.put(3, new Double[] { 0.9375, 0.921875, 0.90625 });
    score.put(4, new Double[] { 0.96875, 0.90625, 0.890625 });
    score.put(5, new Double[] { 0.9375, 0.921875, 0.90625 });
    score.put(6, new Double[] { 0.96875, 0.90625, 0.890625 });
    for (Row row : res) {
        Double[] actual = StringNearestNeighborBatchOpTest.extractScore((String) row.getField(2));
        Double[] expect = score.get(row.getField(0));
        // Fail with a readable message instead of a NullPointerException on an unknown id.
        Assert.assertNotNull("no expected scores for id " + row.getField(0), expect);
        for (int i = 0; i < actual.length; i++) {
            // JUnit's contract is (expected, actual, delta); the order matters for failure messages.
            Assert.assertEquals(expect[i], actual[i], 0.01);
        }
    }
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) HashMap(java.util.HashMap) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test) StringNearestNeighborBatchOpTest(com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest) TextApproxNearestNeighborBatchOpTest(com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest)

Example 24 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

From the class StringSimilarityPairwiseTest, method testStringSimilarityPairwise:

/**
 * Runs a {@link StringSimilarityPairwise} transformer configured with the "LEVENSHTEIN" metric over
 * the columns "text1" and "text2" of five in-memory rows, once as a batch job (printed directly)
 * and once as a stream job whose collected rows are sorted with {@code RowComparator(0)} and
 * printed to standard output.
 *
 * <p>Note: this test only prints its results; it contains no assertions.
 *
 * @throws Exception if building or executing either job fails
 */
@Test
public void testStringSimilarityPairwise() throws Exception {
    final String schema = "id int, text1 string, text2 string";
    List<Row> rows = Arrays.asList(
        Row.of(0, "abcde", "aabce"),
        Row.of(1, "aacedw", "aabbed"),
        Row.of(2, "cdefa", "bbcefa"),
        Row.of(3, "bdefh", "ddeac"),
        Row.of(4, "acedm", "aeefbc"));
    BatchOperator<?> batchInput = new MemSourceBatchOp(rows, schema);
    StreamOperator<?> streamInput = new MemSourceStreamOp(rows, schema);

    StringSimilarityPairwise pairwise = new StringSimilarityPairwise()
        .setSelectedCols("text1", "text2")
        .setMetric("LEVENSHTEIN")
        .setOutputCol("output");

    // Batch path: transform and print immediately.
    pairwise.transform(batchInput).print();

    // Stream path: collect the transformed rows, then print them in sorted order.
    CollectSinkStreamOp sink = new CollectSinkStreamOp().linkFrom(pairwise.transform(streamInput));
    StreamOperator.execute();
    List<Row> collected = sink.getAndRemoveValues();
    collected.sort(new RowComparator(0));
    for (Row row : collected) {
        System.out.println("\"" + row.toString() + "\",");
    }
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 25 with CollectSinkStreamOp

use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.

From the class VectorNormalizeTest, method pipelineStreamTest:

/**
 * Applies a {@link VectorNormalizer} (p = 2.0, selected column "c0", output column "pm") to the
 * stream returned by {@code getData(false)}, collects the result, sorts it with
 * {@code RowComparator(0)}, and checks field 4 of the first three rows against the expected
 * normalized vectors.
 *
 * @throws Exception if building or executing the stream job fails
 */
@Test
public void pipelineStreamTest() throws Exception {
    StreamOperator<?> streamOperator = new VectorNormalizer()
        .setP(2.0)
        .setOutputCol("pm")
        .setSelectedCol("c0")
        .transform((StreamOperator<?>) getData(false));
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(streamOperator);
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));
    // JUnit's contract is assertEquals(expected, actual); the order matters for failure messages.
    assertEquals(VectorUtil.getVector("$6$1:0.35640489924669927 2:0.5346073488700489 5:0.7662705333804034"), VectorUtil.getVector(result.get(0).getField(4)));
    assertEquals(VectorUtil.getVector("$8$1:0.35640489924669927 2:0.5346073488700489 7:0.7662705333804034"), VectorUtil.getVector(result.get(1).getField(4)));
    assertEquals(VectorUtil.getVector("$8$1:0.35640489924669927 2:0.5346073488700489 7:0.7662705333804034"), VectorUtil.getVector(result.get(2).getField(4)));
}
Also used : CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) RowComparator(com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator) Row(org.apache.flink.types.Row) StreamOperator(com.alibaba.alink.operator.stream.StreamOperator) Test(org.junit.Test)

Aggregations

CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 80, Test (org.junit.Test): 76, Row (org.apache.flink.types.Row): 72, MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp): 60, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 30, RowComparator (com.alibaba.alink.operator.common.dataproc.SortUtils.RowComparator): 25, StreamOperator (com.alibaba.alink.operator.stream.StreamOperator): 25, BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 20, Pipeline (com.alibaba.alink.pipeline.Pipeline): 9, PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 9, Timestamp (java.sql.Timestamp): 8, SparseVector (com.alibaba.alink.common.linalg.SparseVector): 6, StringNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.StringNearestNeighborBatchOpTest): 6, TextApproxNearestNeighborBatchOpTest (com.alibaba.alink.operator.batch.similarity.TextApproxNearestNeighborBatchOpTest): 6, OverCountWindowStreamOp (com.alibaba.alink.operator.stream.feature.OverCountWindowStreamOp): 6, ArrayList (java.util.ArrayList): 6, HashMap (java.util.HashMap): 6, MTable (com.alibaba.alink.common.MTable): 3, DenseVector (com.alibaba.alink.common.linalg.DenseVector): 3, TableSchema (org.apache.flink.table.api.TableSchema): 3