use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class StringNearestNeighborTest method testVectorApproxStream.
/**
 * Fits a {@code VectorApproxNearestNeighbor} model (top 3) on the batch dictionary, pushes the
 * streaming queries through it and compares the scores parsed from each collected row with the
 * expected values registered for that row's id.
 *
 * @throws Exception propagated from building or executing the streaming job
 */
@Test
public void testVectorApproxStream() throws Exception {
    BatchOperator<?> dict = new MemSourceBatchOp(Arrays.asList(dictRows), new String[] { "id", "vec" });
    StreamOperator<?> query = new MemSourceStreamOp(Arrays.asList(queryRows), new String[] { "id", "vec" });
    VectorApproxNearestNeighborModel nearestNeighbor = new VectorApproxNearestNeighbor()
        .setIdCol("id")
        .setSelectedCol("vec")
        .setTopN(3)
        .setOutputCol("output")
        .fit(dict);
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(nearestNeighbor.transform(query));
    // The sink is read only after the streaming job has been executed.
    StreamOperator.execute();
    List<Row> res = collectSinkStreamOp.getAndRemoveValues();

    // Expected scores keyed by the value found in field 0 of each result row.
    Map<Object, Double[]> score = new HashMap<>();
    score.put(1, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(2, new Double[] { 0.0, 0.17320508075688773, 0.17320508075688776 });
    score.put(3, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(4, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });
    score.put(5, new Double[] { 0.0, 0.17320508075680896, 0.17320508075680896 });
    score.put(6, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });

    // Without this guard an empty result would skip the loop and the test would pass vacuously.
    Assert.assertFalse("stream sink collected no rows", res.isEmpty());
    for (Row row : res) {
        Object id = row.getField(0);
        Double[] actual = StringNearestNeighborBatchOpTest.extractScore((String) row.getField(2));
        Double[] expect = score.get(id);
        // Fail with a readable message instead of an NPE / index error on unexpected output.
        Assert.assertNotNull("no expected scores registered for id " + id, expect);
        Assert.assertTrue("more scores than expected for id " + id, actual.length <= expect.length);
        for (int i = 0; i < actual.length; i++) {
            // JUnit order is (message, expected, actual, delta).
            Assert.assertEquals("score " + i + " for id " + id, expect[i], actual[i], 0.01);
        }
    }
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class StringNearestNeighborTest method testVectorStream.
/**
 * Fits a {@code VectorNearestNeighbor} model (top 3) on the batch dictionary, pushes the
 * streaming queries through it and compares the scores parsed from each collected row with the
 * expected values registered for that row's id.
 *
 * @throws Exception propagated from building or executing the streaming job
 */
@Test
public void testVectorStream() throws Exception {
    BatchOperator<?> dict = new MemSourceBatchOp(Arrays.asList(dictRows), new String[] { "id", "vec" });
    StreamOperator<?> query = new MemSourceStreamOp(Arrays.asList(queryRows), new String[] { "id", "vec" });
    VectorNearestNeighborModel nearestNeighbor = new VectorNearestNeighbor()
        .setIdCol("id")
        .setSelectedCol("vec")
        .setTopN(3)
        .setOutputCol("output")
        .fit(dict);
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(nearestNeighbor.transform(query));
    // The sink is read only after the streaming job has been executed.
    StreamOperator.execute();
    List<Row> res = collectSinkStreamOp.getAndRemoveValues();

    // Expected scores keyed by the value found in field 0 of each result row.
    Map<Object, Double[]> score = new HashMap<>();
    score.put(1, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(2, new Double[] { 0.0, 0.17320508075688773, 0.17320508075688776 });
    score.put(3, new Double[] { 0.0, 0.17320508075688776, 0.3464101615137755 });
    score.put(4, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });
    score.put(5, new Double[] { 0.0, 0.17320508075680896, 0.17320508075680896 });
    score.put(6, new Double[] { 0.0, 0.17320508075680896, 0.346410161513782 });

    // Without this guard an empty result would skip the loop and the test would pass vacuously.
    Assert.assertFalse("stream sink collected no rows", res.isEmpty());
    for (Row row : res) {
        Object id = row.getField(0);
        Double[] actual = StringNearestNeighborBatchOpTest.extractScore((String) row.getField(2));
        Double[] expect = score.get(id);
        // Fail with a readable message instead of an NPE / index error on unexpected output.
        Assert.assertNotNull("no expected scores registered for id " + id, expect);
        Assert.assertTrue("more scores than expected for id " + id, actual.length <= expect.length);
        for (int i = 0; i < actual.length; i++) {
            // JUnit order is (message, expected, actual, delta).
            Assert.assertEquals("score " + i + " for id " + id, expect[i], actual[i], 0.01);
        }
    }
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class StringNearestNeighborTest method testTextApproxStream.
/**
 * Fits a {@code TextApproxNearestNeighbor} model (top 3) on the batch dictionary taken from
 * {@code TextApproxNearestNeighborBatchOpTest}, pushes the streaming queries through it and
 * compares the scores parsed from each collected row with the expected values registered for
 * that row's id.
 *
 * @throws Exception propagated from building or executing the streaming job
 */
@Test
public void testTextApproxStream() throws Exception {
    BatchOperator<?> dict = new MemSourceBatchOp(
        Arrays.asList(TextApproxNearestNeighborBatchOpTest.dictRows), new String[] { "id", "str" });
    StreamOperator<?> query = new MemSourceStreamOp(
        Arrays.asList(TextApproxNearestNeighborBatchOpTest.queryRows), new String[] { "id", "str" });
    StringApproxNearestNeighborModel nearestNeighbor = new TextApproxNearestNeighbor()
        .setIdCol("id")
        .setSelectedCol("str")
        .setTopN(3)
        .setOutputCol("output")
        .fit(dict);
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(nearestNeighbor.transform(query));
    // The sink is read only after the streaming job has been executed.
    StreamOperator.execute();
    List<Row> res = collectSinkStreamOp.getAndRemoveValues();

    // Expected scores keyed by the value found in field 0 of each result row.
    Map<Object, Double[]> score = new HashMap<>();
    score.put(1, new Double[] { 0.953125, 0.9375, 0.921875 });
    score.put(2, new Double[] { 0.953125, 0.9375, 0.921875 });
    score.put(3, new Double[] { 0.9375, 0.921875, 0.90625 });
    score.put(4, new Double[] { 0.96875, 0.90625, 0.890625 });
    score.put(5, new Double[] { 0.9375, 0.921875, 0.90625 });
    score.put(6, new Double[] { 0.96875, 0.90625, 0.890625 });

    // Without this guard an empty result would skip the loop and the test would pass vacuously.
    Assert.assertFalse("stream sink collected no rows", res.isEmpty());
    for (Row row : res) {
        Object id = row.getField(0);
        Double[] actual = StringNearestNeighborBatchOpTest.extractScore((String) row.getField(2));
        Double[] expect = score.get(id);
        // Fail with a readable message instead of an NPE / index error on unexpected output.
        Assert.assertNotNull("no expected scores registered for id " + id, expect);
        Assert.assertTrue("more scores than expected for id " + id, actual.length <= expect.length);
        for (int i = 0; i < actual.length; i++) {
            // JUnit order is (message, expected, actual, delta).
            Assert.assertEquals("score " + i + " for id " + id, expect[i], actual[i], 0.01);
        }
    }
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class StringSimilarityPairwiseTest method testStringSimilarityPairwise.
/**
 * Applies {@code StringSimilarityPairwise} with the "LEVENSHTEIN" metric to columns
 * {@code text1}/{@code text2} of the same five rows, once as a batch source (printed directly)
 * and once as a stream source (collected, sorted with {@code RowComparator(0)} and printed as
 * quoted lines). The test makes no assertions; it only prints the results.
 *
 * @throws Exception propagated from building or executing the jobs
 */
@Test
public void testStringSimilarityPairwise() throws Exception {
    final String schema = "id int, text1 string, text2 string";
    List<Row> rows = Arrays.asList(
        Row.of(0, "abcde", "aabce"),
        Row.of(1, "aacedw", "aabbed"),
        Row.of(2, "cdefa", "bbcefa"),
        Row.of(3, "bdefh", "ddeac"),
        Row.of(4, "acedm", "aeefbc"));
    BatchOperator<?> batchSource = new MemSourceBatchOp(rows, schema);
    StreamOperator<?> streamSource = new MemSourceStreamOp(rows, schema);

    StringSimilarityPairwise pairwise = new StringSimilarityPairwise()
        .setSelectedCols("text1", "text2")
        .setMetric("LEVENSHTEIN")
        .setOutputCol("output");

    // Batch path: print the transformed table.
    pairwise.transform(batchSource).print();

    // Stream path: collect the rows, then print them in sorted order.
    CollectSinkStreamOp sink = new CollectSinkStreamOp().linkFrom(pairwise.transform(streamSource));
    StreamOperator.execute();
    List<Row> collected = sink.getAndRemoveValues();
    collected.sort(new RowComparator(0));
    for (Row row : collected) {
        System.out.println("\"" + row.toString() + "\",");
    }
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class VectorNormalizeTest method pipelineStreamTest.
/**
 * Runs {@code VectorNormalizer} (p = 2.0, selected column {@code c0}, output column {@code pm})
 * over the stream returned by {@code getData(false)}, collects the rows, sorts them with
 * {@code RowComparator(0)} and checks the vector in field 4 of the first three rows.
 * Field 4 is presumably the {@code pm} output column; confirm against {@code getData}.
 *
 * @throws Exception propagated from building or executing the streaming job
 */
@Test
public void pipelineStreamTest() throws Exception {
    StreamOperator<?> streamOperator = new VectorNormalizer()
        .setP(2.0)
        .setOutputCol("pm")
        .setSelectedCol("c0")
        .transform((StreamOperator<?>) getData(false));
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(streamOperator);
    // The sink is read only after the streaming job has been executed.
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));

    // JUnit order is (expected, actual); keeping it makes failure messages read correctly.
    assertEquals(
        VectorUtil.getVector("$6$1:0.35640489924669927 2:0.5346073488700489 5:0.7662705333804034"),
        VectorUtil.getVector(result.get(0).getField(4)));
    assertEquals(
        VectorUtil.getVector("$8$1:0.35640489924669927 2:0.5346073488700489 7:0.7662705333804034"),
        VectorUtil.getVector(result.get(1).getField(4)));
    assertEquals(
        VectorUtil.getVector("$8$1:0.35640489924669927 2:0.5346073488700489 7:0.7662705333804034"),
        VectorUtil.getVector(result.get(2).getField(4)));
}
Aggregations