Usage of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in the Alink project by Alibaba.
From the class NGramTest, method testNGram.
/**
 * Checks that {@link NGram}, applied to the "sentence" column, produces the expected
 * row both through the batch operator and through the stream operator.
 */
@Test
public void testNGram() throws Exception {
    String[] colNames = new String[] { "id", "sentence" };
    Row[] input = new Row[] { Row.of(0, "a a b b c c a") };
    List<Row> want = Arrays.asList(Row.of(0, "a_a a_b b_b b_c c_c c_a"));

    BatchOperator<?> batchSource = new MemSourceBatchOp(input, colNames);
    StreamOperator<?> streamSource = new MemSourceStreamOp(input, colNames);

    NGram nGram = new NGram().setSelectedCol("sentence");

    // Batch path: compare the collected rows against the expectation.
    List<Row> batchRows = nGram.transform(batchSource).collect();
    assertListRowEqualWithoutOrder(want, batchRows);

    // Stream path: link a collecting sink, call execute(), then compare what the sink returns.
    StreamOperator<?> streamResult = nGram.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamResult);
    StreamOperator.execute();
    assertListRowEqualWithoutOrder(want, collector.getAndRemoveValues());
}
Usage of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in the Alink project by Alibaba.
From the class RegexTokenizerTest, method testTokenize.
/**
 * Checks that {@link RegexTokenizer}, configured with pattern {@code \w+}, gaps disabled,
 * a minimum token length of 2 and lower-casing enabled, appends the expected "token"
 * column both through the batch operator and through the stream operator.
 */
@Test
public void testTokenize() throws Exception {
    String[] colNames = new String[] { "id", "sentence" };
    Row[] input = new Row[] { Row.of(0, "Hello this is a good book!") };
    List<Row> want = Arrays.asList(
        Row.of(0, "Hello this is a good book!", "hello this is good book"));

    BatchOperator<?> batchSource = new MemSourceBatchOp(input, colNames);
    StreamOperator<?> streamSource = new MemSourceStreamOp(input, colNames);

    RegexTokenizer tokenizer = new RegexTokenizer()
        .setSelectedCol("sentence")
        .setGaps(false)
        .setMinTokenLength(2)
        .setToLowerCase(true)
        .setOutputCol("token")
        .setPattern("\\w+");

    // Batch path: compare the collected rows against the expectation.
    List<Row> batchRows = tokenizer.transform(batchSource).collect();
    assertListRowEqualWithoutOrder(want, batchRows);

    // Stream path: link a collecting sink, call execute(), then compare what the sink returns.
    StreamOperator<?> streamResult = tokenizer.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamResult);
    StreamOperator.execute();
    assertListRowEqualWithoutOrder(want, collector.getAndRemoveValues());
}
Usage of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in the Alink project by Alibaba.
From the class SegmentTest, method testSegment.
/**
 * Checks that {@link Segment}, reading the "sentence" column and writing to "output",
 * yields the expected space-separated segmentation of a Chinese sentence both through
 * the batch operator and through the stream operator.
 */
@Test
public void testSegment() throws Exception {
    String[] colNames = new String[] { "id", "sentence" };
    Row[] input = new Row[] { Row.of(1, "别人复习是查漏补缺") };
    List<Row> want = Arrays.asList(Row.of(1, "别人复习是查漏补缺", "别人 复习 是 查漏 补缺"));

    BatchOperator<?> batchSource = new MemSourceBatchOp(input, colNames);
    StreamOperator<?> streamSource = new MemSourceStreamOp(input, colNames);

    Segment segmenter = new Segment()
        .setSelectedCol("sentence")
        .setOutputCol("output");

    // Batch path: compare the collected rows against the expectation.
    List<Row> batchRows = segmenter.transform(batchSource).collect();
    assertListRowEqualWithoutOrder(want, batchRows);

    // Stream path: link a collecting sink, call execute(), then compare what the sink returns.
    StreamOperator<?> streamResult = segmenter.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamResult);
    StreamOperator.execute();
    assertListRowEqualWithoutOrder(want, collector.getAndRemoveValues());
}
Usage of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in the Alink project by Alibaba.
From the class StopWordsRemoverTest, method testFilterStopWords.
/**
 * Checks that {@link StopWordsRemover}, reading the "sentence" column and writing to
 * "output", turns "This is a good book" into "good book" both through the batch
 * operator and through the stream operator.
 */
@Test
public void testFilterStopWords() throws Exception {
    String[] colNames = new String[] { "id", "sentence" };
    Row[] input = new Row[] { Row.of(0, "This is a good book") };
    List<Row> want = Arrays.asList(Row.of(0, "This is a good book", "good book"));

    BatchOperator<?> batchSource = new MemSourceBatchOp(input, colNames);
    StreamOperator<?> streamSource = new MemSourceStreamOp(input, colNames);

    StopWordsRemover remover = new StopWordsRemover()
        .setSelectedCol("sentence")
        .setOutputCol("output");

    // Batch path: compare the collected rows against the expectation.
    List<Row> batchRows = remover.transform(batchSource).collect();
    assertListRowEqualWithoutOrder(want, batchRows);

    // Stream path: link a collecting sink, call execute(), then compare what the sink returns.
    StreamOperator<?> streamResult = remover.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamResult);
    StreamOperator.execute();
    assertListRowEqualWithoutOrder(want, collector.getAndRemoveValues());
}
Usage of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in the Alink project by Alibaba.
From the class TokenizerTest, method testTokenize.
/**
 * Checks that {@link Tokenizer}, reading the "sentence" column and writing to "token",
 * yields the expected lower-cased sentence both through the batch operator and through
 * the stream operator.
 */
@Test
public void testTokenize() throws Exception {
    String[] colNames = new String[] { "id", "sentence" };
    Row[] input = new Row[] { Row.of(0, "Hello this is a good book") };
    List<Row> want = Arrays.asList(
        Row.of(0, "Hello this is a good book", "hello this is a good book"));

    BatchOperator<?> batchSource = new MemSourceBatchOp(input, colNames);
    StreamOperator<?> streamSource = new MemSourceStreamOp(input, colNames);

    Tokenizer tokenizer = new Tokenizer()
        .setSelectedCol("sentence")
        .setOutputCol("token");

    // Batch path: compare the collected rows against the expectation.
    List<Row> batchRows = tokenizer.transform(batchSource).collect();
    assertListRowEqualWithoutOrder(want, batchRows);

    // Stream path: link a collecting sink, call execute(), then compare what the sink returns.
    StreamOperator<?> streamResult = tokenizer.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamResult);
    StreamOperator.execute();
    assertListRowEqualWithoutOrder(want, collector.getAndRemoveValues());
}
Aggregations