Use of org.apache.apex.malhar.lib.window.accumulation.RemoveDuplicates in project apex-malhar by apache.
The class DeDupExample, method populateDAG.
/**
 * Builds the de-duplication example pipeline and populates the given DAG with it.
 *
 * <p>The pipeline reads lines from the {@code ./src/test/resources/wordcount} folder, splits
 * each line into words, lower-cases every word, accumulates the words with
 * {@code RemoveDuplicates} over a global window, prints the result and finally hands it to a
 * {@code Collector}.
 *
 * @param dag the DAG that the stream pipeline is populated into
 * @param conf the configuration passed in by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    // Read the files in a local folder and emit their lines one by one, then split each line
    // into words and normalize the words to lower case.
    ApexStream<String> stream = StreamFactory
        .fromFolder("./src/test/resources/wordcount", name("textInput"))
        .flatMap(new Function.FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> f(String input) {
                // Split on runs of punctuation and/or whitespace.
                // NOTE(review): a line that starts with a delimiter yields an empty first
                // token, which flows downstream as a "word" - confirm this is acceptable.
                return Arrays.asList(input.split("[\\p{Punct}\\s]+"));
            }
        }, name("ExtractWords"))
        .map(new Function.MapFunction<String, String>() {
            @Override
            public String f(String input) {
                // Use a fixed locale so that the normalization (and therefore which words
                // are considered duplicates) does not depend on the JVM's default locale.
                return input.toLowerCase(java.util.Locale.ROOT);
            }
        }, name("ToLowerCase"));

    // Terminal operator of the pipeline (see endWith below).
    Collector collector = new Collector();

    // Apply the window and trigger options: a single global window whose accumulated panes
    // are fired early every second. Duplicates are removed within that window, and the
    // result is printed and passed to the collector before the DAG is populated.
    stream
        .window(
            new WindowOption.GlobalWindow(),
            new TriggerOption()
                .accumulatingFiredPanes()
                .withEarlyFiringsAtEvery(Duration.standardSeconds(1)))
        .accumulate(new RemoveDuplicates<String>(), name("RemoveDuplicates"))
        .print(name("console"))
        .endWith(collector, collector.input)
        .populateDag(dag);
}
Aggregations