Use of org.apache.apex.malhar.lib.dedup.TimeBasedDedupOperator in project apex-malhar by Apache.
The populateDAG method of the Application class.
/**
 * Wires up the dedup example: a random data generator feeds a
 * {@link TimeBasedDedupOperator}, and each of the dedup operator's three
 * output ports (unique, duplicate, expired) is connected to its own
 * console output operator.
 *
 * @param dag  the DAG that receives the operators, streams and attributes
 * @param conf the configuration handed to this method; it is not read here
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Source of test tuples.
  final RandomDataGeneratorOperator generator =
      dag.addOperator("RandomGenerator", new RandomDataGeneratorOperator());

  // The deduper itself; it is configured via resources/META-INF/properties.xml.
  final TimeBasedDedupOperator deduper =
      dag.addOperator("Deduper", new TimeBasedDedupOperator());

  // One console sink per dedup output port: unique, duplicate and expired tuples.
  final ConsoleOutputOperator uniqueSink =
      dag.addOperator("ConsoleUnique", new ConsoleOutputOperator());
  final ConsoleOutputOperator duplicateSink =
      dag.addOperator("ConsoleDuplicate", new ConsoleOutputOperator());
  final ConsoleOutputOperator expiredSink =
      dag.addOperator("ConsoleExpired", new ConsoleOutputOperator());

  // Generator -> dedup input.
  dag.addStream("Generator to Dedup", generator.output, deduper.input);

  // Dedup output ports -> their respective console sinks.
  dag.addStream("Dedup Unique to Console", deduper.unique, uniqueSink.input);
  dag.addStream("Dedup Duplicate to Console", deduper.duplicate, duplicateSink.input);
  dag.addStream("Dedup Expired to Console", deduper.expired, expiredSink.input);

  // TUPLE_CLASS supplies the schema information (TestEvent) to the dedup input port.
  dag.setInputPortAttribute(deduper.input, Context.PortContext.TUPLE_CLASS, TestEvent.class);

  // To run the dedup operator with multiple partitions (2 in this example), enable:
  // dag.setAttribute(deduper, Context.OperatorContext.PARTITIONER,
  //     new StatelessPartitioner<TimeBasedDedupOperator>(2));
}
Aggregations