Use of org.apache.apex.malhar.contrib.kinesis.KinesisStringInputOperator in the apex-malhar project by Apache.
The populateDAG method of the class KinesisHashtagsApplication.
/**
 * Builds the hashtag demo topology on the supplied DAG in two halves.
 *
 * <p>Producer half: a Twitter sample input feeds a hashtag extractor, whose
 * hashtags are written out through a Kinesis string output operator.
 *
 * <p>Consumer half: a Kinesis string input operator feeds a unique counter,
 * whose counts go to a windowed top counter (top 10), whose output is routed
 * to the port returned by {@code consoleOutput(dag, "topHashtags")}.
 *
 * <p>NOTE(review): {@code locality} and {@code consoleOutput} are defined
 * outside this method and are not visible here; confirm their values/behavior
 * against the enclosing class.
 *
 * @param dag  the DAG that receives the operators and streams
 * @param conf configuration handed in by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // ---- Producer half: Twitter sample -> hashtag extractor -> Kinesis ----

  // Source operator for the Twitter sample stream.
  TwitterSampleInput sampler = dag.addOperator("TweetSampler", new TwitterSampleInput());

  // Extracts the hashtags from the incoming statuses.
  TwitterStatusHashtagExtractor extractor =
      dag.addOperator("HashtagExtractor", TwitterStatusHashtagExtractor.class);

  // Writes the extracted hashtags to Kinesis, with a batch size of 500.
  KinesisStringOutputOperator kinesisSink = new KinesisStringOutputOperator();
  dag.addOperator("ToKinesis", kinesisSink);
  kinesisSink.setBatchSize(500);

  // Statuses flow from the sampler into the extractor; this stream is pinned to CONTAINER_LOCAL.
  dag.addStream("TweetStream", sampler.status, extractor.input)
      .setLocality(Locality.CONTAINER_LOCAL);

  // Hashtags leaving the extractor are sent to the Kinesis output operator.
  dag.addStream("SendToKinesis", extractor.hashtags, kinesisSink.inputPort)
      .setLocality(locality);

  // ---- Consumer half: Kinesis -> unique counter -> top counter -> console ----

  // Reads strings back from Kinesis.
  KinesisStringInputOperator kinesisSource = new KinesisStringInputOperator();
  dag.addOperator("FromKinesis", kinesisSource);
  kinesisSource.setShardManager(new ShardManager());
  kinesisSource.getConsumer().setRecordsLimit(600);
  String partitionStrategy = AbstractKinesisInputOperator.PartitionStrategy.MANY_TO_ONE.toString();
  kinesisSource.setStrategy(partitionStrategy);

  // Counts the unique hashtags within a window.
  UniqueCounter<String> hashtagCounter = new UniqueCounter<String>();
  dag.addOperator("UniqueHashtagCounter", hashtagCounter);

  // Aggregates the counts and keeps the top 10.
  // NOTE(review): the original comment says the sliding window of 600 covers the
  // "last 5 mins"; the unit is not visible here -- confirm against WindowedTopCounter.
  WindowedTopCounter<String> topHashtags = new WindowedTopCounter<String>();
  dag.addOperator("TopCounter", topHashtags);
  topHashtags.setTopCount(10);
  topHashtags.setSlidingWindowWidth(600);
  topHashtags.setDagWindowWidth(1);

  // Hashtags read from Kinesis go into the unique counter.
  dag.addStream("TwittedHashtags", kinesisSource.outputPort, hashtagCounter.data)
      .setLocality(locality);

  // Unique hashtag counts go into the top counter.
  dag.addStream("UniqueHashtagCounts", hashtagCounter.count, topHashtags.input)
      .setLocality(locality);

  // The top-10 result is routed to the console output port.
  dag.addStream("TopHashtags", topHashtags.output, consoleOutput(dag, "topHashtags"))
      .setLocality(locality);
}
Aggregations