Use of org.apache.apex.malhar.contrib.twitter.TwitterSampleInput in the apex-malhar project by Apache.
From the class TwitterTopCounterApplication, method populateDAG:
/**
 * Wires the top-URL pipeline: tweet sampler -> URL extractor -> unique counter
 * -> windowed top counter, and passes the top counter's output port to the
 * {@code consoleOutput} helper.
 *
 * @param dag  the DAG that receives the operators and streams
 * @param conf the application configuration; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Source operator: injects the twitter sample stream into the application.
  TwitterSampleInput sampler = dag.addOperator("TweetSampler", new TwitterSampleInput());

  // Pulls the URLs out of each twitter status.
  TwitterStatusURLExtractor extractor = dag.addOperator("URLExtractor", TwitterStatusURLExtractor.class);

  // Counts the distinct URLs seen within a window.
  UniqueCounter<String> urlCounter = new UniqueCounter<String>();
  urlCounter = dag.addOperator("UniqueURLCounter", urlCounter);

  // The counter aggregates over 600 windows and slides forward one window at a time.
  // NOTE(review): the original comment calls this "the last 5 mins"; that depends on the
  // streaming window length, which is not set here - confirm.
  dag.setAttribute(urlCounter, Context.OperatorContext.APPLICATION_WINDOW_COUNT, 600);
  dag.setAttribute(urlCounter, Context.OperatorContext.SLIDE_BY_WINDOW_COUNT, 1);

  // Keeps the 10 highest counts; the sliding is already done upstream, so both widths are 1.
  WindowedTopCounter<String> topUrls = new WindowedTopCounter<String>();
  topUrls = dag.addOperator("TopCounter", topUrls);
  topUrls.setTopCount(10);
  topUrls.setSlidingWindowWidth(1);
  topUrls.setDagWindowWidth(1);

  // Statuses flow from the sampler into the URL extractor, container-local.
  dag.addStream("TweetStream", sampler.status, extractor.input)
      .setLocality(Locality.CONTAINER_LOCAL);

  // Extracted URLs flow into the unique counter, using the locality configured outside this method.
  dag.addStream("TwittedURLs", extractor.url, urlCounter.data)
      .setLocality(locality);

  // Per-URL counts flow into the top counter.
  dag.addStream("UniqueURLCounts", urlCounter.count, topUrls.input);

  // Hand the ranked output to the shared output helper.
  consoleOutput(dag, "topURLs", topUrls.output, SNAPSHOT_SCHEMA, "url");
}
Use of org.apache.apex.malhar.contrib.twitter.TwitterSampleInput in the apex-malhar project by Apache.
From the class TwitterTrendingHashtagsApplication, method populateDAG:
/**
 * Wires the trending-hashtag pipeline: tweet sampler -> unique counter ->
 * windowed top counter, and passes the top counter's output port to
 * {@code TwitterTopCounterApplication.consoleOutput}.
 *
 * @param dag  the DAG that receives the operators and streams
 * @param conf the application configuration; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Source operator: injects the twitter sample stream into the application.
  TwitterSampleInput sampler = dag.addOperator("TweetSampler", new TwitterSampleInput());

  // Counts the distinct hashtags seen within a window.
  UniqueCounter<String> tagCounter = new UniqueCounter<String>();
  tagCounter = dag.addOperator("UniqueHashtagCounter", tagCounter);

  // Keeps the 10 highest counts over a sliding width of 600.
  // NOTE(review): the original comment calls this "the last 5 mins"; that depends on the
  // streaming window length, which is not set here - confirm.
  WindowedTopCounter<String> topTags = new WindowedTopCounter<String>();
  topTags = dag.addOperator("TopCounter", topTags);
  topTags.setTopCount(10);
  topTags.setSlidingWindowWidth(600);
  topTags.setDagWindowWidth(1);

  // Hashtags come straight off the sampler's hashtag port; locality is configured outside this method.
  dag.addStream("TwittedHashtags", sampler.hashtag, tagCounter.data)
      .setLocality(locality);

  // Per-hashtag counts flow into the top counter.
  dag.addStream("UniqueHashtagCounts", tagCounter.count, topTags.input);

  // Hand the ranked output to the shared output helper.
  TwitterTopCounterApplication.consoleOutput(dag, "topHashtags", topTags.output, SNAPSHOT_SCHEMA, "hashtag");
}
Use of org.apache.apex.malhar.contrib.twitter.TwitterSampleInput in the apex-malhar project by Apache.
From the class KinesisHashtagsApplication, method populateDAG:
/**
 * Builds two chains in one DAG.
 * <ol>
 *   <li>Producer: tweet sampler -> hashtag extractor -> Kinesis output operator.</li>
 *   <li>Consumer: Kinesis input operator -> unique counter -> windowed top counter
 *       -> the port returned by {@code consoleOutput(dag, "topHashtags")}.</li>
 * </ol>
 *
 * @param dag  the DAG that receives the operators and streams
 * @param conf the application configuration; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // ---- Producer side: twitter -> Kinesis ----

  // Source operator: injects the twitter sample stream into the application.
  TwitterSampleInput sampler = dag.addOperator("TweetSampler", new TwitterSampleInput());

  // Pulls the hashtags out of each twitter status.
  TwitterStatusHashtagExtractor tagExtractor = dag.addOperator("HashtagExtractor", TwitterStatusHashtagExtractor.class);

  // Writes the extracted hashtags to Kinesis with a batch size of 500.
  KinesisStringOutputOperator kinesisWriter = new KinesisStringOutputOperator();
  kinesisWriter = dag.addOperator("ToKinesis", kinesisWriter);
  kinesisWriter.setBatchSize(500);

  // Statuses flow from the sampler into the hashtag extractor, container-local.
  dag.addStream("TweetStream", sampler.status, tagExtractor.input)
      .setLocality(Locality.CONTAINER_LOCAL);

  // Extracted hashtags are shipped to the Kinesis writer; locality is configured outside this method.
  dag.addStream("SendToKinesis", tagExtractor.hashtags, kinesisWriter.inputPort)
      .setLocality(locality);

  // ---- Consumer side: Kinesis -> top hashtags ----

  // Reads strings back from Kinesis with a fresh shard manager, a record limit of 600
  // and the MANY_TO_ONE partition strategy.
  KinesisStringInputOperator kinesisReader = new KinesisStringInputOperator();
  kinesisReader = dag.addOperator("FromKinesis", kinesisReader);
  kinesisReader.setShardManager(new ShardManager());
  kinesisReader.getConsumer().setRecordsLimit(600);
  kinesisReader.setStrategy(AbstractKinesisInputOperator.PartitionStrategy.MANY_TO_ONE.toString());

  // Counts the distinct hashtags seen within a window.
  UniqueCounter<String> tagCounter = new UniqueCounter<String>();
  tagCounter = dag.addOperator("UniqueHashtagCounter", tagCounter);

  // Keeps the 10 highest counts over a sliding width of 600.
  // NOTE(review): the original comment calls this "the last 5 mins"; that depends on the
  // streaming window length, which is not set here - confirm.
  WindowedTopCounter<String> topTags = new WindowedTopCounter<String>();
  topTags = dag.addOperator("TopCounter", topTags);
  topTags.setTopCount(10);
  topTags.setSlidingWindowWidth(600);
  topTags.setDagWindowWidth(1);

  // Hashtags read from Kinesis flow into the unique counter.
  dag.addStream("TwittedHashtags", kinesisReader.outputPort, tagCounter.data)
      .setLocality(locality);

  // Per-hashtag counts flow into the top counter.
  dag.addStream("UniqueHashtagCounts", tagCounter.count, topTags.input)
      .setLocality(locality);

  // The top 10 flow into whatever port the consoleOutput helper returns.
  dag.addStream("TopHashtags", topTags.output, consoleOutput(dag, "topHashtags"))
      .setLocality(locality);
}
Use of org.apache.apex.malhar.contrib.twitter.TwitterSampleInput in the apex-malhar project by Apache.
From the class TwitterDumpApplication, method populateDAG:
@Override
/**
 * Wires a two-operator DAG that forwards every sampled twitter status to a
 * {@code Status2Database} writer.
 *
 * @param dag  the DAG that receives the operators and the stream
 * @param conf the application configuration; not read by this method
 */
public void populateDAG(DAG dag, Configuration conf) {
  // Source operator: the twitter sample stream.
  TwitterSampleInput sampler = new TwitterSampleInput();
  sampler = dag.addOperator("TweetSampler", sampler);

  // Sink operator: writes statuses to the database. (Earlier revisions left a
  // ConsoleOutputOperator alternative commented out at this spot.)
  Status2Database statusWriter = new Status2Database();
  statusWriter = dag.addOperator("DatabaseWriter", statusWriter);

  // NOTE(review): the driver, URL and connection properties are hard-coded here;
  // consider supplying them through configuration instead.
  statusWriter.getStore().setDatabaseDriver("com.mysql.jdbc.Driver");
  statusWriter.getStore().setDatabaseUrl("jdbc:mysql://node6.morado.com:3306/twitter");
  statusWriter.getStore().setConnectionProperties("user:twitter");

  // Statuses flow from the sampler into the writer, container-local.
  dag.addStream("Statuses", sampler.status, statusWriter.input)
      .setLocality(Locality.CONTAINER_LOCAL);
}
Use of org.apache.apex.malhar.contrib.twitter.TwitterSampleInput in the apex-malhar project by Apache.
From the class TwitterDumpHBaseApplication, method populateDAG:
@Override
/**
 * Wires a two-operator DAG that forwards every sampled twitter status to a
 * {@code Status2Hbase} writer.
 *
 * @param dag  the DAG that receives the operators and the stream
 * @param conf the application configuration; not read by this method
 */
public void populateDAG(DAG dag, Configuration conf) {
  // Source operator: the twitter sample stream.
  TwitterSampleInput sampler = new TwitterSampleInput();
  sampler = dag.addOperator("TweetSampler", sampler);

  // Sink operator: registered under the name "DatabaseWriter".
  Status2Hbase statusWriter = new Status2Hbase();
  statusWriter = dag.addOperator("DatabaseWriter", statusWriter);

  // Statuses flow from the sampler into the writer, container-local.
  dag.addStream("Statuses", sampler.status, statusWriter.input)
      .setLocality(Locality.CONTAINER_LOCAL);
}
Aggregations