use of storm.trident.operation.builtin.Count in project jstorm by alibaba.
the class TridentWordCountTest method testTridentWordCount.
/**
 * End-to-end check of a Trident word count queried over local DRPC.
 *
 * <p>A {@code FixedLimitBatchSpout} emits five fixed sentences; the words are
 * counted into an in-memory map state, and a DRPC stream named {@code "words"}
 * sums the stored counts of every word passed as the query argument. The
 * validator then compares the DRPC answers with the totals expected from the
 * spout constants. This also serves to make sure the validator itself is right.
 */
@Test
public void testTridentWordCount() {
    LocalDRPC drpc = new LocalDRPC();

    FixedLimitBatchSpout sentenceSpout = new FixedLimitBatchSpout(
            SPOUT_LIMIT,
            new Fields("sentence"),
            SPOUT_BATCH_SIZE,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));

    TridentTopology topology = new TridentTopology();

    // Counting branch: sentence -> words -> per-word count kept in memory.
    TridentState counts = topology
            .newStream("spout", sentenceSpout)
            .parallelismHint(1)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    // Query branch: split the DRPC argument, look each keyword up in the
    // state, drop keywords with no stored count, and sum what is left.
    topology
            .newDRPCStream("words", drpc)
            .each(new Fields("args"), new Split(), new Fields("keyword"))
            .groupBy(new Fields("keyword"))
            .stateQuery(counts, new Fields("keyword"), new MapGet(), new Fields("result"))
            .each(new Fields("result"), new FilterNull())
            .aggregate(new Fields("result"), new Sum(), new Fields("sum"));

    Map conf = new HashMap();
    conf.put(Config.TOPOLOGY_NAME, "TridentWordCountTest");

    JStormUnitTestValidator checker = new JStormUnitTestDRPCValidator(drpc) {
        Logger LOG = LoggerFactory.getLogger(JStormUnitTestValidator.class);

        /**
         * Runs the "words" DRPC function and strips the wrapping brackets.
         * The result is like [[8080]], so the first and last two characters
         * are removed.
         */
        private String queryAndUnwrap(String words) {
            String raw = executeLocalDRPC("words", words);
            return raw.substring(2, raw.length() - 2);
        }

        @Override
        public boolean validate(Map config) {
            String theText = queryAndUnwrap("the");

            // Number of emits needed to go once through all the spout content.
            int emitsPerLoop = (int) Math.ceil(SPOUT_CONTENT_TYPES / (float) SPOUT_BATCH_SIZE);
            // Number of complete passes over the spout content.
            int loops = SPOUT_LIMIT / emitsPerLoop;

            int theTotal = Integer.parseInt(theText);
            LOG.info("Final receive total " + theTotal + " \"the\" when expected " + (loops * 5));
            // One pass over the five sentences contains 5 occurrences of "the".
            boolean theMatches = (loops * 5 == theTotal);

            // Combined count of three words; the second query always runs,
            // even when the first comparison already failed.
            String mixedText = queryAndUnwrap("be store kujou");
            int mixedTotal = Integer.parseInt(mixedText);
            LOG.info("Final receive total " + mixedTotal + " \"be\" and \"store\" and \"kujou\" " + "when expected " + (loops * 3));
            // One pass contains 2 "be", 1 "store" and 0 "kujou".
            boolean mixedMatches = (loops * 3 == mixedTotal);

            return theMatches && mixedMatches;
        }
    };

    try {
        assertTrue(
                "Topology should pass the validator",
                JStormUnitTestRunner.submitTopology(topology.build(), conf, 90, checker));
    } finally {
        drpc.shutdown();
    }
}
use of storm.trident.operation.builtin.Count in project jstorm by alibaba.
the class TridentTopologySource method getTopology.
/**
 * Builds a Trident word-count topology over a fixed set of two-word sentences.
 *
 * <p>Stores a new {@code FixedBatchSpout} (batch size 20) in the {@code spout}
 * field, splits each sentence into words, groups by word and keeps a running
 * count in a {@code MemoryMapState}. Every stage uses a parallelism hint of 1.
 *
 * @param config accepted for the caller's benefit; not read by this method
 * @return the built topology
 */
public StormTopology getTopology(Config config) {
    this.spout = new FixedBatchSpout(
            new Fields("sentence"),
            20,
            new Values("one two"),
            new Values("two three"),
            new Values("three four"),
            new Values("four five"),
            new Values("five six"));

    TridentTopology topology = new TridentTopology();
    topology
            .newStream("wordcount", spout)
            .name("sentence")
            .parallelismHint(1)
            .shuffle()
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .parallelismHint(1)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(1);

    return topology.build();
}
use of storm.trident.operation.builtin.Count in project storm-hbase by jrkinley.
the class HBaseTridentAggregateTopology method main.
/**
 * Runs the HBase Trident aggregation example on an in-process
 * {@code LocalCluster}.
 *
 * <p>Eleven sample tuples taken from {@code HBaseCountersBatchTopology.values}
 * are emitted once (the spout does not cycle) in batches of three. Each tuple
 * is mapped to a column family and qualifier by {@code DatePartitionFunction},
 * grouped by (shortid, cf, cq) and counted into a transactional
 * {@code HBaseAggregateState}. The cluster is given five seconds to work and
 * is then shut down.
 *
 * @param args command-line arguments; not used
 * @throws InterruptedException kept from the original signature; presumably
 *         covers interruption while waiting — TODO confirm it is still needed
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static void main(String[] args) throws InterruptedException {
    List<Object> v0 = HBaseCountersBatchTopology.values.get(0).get(0);
    List<Object> v1 = HBaseCountersBatchTopology.values.get(0).get(1);
    List<Object> v2 = HBaseCountersBatchTopology.values.get(0).get(2);
    List<Object> v3 = HBaseCountersBatchTopology.values.get(0).get(3);
    List<Object> v4 = HBaseCountersBatchTopology.values.get(0).get(4);
    List<Object> v5 = HBaseCountersBatchTopology.values.get(1).get(0);
    List<Object> v6 = HBaseCountersBatchTopology.values.get(1).get(1);
    List<Object> v7 = HBaseCountersBatchTopology.values.get(1).get(2);
    List<Object> v8 = HBaseCountersBatchTopology.values.get(2).get(0);
    List<Object> v9 = HBaseCountersBatchTopology.values.get(2).get(1);
    List<Object> v10 = HBaseCountersBatchTopology.values.get(2).get(2);

    FixedBatchSpout spout = new FixedBatchSpout(
            new Fields("shortid", "url", "user", "date"),
            3,
            v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10);
    // Emit the sample data once rather than replaying it forever.
    spout.setCycle(false);

    TridentConfig config = new TridentConfig("shorturl", "shortid");
    config.setBatch(false);
    StateFactory state = HBaseAggregateState.transactional(config);

    TridentTopology topology = new TridentTopology();
    topology
            .newStream("spout", spout)
            .each(new Fields("shortid", "date"), new DatePartitionFunction(), new Fields("cf", "cq"))
            .project(new Fields("shortid", "cf", "cq"))
            .groupBy(new Fields("shortid", "cf", "cq"))
            .persistentAggregate(state, new Count(), new Fields("count"));

    Config conf = new Config();
    LocalCluster cluster = new LocalCluster();
    try {
        cluster.submitTopology("hbase-trident-aggregate", conf, topology.build());
        Utils.sleep(5000);
    } finally {
        // Always stop the in-process cluster, even if submission fails.
        cluster.shutdown();
    }
}
use of storm.trident.operation.builtin.Count in project jstorm by alibaba.
the class TridentFastWordCount method buildTopology.
public static StormTopology buildTopology(LocalDRPC drpc) {
FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3, new Values("the cow jumped over the moon"), new Values("the man went to the store and bought some candy"), new Values("four score and seven years ago"), new Values("how many apples can you eat"), new Values("to be or not to be the person"), new Values("marry had a little lamb whos fleese was white as snow"), new Values("and every where that marry went the lamb was sure to go"), new Values("one two three four five six seven eight nine ten"), new Values("this is a test of the emergency broadcast system this is only a test"), new Values("peter piper picked a peck of pickeled peppers"), new Values("JStorm is a distributed and fault-tolerant realtime computation system."), new Values("Inspired by Apache Storm, JStorm has been completely rewritten in Java and provides many more enhanced features."), new Values("JStorm has been widely used in many enterprise environments and proved robust and stable."), new Values("JStorm provides a distributed programming framework very similar to Hadoop MapReduce."), new Values("The developer only needs to compose his/her own pipe-lined computation logic by implementing the JStorm API"), new Values(" which is fully compatible with Apache Storm API"), new Values("and submit the composed Topology to a working JStorm instance."), new Values("Similar to Hadoop MapReduce, JStorm computes on a DAG (directed acyclic graph)."), new Values("Different from Hadoop MapReduce, a JStorm topology runs 24 * 7"), new Values("the very nature of its continuity abd 100% in-memory architecture "), new Values("has been proved a particularly suitable solution for streaming data and real-time computation."), new Values("JStorm guarantees fault-tolerance."), new Values("Whenever a worker process crashes, "), new Values("the scheduler embedded in the JStorm instance immediately spawns a new worker process to take the place of the failed one."), new Values(" The Acking framework provided by 
JStorm guarantees that every single piece of data will be processed at least once."));
spout.setCycle(true);
int spout_Parallelism_hint = JStormUtils.parseInt(conf.get(TOPOLOGY_SPOUT_PARALLELISM_HINT), 1);
int split_Parallelism_hint = JStormUtils.parseInt(conf.get(TOPOLOGY_SPLIT_PARALLELISM_HINT), 2);
int count_Parallelism_hint = JStormUtils.parseInt(conf.get(TOPOLOGY_COUNT_PARALLELISM_HINT), 2);
TridentTopology topology = new TridentTopology();
TridentState wordCounts = topology.newStream("spout1", spout).parallelismHint(spout_Parallelism_hint).each(new Fields("sentence"), new Split(), new Fields("word")).parallelismHint(split_Parallelism_hint).groupBy(new Fields("word")).persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")).parallelismHint(count_Parallelism_hint);
return topology.build();
}
use of storm.trident.operation.builtin.Count in project jstorm by alibaba.
the class TridentMapExample method buildTopology.
/**
 * Builds the Trident map/flatMap/filter/peek example topology.
 *
 * <p>A cycling {@code FixedBatchSpout} (batch size 3) emits sentences in a
 * field named {@code "word"}. The stream is passed through the externally
 * defined {@code split}, {@code toUpper} and {@code theFilter} operations,
 * each surviving tuple's first value is printed to standard output, and the
 * tuples are counted per {@code "word"} into a {@code MemoryMapState}. A DRPC
 * stream named {@code "words"} splits its argument, looks each piece up in
 * that state, drops pieces with no stored count and sums the rest.
 *
 * @param drpc the local DRPC server that backs the {@code "words"} stream
 * @return the built topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout spout = new FixedBatchSpout(
            new Fields("word"),
            3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    spout.setCycle(true);

    // Prints the first field of every tuple that reaches the peek stage.
    Consumer printer = new Consumer() {
        @Override
        public void accept(TridentTuple tuple) {
            System.out.println(tuple.getString(0));
        }
    };

    TridentTopology topology = new TridentTopology();

    TridentState wordCounts = topology
            .newStream("spout1", spout)
            .parallelismHint(16)
            .flatMap(split)
            .map(toUpper)
            .filter(theFilter)
            .peek(printer)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    topology
            .newDRPCStream("words", drpc)
            .flatMap(split)
            .groupBy(new Fields("args"))
            .stateQuery(wordCounts, new Fields("args"), new MapGet(), new Fields("count"))
            .filter(new FilterNull())
            .aggregate(new Fields("count"), new Sum(), new Fields("sum"));

    return topology.build();
}
Aggregations