use of org.apache.flink.storm.wordcount.operators.BoltTokenizerByName in project flink by apache.
the class BoltTokenizerWordCountPojo method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Builds and runs the streaming word count job whose tokenizer is a
 * {@code BoltTokenizerByName} wrapped in a {@code BoltWrapper} and fed with
 * {@code Sentence} records.
 *
 * <p>Returns without building a job when {@code parseParameters} rejects the arguments.
 *
 * @param args command line arguments, passed unchanged to {@code parseParameters}
 * @throws Exception propagated from building or executing the job
 */
public static void main(final String[] args) throws Exception {
    final boolean parsed = parseParameters(args);
    if (!parsed) {
        return;
    }

    // obtain the execution environment the job is assembled on
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // input: a stream of Sentence records
    final DataStream<Sentence> text = getTextDataStream(env);

    // run the wrapped bolt as an operator that emits (String, Integer) tuples
    final DataStream<Tuple2<String, Integer>> tokenized = text.transform(
            "BoltTokenizerPojo",
            TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
            new BoltWrapper<Sentence, Tuple2<String, Integer>>(new BoltTokenizerByName()));

    // key on tuple position 0 and sum up tuple position 1
    final DataStream<Tuple2<String, Integer>> counts = tokenized.keyBy(0).sum(1);

    // emit result: print unless file output was requested
    if (!fileOutput) {
        counts.print();
    } else {
        counts.writeAsText(outputPath);
    }

    // execute program
    env.execute("Streaming WordCount with POJO bolt tokenizer");
}
use of org.apache.flink.storm.wordcount.operators.BoltTokenizerByName in project flink by apache.
the class BoltTokenizerWordCountWithNames method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Builds and runs the streaming word count job whose tokenizer is a
 * {@code BoltTokenizerByName} wrapped in a {@code BoltWrapper}; the wrapper is given the
 * schema {@code Fields("sentence")} for its {@code Tuple1<String>} input.
 *
 * <p>Returns without building a job when {@code parseParameters} rejects the arguments.
 *
 * @param args command line arguments, passed unchanged to {@code parseParameters}
 * @throws Exception propagated from building or executing the job
 */
public static void main(final String[] args) throws Exception {
    final boolean parsed = parseParameters(args);
    if (!parsed) {
        return;
    }

    // obtain the execution environment the job is assembled on
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // input: a stream of single-field tuples
    final DataStream<Tuple1<String>> text = getTextDataStream(env);

    // run the wrapped bolt as an operator that emits (String, Integer) tuples;
    // the Fields argument names the single input attribute "sentence"
    final DataStream<Tuple2<String, Integer>> tokenized = text.transform(
            "BoltTokenizerWithNames",
            TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
            new BoltWrapper<Tuple1<String>, Tuple2<String, Integer>>(
                    new BoltTokenizerByName(), new Fields("sentence")));

    // key on tuple position 0 and sum up tuple position 1
    final DataStream<Tuple2<String, Integer>> counts = tokenized.keyBy(0).sum(1);

    // emit result: print unless file output was requested
    if (!fileOutput) {
        counts.print();
    } else {
        counts.writeAsText(outputPath);
    }

    // execute program
    env.execute("Streaming WordCount with schema bolt tokenizer");
}
use of org.apache.flink.storm.wordcount.operators.BoltTokenizerByName in project flink by apache.
the class WordCountTopology method buildTopology.
/**
 * Assembles the word count topology: one spout, a tokenizer bolt, a counter bolt and a sink
 * bolt, wired together in that order.
 *
 * @param indexOrName if {@code true}, {@code BoltTokenizer} and {@code BoltCounter} are used;
 *     otherwise {@code BoltTokenizerByName} and {@code BoltCounterByName} are used
 * @return the builder holding the assembled topology
 */
public static TopologyBuilder buildTopology(boolean indexOrName) {
    // parallelism hint shared by the tokenizer, counter and print-sink bolts
    final int parallelism = 4;
    final TopologyBuilder topology = new TopologyBuilder();

    // get input data
    if (!fileInputOutput) {
        topology.setSpout(spoutId, new WordCountInMemorySpout());
    } else {
        // keep only the part of the input path after the last ':'
        // (presumably strips a scheme prefix such as "file:" -- confirm against callers)
        final String[] inputParts = textPath.split(":");
        final String inputFile = inputParts[inputParts.length - 1];
        // inserting NullTerminatingSpout only required to stabilize integration test
        topology.setSpout(spoutId, new NullTerminatingSpout(new WordCountFileSpout(inputFile)));
    }

    if (!indexOrName) {
        // split up the lines in pairs (2-tuples) containing: (word,1)
        topology.setBolt(tokenierzerId, new BoltTokenizerByName(), parallelism)
                .shuffleGrouping(spoutId);
        // group on the tokenizer's word attribute and count per group
        topology.setBolt(counterId, new BoltCounterByName(), parallelism)
                .fieldsGrouping(tokenierzerId, new Fields(BoltTokenizerByName.ATTRIBUTE_WORD));
    } else {
        // split up the lines in pairs (2-tuples) containing: (word,1)
        topology.setBolt(tokenierzerId, new BoltTokenizer(), parallelism)
                .shuffleGrouping(spoutId);
        // group on the tokenizer's word attribute and count per group
        topology.setBolt(counterId, new BoltCounter(), parallelism)
                .fieldsGrouping(tokenierzerId, new Fields(BoltTokenizer.ATTRIBUTE_WORD));
    }

    // emit result
    if (!fileInputOutput) {
        topology.setBolt(sinkId, new BoltPrintSink(formatter), parallelism)
                .shuffleGrouping(counterId);
    } else {
        // keep only the part of the output path after the last ':'
        final String[] outputParts = outputPath.split(":");
        final String outputFile = outputParts[outputParts.length - 1];
        // the file sink is registered without a parallelism hint
        topology.setBolt(sinkId, new BoltFileSink(outputFile, formatter))
                .shuffleGrouping(counterId);
    }

    return topology;
}
Aggregations