Use of org.apache.apex.malhar.lib.util.KeyValPair in project apex-malhar by apache.
Class WindowedWordCount, method populateDAG.
/**
 * Populate the DAG with the High-Level API.
 * @param dag
 * @param conf
 */
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  TextInput input = new TextInput();
  Collector collector = new Collector();

  // Create a stream from the TextInput operator, split each line into words,
  // and attach a timestamp to every word.
  ApexStream<Tuple.TimestampedTuple<String>> stream = StreamFactory.fromInput(input, input.output, name("input"))
      .flatMap(new Function.FlatMapFunction<String, String>()
      {
        @Override
        public Iterable<String> f(String input)
        {
          return Arrays.asList(input.split("[\\p{Punct}\\s]+"));
        }
      }, name("ExtractWords"))
      .map(new AddTimestampFn(), name("AddTimestampFn"));

  // Apply window and trigger option.
  // TODO: change trigger option to atWaterMark when available.
  WindowedStream<Tuple.TimestampedTuple<String>> windowedWords = stream
      .window(new WindowOption.TimeWindows(Duration.standardMinutes(WINDOW_SIZE)),
          new TriggerOption().accumulatingFiredPanes().withEarlyFiringsAtEvery(1));

  // Perform a countByKey transformation to count the appearance of each word in every time window,
  // then format the (word, count) pairs as table rows and print them to the console.
  WindowedStream<PojoEvent> wordCounts = windowedWords
      .countByKey(new Function.ToKeyValue<Tuple.TimestampedTuple<String>, String, Long>()
      {
        @Override
        public Tuple<KeyValPair<String, Long>> f(Tuple.TimestampedTuple<String> input)
        {
          return new Tuple.TimestampedTuple<KeyValPair<String, Long>>(input.getTimestamp(),
              new KeyValPair<String, Long>(input.getValue(), 1L));
        }
      }, name("count words"))
      .map(new FormatAsTableRowFn(), name("FormatAsTableRowFn"))
      .print(name("console"));

  wordCounts.endWith(collector, collector.input, name("Collector")).populateDag(dag);
}
Use of org.apache.apex.malhar.lib.util.KeyValPair in project apex-malhar by apache.
Class SeedEventGeneratorApp, method populateDAG.
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // Generate seed tuples with four keys, each constrained to its own value range.
  SeedEventGenerator seedEvent = dag.addOperator("seedEvent", new SeedEventGenerator());
  seedEvent.addKeyData("x", 0, 9);
  seedEvent.addKeyData("y", 0, 9);
  seedEvent.addKeyData("gender", 0, 1);
  seedEvent.addKeyData("age", 10, 19);

  // DevNull sinks simply discard the tuples; the application only exercises the generator's ports.
  DevNull<HashMap<String, String>> devString = dag.addOperator("devString", new DevNull<HashMap<String, String>>());
  DevNull<HashMap<String, ArrayList<KeyValPair>>> devKeyVal = dag.addOperator("devKeyVal", new DevNull<HashMap<String, ArrayList<KeyValPair>>>());
  DevNull<HashMap<String, String>> devVal = dag.addOperator("devVal", new DevNull<HashMap<String, String>>());
  DevNull<HashMap<String, ArrayList<Integer>>> devList = dag.addOperator("devList", new DevNull<HashMap<String, ArrayList<Integer>>>());

  // Connect each output port of the generator to a sink, raising the port's queue capacity first.
  dag.getMeta(seedEvent).getMeta(seedEvent.string_data).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
  dag.addStream("SeedEventGeneratorString", seedEvent.string_data, devString.data).setLocality(locality);
  dag.getMeta(seedEvent).getMeta(seedEvent.keyvalpair_list).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
  dag.addStream("SeedEventGeneratorKeyVal", seedEvent.keyvalpair_list, devKeyVal.data).setLocality(locality);
  dag.getMeta(seedEvent).getMeta(seedEvent.val_data).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
  dag.addStream("SeedEventGeneratorVal", seedEvent.val_data, devVal.data).setLocality(locality);
  dag.getMeta(seedEvent).getMeta(seedEvent.val_list).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
  dag.addStream("SeedEventGeneratorValList", seedEvent.val_list, devList.data).setLocality(locality);
}
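Both populateDAG methods above belong to Apex StreamingApplication implementations, so they can be smoke-tested in-process with Apex's LocalMode. The driver class below is a minimal sketch, assuming SeedEventGeneratorApp can be instantiated with its default locality and queue-capacity settings and that an empty Hadoop Configuration is sufficient; it is not part of the listed code.

import com.datatorrent.api.LocalMode;
import org.apache.hadoop.conf.Configuration;

// Hypothetical driver: builds the DAG via populateDAG() and runs it locally for ten seconds.
public class RunSeedEventGeneratorLocally
{
  public static void main(String[] args) throws Exception
  {
    LocalMode lma = LocalMode.newInstance();
    lma.prepareDAG(new SeedEventGeneratorApp(), new Configuration(false));
    LocalMode.Controller lc = lma.getController();
    lc.run(10000);  // run the in-process cluster for 10 seconds, then shut down
  }
}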