Search in sources :

Example 41 with KeyValPair

use of org.apache.apex.malhar.lib.util.KeyValPair in project apex-malhar by apache.

the class WindowedWordCount method populateDAG.

/**
 * Assembles the windowed word-count pipeline with the High-Level stream API and
 * populates the supplied DAG with it.
 *
 * <p>Stages, in order: read from a {@code TextInput}, split each line into words,
 * pass every word through {@code AddTimestampFn}, window the words, count them per
 * key, format each count with {@code FormatAsTableRowFn}, print to the console and
 * end the stream at a {@code Collector}.
 *
 * @param dag  the DAG that receives the assembled stream
 * @param conf the configuration handed in by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    TextInput textSource = new TextInput();
    Collector sink = new Collector();

    // Breaks a line apart on runs of punctuation and/or whitespace.
    Function.FlatMapFunction<String, String> wordSplitter = new Function.FlatMapFunction<String, String>() {

        @Override
        public Iterable<String> f(String line) {
            String[] words = line.split("[\\p{Punct}\\s]+");
            return Arrays.asList(words);
        }
    };

    // Turns a timestamped word into a timestamped (word, 1) pair so it can be counted by key.
    Function.ToKeyValue<Tuple.TimestampedTuple<String>, String, Long> wordToUnitCount = new Function.ToKeyValue<Tuple.TimestampedTuple<String>, String, Long>() {

        @Override
        public Tuple<KeyValPair<String, Long>> f(Tuple.TimestampedTuple<String> word) {
            KeyValPair<String, Long> unitCount = new KeyValPair<String, Long>(word.getValue(), 1L);
            return new Tuple.TimestampedTuple<KeyValPair<String, Long>>(word.getTimestamp(), unitCount);
        }
    };

    // Create stream from the TextInput operator, extract the words and timestamp them.
    ApexStream<Tuple.TimestampedTuple<String>> timestampedWords = StreamFactory
        .fromInput(textSource, textSource.output, name("input"))
        .flatMap(wordSplitter, name("ExtractWords"))
        .map(new AddTimestampFn(), name("AddTimestampFn"));

    // Time windows of WINDOW_SIZE minutes, accumulating fired panes, with an early firing at every 1.
    // TODO: change trigger option to atWaterMark when available.
    WindowOption.TimeWindows windowing = new WindowOption.TimeWindows(Duration.standardMinutes(WINDOW_SIZE));
    TriggerOption firing = new TriggerOption().accumulatingFiredPanes().withEarlyFiringsAtEvery(1);
    WindowedStream<Tuple.TimestampedTuple<String>> wordsPerWindow = timestampedWords.window(windowing, firing);

    // Perform a countByKey transformation to count the appearance of each word in every
    // time window, then format the result and print it to the console.
    WindowedStream<PojoEvent> formattedCounts = wordsPerWindow
        .countByKey(wordToUnitCount, name("count words"))
        .map(new FormatAsTableRowFn(), name("FormatAsTableRowFn"))
        .print(name("console"));

    // Terminate the stream at the collector and materialize everything into the DAG.
    formattedCounts
        .endWith(sink, sink.input, name("Collector"))
        .populateDag(dag);
}
Also used : TriggerOption(org.apache.apex.malhar.lib.window.TriggerOption) WindowOption(org.apache.apex.malhar.lib.window.WindowOption) Function(org.apache.apex.malhar.lib.function.Function) KeyValPair(org.apache.apex.malhar.lib.util.KeyValPair) Tuple(org.apache.apex.malhar.lib.window.Tuple)

Example 42 with KeyValPair

use of org.apache.apex.malhar.lib.util.KeyValPair in project apex-malhar by apache.

the class SeedEventGeneratorApp method populateDAG.

/**
 * Connects a {@code SeedEventGenerator} to four {@code DevNull} sinks, one for each
 * of the generator's output ports used here.
 *
 * <p>The generator is given four key-data entries ({@code "x"}, {@code "y"},
 * {@code "gender"}, {@code "age"}), each with two integer bounds (presumably an
 * inclusive low/high range; confirm against {@code SeedEventGenerator.addKeyData}).
 * For every output port ({@code string_data}, {@code keyvalpair_list},
 * {@code val_data}, {@code val_list}) the {@code PortContext.QUEUE_CAPACITY}
 * attribute is set to {@code QUEUE_CAPACITY} and the port is streamed to its own
 * sink with the locality held in {@code locality}. Both {@code QUEUE_CAPACITY} and
 * {@code locality} are defined outside this method.
 *
 * @param dag  the DAG to which the operators and streams are added
 * @param conf the configuration handed in by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    SeedEventGenerator seedEvent = dag.addOperator("seedEvent", new SeedEventGenerator());
    seedEvent.addKeyData("x", 0, 9);
    seedEvent.addKeyData("y", 0, 9);
    seedEvent.addKeyData("gender", 0, 1);
    seedEvent.addKeyData("age", 10, 19);

    // Sinks are instantiated with explicit type arguments (never as raw DevNull) so the
    // assignments below need no unchecked conversion.
    DevNull<HashMap<String, String>> devString = dag.addOperator("devString", new DevNull<HashMap<String, String>>());
    DevNull<HashMap<String, ArrayList<KeyValPair>>> devKeyVal = dag.addOperator("devKeyVal", new DevNull<HashMap<String, ArrayList<KeyValPair>>>());
    DevNull<HashMap<String, String>> devVal = dag.addOperator("devVal", new DevNull<HashMap<String, String>>());
    DevNull<HashMap<String, ArrayList<Integer>>> devList = dag.addOperator("devList", new DevNull<HashMap<String, ArrayList<Integer>>>());

    // For each generator output port: set its queue capacity, then stream it to its sink.
    dag.getMeta(seedEvent).getMeta(seedEvent.string_data).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    dag.addStream("SeedEventGeneratorString", seedEvent.string_data, devString.data).setLocality(locality);

    dag.getMeta(seedEvent).getMeta(seedEvent.keyvalpair_list).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    dag.addStream("SeedEventGeneratorKeyVal", seedEvent.keyvalpair_list, devKeyVal.data).setLocality(locality);

    dag.getMeta(seedEvent).getMeta(seedEvent.val_data).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    dag.addStream("SeedEventGeneratorVal", seedEvent.val_data, devVal.data).setLocality(locality);

    dag.getMeta(seedEvent).getMeta(seedEvent.val_list).getAttributes().put(PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    dag.addStream("SeedEventGeneratorValList", seedEvent.val_list, devList.data).setLocality(locality);
}
Also used : SeedEventGenerator(org.apache.apex.malhar.lib.testbench.SeedEventGenerator) HashMap(java.util.HashMap) DevNull(org.apache.apex.malhar.lib.stream.DevNull) KeyValPair(org.apache.apex.malhar.lib.util.KeyValPair)

Aggregations

KeyValPair (org.apache.apex.malhar.lib.util.KeyValPair)42 Test (org.junit.Test)16 CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink)15 Map (java.util.Map)10 HashMap (java.util.HashMap)9 ArrayList (java.util.ArrayList)4 List (java.util.List)4 WindowOption (org.apache.apex.malhar.lib.window.WindowOption)4 LocalMode (com.datatorrent.api.LocalMode)3 Calendar (java.util.Calendar)3 Date (java.util.Date)3 MachineInfo (org.apache.apex.examples.machinedata.data.MachineInfo)3 MachineKey (org.apache.apex.examples.machinedata.data.MachineKey)3 ResourceType (org.apache.apex.examples.machinedata.data.ResourceType)3 Function (org.apache.apex.malhar.lib.function.Function)3 TimeBucketKey (org.apache.apex.malhar.lib.util.TimeBucketKey)3 TriggerOption (org.apache.apex.malhar.lib.window.TriggerOption)3 MutableDouble (org.apache.commons.lang.mutable.MutableDouble)3 DAG (com.datatorrent.api.DAG)2 Multimap (com.google.common.collect.Multimap)2