Search in sources :

Example 11 with PCollection

use of com.tdunning.plume.PCollection in project Plume by tdunning.

From the class TestOptimizer, method testFigure4.

/**
 * Test figure 4 of the FlumeJava paper.
 *
 * Builds three outputs from four single-pair inputs. Each output is a flatten followed by
 * a groupByKey; input2 feeds output1 and output2, and input4 feeds output2 and output3.
 * The optimizer is then run over all inputs and outputs, and the test checks that the
 * resulting execution step holds exactly one entry in {@code mscrSteps} and has no
 * {@code nextStep}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testFigure4() {
    // Get Plume runtime
    LazyPlume plume = new LazyPlume();
    // Create simple data: four inputs holding one (k, k) pair each
    PCollection input1 = plume.fromJava(Lists.newArrayList(Pair.create(1, 1)));
    PCollection input2 = plume.fromJava(Lists.newArrayList(Pair.create(2, 2)));
    PCollection input3 = plume.fromJava(Lists.newArrayList(Pair.create(3, 3)));
    PCollection input4 = plume.fromJava(Lists.newArrayList(Pair.create(4, 4)));
    // output1: flatten(input1, input2) -> groupByKey
    PCollection output1 = plume.flatten(
            tableOf(integers(), integers()),
            input1.map(identity, tableOf(integers(), integers())),
            input2.map(identity, tableOf(integers(), integers())))
        .groupByKey();
    // output2: flatten(input2, input3, input4) -> groupByKey -> combine -> map
    PCollection output2 = plume.flatten(
            tableOf(integers(), integers()),
            input2.map(identity, tableOf(integers(), integers())),
            input3.map(identity, tableOf(integers(), integers())),
            input4.map(identity, tableOf(integers(), integers())))
        .groupByKey()
        .combine(dummyCombiner)
        .map(identity, null);
    // output3: flatten(input4) -> groupByKey -> map
    PCollection output3 = plume.flatten(
            tableOf(integers(), integers()),
            input4.map(identity, tableOf(integers(), integers())))
        .groupByKey()
        .map(identity, null);
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(
        Lists.newArrayList(input1, input2, input3, input4),
        Lists.newArrayList(output1, output2, output3));
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // a failure message report the expected and actual values the right way round.
    assertEquals(1, step.mscrSteps.size());
    assertEquals(null, step.nextStep);
}
Also used : PCollection(com.tdunning.plume.PCollection) Test(org.junit.Test)

Example 12 with PCollection

use of com.tdunning.plume.PCollection in project Plume by tdunning.

From the class MSCRReducer, method reduce.

/**
 * Reduces all values received for one key and writes the result to the named output
 * of the channel identified by {@code arg0.sourceId}.
 *
 * If the channel has a reducer, the values are collected into a list, handed to the
 * reducer function as a (key, values) pair, and whatever it emits is written out: the
 * key and value of an emitted {@code Pair}, or {@code NullWritable} plus the emitted
 * object for anything else. If the channel has no reducer, every incoming value is
 * written directly, keyed by {@code arg0.obj} when the channel output is a
 * {@code PTable} and by {@code NullWritable} otherwise.
 *
 * @param arg0   the key object; its {@code sourceId} selects the output channel
 * @param values the values grouped under this key
 * @param arg2   the reducer context (not used here; all writes go through {@code mos})
 * @throws IOException          if a direct write fails
 * @throws InterruptedException if a direct write is interrupted
 * @throws RuntimeException     if writing a record emitted by the reducer function fails;
 *                              the original failure is kept as the cause
 */
@SuppressWarnings("unchecked")
protected void reduce(final PlumeObject arg0, java.lang.Iterable<PlumeObject> values, Reducer<PlumeObject, PlumeObject, NullWritable, NullWritable>.Context<PlumeObject, PlumeObject, NullWritable, NullWritable> arg2) throws IOException, InterruptedException {
    PCollection col = mscr.getChannelByNumber().get(arg0.sourceId);
    OutputChannel oC = mscr.getOutputChannels().get(col);
    // Every record handled by this call goes to the named output derived from the source id.
    final String outputName = arg0.sourceId + "";
    if (oC.reducer != null) {
        // apply reducer
        ParallelDo pDo = oC.reducer;
        // TODO how to check / report this
        DoFn reducer = pDo.getFunction();
        List<WritableComparable> vals = Lists.newArrayList();
        for (PlumeObject val : values) {
            vals.add(val.obj);
        }
        reducer.process(Pair.create(arg0.obj, vals), new EmitFn() {

            @Override
            public void emit(Object v) {
                try {
                    if (v instanceof Pair) {
                        Pair p = (Pair) v;
                        mos.write(outputName, p.getKey(), p.getValue());
                    } else {
                        mos.write(outputName, NullWritable.get(), (WritableComparable) v);
                    }
                } catch (RuntimeException e) {
                    // Already unchecked: let it propagate unchanged.
                    throw e;
                } catch (Exception e) {
                    // emit() cannot declare checked exceptions, so surface the failure as an
                    // unchecked one instead of printing it and silently dropping the record.
                    if (e instanceof InterruptedException) {
                        // Preserve the interrupt for code further up the stack.
                        Thread.currentThread().interrupt();
                    }
                    // NOTE(review): relies on the reducer function not catching this itself - confirm.
                    throw new RuntimeException("Failed to write reducer output for channel " + outputName, e);
                }
            }
        });
    } else {
        // direct writing - write all key, value pairs
        for (PlumeObject val : values) {
            if (oC.output instanceof PTable) {
                mos.write(outputName, arg0.obj, val.obj);
            } else {
                mos.write(outputName, NullWritable.get(), val.obj);
            }
        }
    }
}
Also used : ParallelDo(com.tdunning.plume.local.lazy.op.ParallelDo) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) IOException(java.io.IOException) PTable(com.tdunning.plume.PTable) PCollection(com.tdunning.plume.PCollection) DoFn(com.tdunning.plume.DoFn) EmitFn(com.tdunning.plume.EmitFn) WritableComparable(org.apache.hadoop.io.WritableComparable) OutputChannel(com.tdunning.plume.local.lazy.MSCR.OutputChannel) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) Pair(com.tdunning.plume.Pair)

Example 13 with PCollection

use of com.tdunning.plume.PCollection in project Plume by tdunning.

From the class MSCRCombiner, method reduce.

/**
 * Combiner-side reduce for one key of the channel identified by {@code arg0.sourceId}.
 *
 * When the channel defines no combiner, each incoming value is forwarded to the context
 * unchanged under the same key. Otherwise the wrapped objects of all values are gathered
 * into a list, passed to the channel's combiner, and the single combined result is
 * written back wrapped in a new {@code PlumeObject} carrying the same source id.
 *
 * @param arg0    the key object; its {@code sourceId} selects the output channel
 * @param values  the values grouped under this key
 * @param context the context that receives the (key, value) records
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@SuppressWarnings("unchecked")
protected void reduce(final PlumeObject arg0, java.lang.Iterable<PlumeObject> values, Reducer<PlumeObject, PlumeObject, PlumeObject, PlumeObject>.Context<PlumeObject, PlumeObject, PlumeObject, PlumeObject> context) throws IOException, InterruptedException {
    PCollection sourceCollection = mscr.getChannelByNumber().get(arg0.sourceId);
    OutputChannel channel = mscr.getOutputChannels().get(sourceCollection);

    if (channel.combiner == null) {
        // No combiner on this channel: pass every (key, value) pair straight through.
        for (PlumeObject passThrough : values) {
            context.write(arg0, passThrough);
        }
        return;
    }

    // Unwrap the payload of every value so the combiner sees the raw objects.
    List<WritableComparable> unwrapped = Lists.newArrayList();
    for (PlumeObject wrapped : values) {
        unwrapped.add(wrapped.obj);
    }

    // Run this channel's combiner and emit its single result under the original key.
    Object combined = channel.combiner.getCombiner().combine(unwrapped);
    context.write(arg0, new PlumeObject((WritableComparable) combined, arg0.sourceId));
}
Also used : PCollection(com.tdunning.plume.PCollection) WritableComparable(org.apache.hadoop.io.WritableComparable) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) OutputChannel(com.tdunning.plume.local.lazy.MSCR.OutputChannel)

Aggregations

PCollection (com.tdunning.plume.PCollection)13 DeferredOp (com.tdunning.plume.local.lazy.op.DeferredOp)6 MultipleParallelDo (com.tdunning.plume.local.lazy.op.MultipleParallelDo)6 Flatten (com.tdunning.plume.local.lazy.op.Flatten)5 ParallelDo (com.tdunning.plume.local.lazy.op.ParallelDo)5 Map (java.util.Map)4 DoFn (com.tdunning.plume.DoFn)3 PlumeObject (com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject)3 OneToOneOp (com.tdunning.plume.local.lazy.op.OneToOneOp)3 EmitFn (com.tdunning.plume.EmitFn)2 Pair (com.tdunning.plume.Pair)2 OutputChannel (com.tdunning.plume.local.lazy.MSCR.OutputChannel)2 GroupByKey (com.tdunning.plume.local.lazy.op.GroupByKey)2 PTableType (com.tdunning.plume.types.PTableType)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 WritableComparable (org.apache.hadoop.io.WritableComparable)2 Test (org.junit.Test)2 PTable (com.tdunning.plume.PTable)1