Use of com.tdunning.plume.DoFn in project Plume by tdunning.
Class MSCRReducer, method reduce.
/**
 * Handles one reduce group: looks up the output channel registered for the key's
 * {@code sourceId} and either runs that channel's reducer function over the grouped
 * values or writes the values straight to the named output.
 *
 * <p>When the channel has a reducer, all values are first collected into a list and the
 * reducer's {@link DoFn} is invoked once with a {@code (key, values)} pair. Whatever it
 * emits is written to the named output {@code sourceId + ""}: a {@link Pair} is written
 * as key/value, anything else as value with a {@link NullWritable} key.
 *
 * <p>When the channel has no reducer, every value is written directly; the original key
 * is kept only if the channel's output is a {@link PTable}.
 *
 * <p>Failures while writing emitted records are no longer printed and dropped: they are
 * propagated to the caller so that lost output is not silently ignored.
 *
 * @param arg0 the grouped key; its {@code sourceId} selects the channel and named output
 * @param values the values grouped under {@code arg0}
 * @param arg2 the reducer context (not used by this method)
 * @throws IOException if writing to the named output fails
 * @throws InterruptedException if writing to the named output is interrupted
 */
@SuppressWarnings("unchecked")
protected void reduce(final PlumeObject arg0, java.lang.Iterable<PlumeObject> values, Reducer<PlumeObject, PlumeObject, NullWritable, NullWritable>.Context arg2) throws IOException, InterruptedException {
  PCollection col = mscr.getChannelByNumber().get(arg0.sourceId);
  OutputChannel oC = mscr.getOutputChannels().get(col);
  if (oC.reducer != null) {
    // apply reducer
    ParallelDo pDo = oC.reducer;
    DoFn reducer = pDo.getFunction();
    List<WritableComparable> vals = Lists.newArrayList();
    for (PlumeObject val : values) {
      vals.add(val.obj);
    }
    try {
      reducer.process(Pair.create(arg0.obj, vals), new EmitFn() {
        @Override
        public void emit(Object v) {
          try {
            if (v instanceof Pair) {
              Pair p = (Pair) v;
              mos.write(arg0.sourceId + "", p.getKey(), p.getValue());
            } else {
              mos.write(arg0.sourceId + "", NullWritable.get(), (WritableComparable) v);
            }
          } catch (RuntimeException e) {
            // programming errors (e.g. a bad cast) propagate unchanged
            throw e;
          } catch (Exception e) {
            // emit() cannot declare checked exceptions, so tunnel the failure out;
            // it is unwrapped again around reducer.process(...) below
            throw new IllegalStateException("Failed to write reducer output for channel " + arg0.sourceId, e);
          }
        }
      });
    } catch (IllegalStateException e) {
      // restore the checked exception types this method already declares
      Throwable cause = e.getCause();
      if (cause instanceof IOException) {
        throw (IOException) cause;
      }
      if (cause instanceof InterruptedException) {
        throw (InterruptedException) cause;
      }
      throw e;
    }
  } else {
    // direct writing - write all key, value pairs
    for (PlumeObject val : values) {
      if (oC.output instanceof PTable) {
        mos.write(arg0.sourceId + "", arg0.obj, val.obj);
      } else {
        mos.write(arg0.sourceId + "", NullWritable.get(), val.obj);
      }
    }
  }
}
Use of com.tdunning.plume.DoFn in project Plume by tdunning.
Class Optimizer, method fuseParallelDos.
/**
 * Fuse producer-consumer ParallelDos as in : {Orig2 => p2 => Orig1 => p1 => Output} to {Orig2 => p1(p2) => Output}
 *
 * <p>The method walks upwards from {@code arg} through its deferred operations:
 * <ul>
 *   <li>a materialized collection ends the walk;</li>
 *   <li>a non-ParallelDo {@code OneToOneOp} or a {@code Flatten} is skipped and the walk
 *       continues on its origin(s);</li>
 *   <li>any other non-ParallelDo operation ends the walk, since there is nothing to fuse
 *       at this node;</li>
 *   <li>two directly chained ParallelDos are replaced by a single ParallelDo whose function
 *       feeds every element emitted by the upper function into the lower one, after which
 *       the same node is examined again.</li>
 * </ul>
 * A {@code CombineValues} that directly follows a {@code GroupByKey} is never fused into
 * the ParallelDo below it; the walk simply continues from the intermediate collection.
 *
 * @param arg The collection that may have compositions internally.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
<T> void fuseParallelDos(PCollection<T> arg) {
  LazyCollection<T> output = (LazyCollection<T>) arg;
  if (output.isMaterialized()) {
    // stop condition for recursive algorithm
    return;
  }
  DeferredOp dOp = output.getDeferredOp();
  if (!(dOp instanceof ParallelDo)) {
    // not a ParallelDo
    if (dOp instanceof OneToOneOp) {
      // Recursively apply this function to parent
      fuseParallelDos(((OneToOneOp) dOp).getOrigin());
    } else if (dOp instanceof Flatten) {
      Flatten<T> flatten = (Flatten) dOp;
      // Recursively apply this function to all parents
      for (PCollection<T> col : flatten.getOrigins()) {
        fuseParallelDos(col);
      }
    }
    // Any other kind of operation cannot be fused here. Returning explicitly avoids
    // falling through to the ParallelDo cast below, which would fail for such an op.
    return;
  }
  ParallelDo p1 = (ParallelDo) dOp;
  LazyCollection orig1 = (LazyCollection) p1.getOrigin();
  if (orig1.isMaterialized()) {
    return;
  }
  if (!(orig1.getDeferredOp() instanceof ParallelDo)) {
    // Recursively apply this function to parent node
    fuseParallelDos(orig1);
    return;
  }
  // At this point we know ParallelDo fusion can be done -> Perform it
  ParallelDo p2 = (ParallelDo) orig1.getDeferredOp();
  // Lift combine values
  if (p2 instanceof CombineValues) {
    LazyCollection lCol = (LazyCollection) p2.getOrigin();
    if (!lCol.isMaterialized() && lCol.getDeferredOp() instanceof GroupByKey) {
      // Upper parallel do is CombineValues and follows a GroupByKey -> don't join
      fuseParallelDos(orig1);
      return;
    }
  }
  final DoFn f1 = p1.getFunction();
  final DoFn f2 = p2.getFunction();
  // Define the joined function: run the upper function (f2) and pipe each of its
  // emitted elements through the lower function (f1) into the caller's emitter
  DoFn newFn = new DoFn() {
    @Override
    public void process(Object v, final EmitFn emitter) {
      f2.process(v, new EmitFn() {
        @Override
        public void emit(Object intermediate) {
          f1.process(intermediate, emitter);
        }
      });
    }
  };
  LazyCollection orig2 = (LazyCollection) p2.getOrigin();
  ParallelDo newPDo = new ParallelDo(newFn, orig2, output);
  // Clean & change pointers
  orig2.downOps.remove(p2);
  orig1.downOps.remove(p1);
  orig2.addDownOp(newPDo);
  output.deferredOp = newPDo;
  // Recursively apply this function to the same node => TODO Beware infinite recursion, properly test
  fuseParallelDos(output);
}
Aggregations