Use of com.tdunning.plume.local.lazy.op.MultipleParallelDo in project Plume by tdunning.
Class MSCRMapper, method map.
/**
 * Routes one input record to the output channels of this MSCR.
 *
 * <p>The MSCR input that produced the record is found by comparing the path of the current
 * file split with each input's file (with and without a {@code file:} prefix). Inputs that
 * have no file yet are assigned {@code tmpFolder + "/" + plumeId} first. If the input's type
 * is a {@link PTableType} the record is processed as a {@code Pair} of key and value,
 * otherwise only the value is processed.
 *
 * <p>For every down-op of the matched input:
 * <ul>
 *   <li>a {@link MultipleParallelDo} runs each destination's function on the record and writes
 *       whatever it emits, tagged with that destination's channel;</li>
 *   <li>any other op writes the record through unchanged, tagged with the channel of the
 *       input (or of the flatten destination when the op is a {@link Flatten}).</li>
 * </ul>
 * Destinations and ops that have no channel registered in this MSCR are skipped individually;
 * the remaining ones are still processed.
 *
 * @param key     key of the input record
 * @param value   value of the input record
 * @param context context the tagged output is written to
 * @throws IOException          if writing a bypassed record fails
 * @throws InterruptedException if writing a bypassed record is interrupted
 * @throws RuntimeException     if the split matches no MSCR input, or if a write performed
 *                              from inside an emit callback fails (the cause is preserved)
 */
@SuppressWarnings("unchecked")
protected void map(WritableComparable key, WritableComparable value, final Mapper<WritableComparable, WritableComparable, PlumeObject, PlumeObject>.Context<WritableComparable, WritableComparable, PlumeObject, PlumeObject> context) throws IOException, InterruptedException {
    LazyCollection<?> l = null;
    FileSplit fS = FileInputSplitWrapper.getFileInputSplit(context);
    // The split path does not change while scanning the inputs, so compute it once.
    String splitPath = fS.getPath().toString();
    // Get LazyCollection for this input (according to FileSplit)
    for (PCollection<?> input : mscr.getInputs()) {
        LazyCollection<?> thisL = (LazyCollection<?>) input;
        if (thisL.getFile() == null) {
            // Convention for intermediate results
            thisL.setFile(tmpFolder + "/" + thisL.getPlumeId());
        }
        if (splitPath.startsWith(thisL.getFile()) || splitPath.startsWith("file:" + thisL.getFile())) {
            l = thisL;
            break;
        }
    }
    if (l == null) {
        throw new RuntimeException("Unable to match input split with any MSCR input");
    }
    // If this collection is a table -> process Pair, otherwise process value
    PCollectionType type = l.getType();
    Object toProcess = value;
    if (type instanceof PTableType) {
        toProcess = Pair.create(key, value);
    }
    for (DeferredOp op : l.getDownOps()) {
        if (op instanceof MultipleParallelDo) {
            MultipleParallelDo mPDo = ((MultipleParallelDo) op);
            for (Object entry : mPDo.getDests().entrySet()) {
                Map.Entry<PCollection, DoFn> en = (Map.Entry<PCollection, DoFn>) entry;
                LazyCollection<?> lCol = (LazyCollection<?>) en.getKey();
                // Only the first down-op of the destination decides which channel is used.
                DeferredOp childOp = null;
                if (lCol.getDownOps() != null && lCol.getDownOps().size() > 0) {
                    childOp = lCol.getDownOps().get(0);
                }
                final Integer channel;
                if (childOp instanceof Flatten) {
                    channel = mscr.getNumberedChannels().get(((Flatten) childOp).getDest());
                } else if (childOp instanceof GroupByKey) {
                    channel = mscr.getNumberedChannels().get(((GroupByKey) childOp).getOrigin());
                } else {
                    // bypass channel?
                    channel = mscr.getNumberedChannels().get(en.getKey());
                }
                if (channel == null) {
                    // This destination is not for this MSCR - skip only this one. Returning here
                    // would drop every remaining destination and down-op for the record.
                    continue;
                }
                // Call parallelDo function
                en.getValue().process(toProcess, new EmitFn() {
                    @Override
                    public void emit(Object v) {
                        try {
                            if (v instanceof Pair) {
                                Pair p = (Pair) v;
                                context.write(new PlumeObject((WritableComparable) p.getKey(), channel), new PlumeObject((WritableComparable) p.getValue(), channel));
                            } else {
                                context.write(new PlumeObject((WritableComparable) v, channel), new PlumeObject((WritableComparable) v, channel));
                            }
                        } catch (RuntimeException e) {
                            throw e;
                        } catch (Exception e) {
                            // emit() cannot declare checked exceptions; fail the task instead of
                            // silently losing the record.
                            if (e instanceof InterruptedException) {
                                Thread.currentThread().interrupt();
                            }
                            throw new RuntimeException("Unable to write emitted value to channel " + channel, e);
                        }
                    }
                });
            }
        } else {
            // Use a per-iteration target so that a Flatten does not change the collection
            // used for the channel lookup of the ops that follow it.
            LazyCollection<?> target = l;
            if (op instanceof Flatten) {
                target = (LazyCollection) ((Flatten) op).getDest();
            }
            Integer channel = mscr.getNumberedChannels().get(target);
            if (channel == null) {
                // No channel registered in this MSCR for this op - skip it
                continue;
            }
            if (toProcess instanceof Pair) {
                context.write(new PlumeObject(key, channel), new PlumeObject(value, channel));
            } else {
                context.write(new PlumeObject(value, channel), new PlumeObject(value, channel));
            }
        }
    }
}
Use of com.tdunning.plume.local.lazy.op.MultipleParallelDo in project Plume by tdunning.
Class Optimizer, method fuseSiblingParallelDos.
/**
 * Join ParallelDos that use the same PCollection into multiple-output {@link MultipleParallelDo}.
 *
 * <p>Walks upwards from {@code arg} through the deferred ops that produced it. Whenever the
 * origin of a {@link ParallelDo} has more than one ParallelDo among its down-ops, all of them
 * are replaced by a single {@link MultipleParallelDo} that becomes the deferred op of every
 * former destination. The walk stops at materialized collections, at collections already
 * produced by a {@link MultipleParallelDo}, and at collections whose deferred op is of a kind
 * not handled here.
 *
 * @param arg The original collection that may contain sibling do chains
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
<T> void fuseSiblingParallelDos(PCollection<T> arg) {
    LazyCollection<T> output = (LazyCollection<T>) arg;
    if (output.isMaterialized()) {
        // stop condition for recursive algorithm
        return;
    }
    DeferredOp dOp = output.getDeferredOp();
    if (!(dOp instanceof ParallelDo)) {
        if (dOp instanceof OneToOneOp) {
            // Recursively apply this function to parent
            fuseSiblingParallelDos(((OneToOneOp) dOp).getOrigin());
        } else if (dOp instanceof Flatten) {
            Flatten<T> flatten = (Flatten) dOp;
            // Recursively apply this function to all parents
            for (PCollection<T> col : flatten.getOrigins()) {
                fuseSiblingParallelDos(col);
            }
        }
        // MultipleParallelDo, a missing op, or any other kind of op: nothing to fuse here.
        // Returning avoids falling through to the ParallelDo cast below.
        return;
    }
    ParallelDo pDo = (ParallelDo) dOp;
    LazyCollection<T> orig = (LazyCollection<T>) pDo.getOrigin();
    // Count the ParallelDos hanging off the origin (this one included)
    int willAdd = 0;
    for (DeferredOp op : orig.getDownOps()) {
        if (op instanceof ParallelDo) {
            willAdd++;
        }
    }
    if (willAdd == 1) {
        // Parent doesn't have more ParallelDos to fuse
        // Recursively apply this function to parent
        fuseSiblingParallelDos(orig);
        return;
    }
    // MultipleParallelDo is viable, create it
    MultipleParallelDo<T> mPDo = new MultipleParallelDo<T>(orig);
    mPDo.addDest(pDo.getFunction(), output);
    // Drop this ParallelDo first so the loop below only sees its siblings
    orig.downOps.remove(pDo);
    output.deferredOp = mPDo;
    List<DeferredOp> newList = new ArrayList<DeferredOp>();
    for (DeferredOp op : orig.getDownOps()) {
        if (op instanceof ParallelDo) {
            // Sibling ParallelDo: move its function and destination into the fused op
            ParallelDo thisPDo = (ParallelDo) op;
            mPDo.addDest(thisPDo.getFunction(), thisPDo.getDest());
            LazyCollection thisDest = (LazyCollection) thisPDo.getDest();
            thisDest.deferredOp = mPDo;
        } else {
            // Every other down-op is kept as it is
            newList.add(op);
        }
    }
    newList.add(mPDo);
    orig.downOps = newList;
    // Recursively apply this function to parent
    fuseSiblingParallelDos(orig);
}
Use of com.tdunning.plume.local.lazy.op.MultipleParallelDo in project Plume by tdunning.
Class OptimizerTools, method getAll.
/**
 * Collects the deferred ops of exactly the given class that are reachable upstream of
 * {@code output}.
 *
 * <p>Starting at {@code output}, the search follows the origins of {@link Flatten},
 * {@link OneToOneOp} and {@link MultipleParallelDo} ops. Materialized collections are not
 * expanded. The class comparison is exact ({@code getClass().equals}), so subclasses of
 * {@code getClass} do not match.
 *
 * @param output   collection from which the search starts
 * @param getClass exact class of the ops to collect
 * @return the matching ops without duplicates, in the order they were first found
 */
static List<DeferredOp> getAll(PCollection<?> output, Class<? extends DeferredOp> getClass) {
    List<DeferredOp> retOps = new ArrayList<DeferredOp>();
    Stack<LazyCollection<?>> toVisit = new Stack<LazyCollection<?>>();
    Set<LazyCollection<?>> visited = new HashSet<LazyCollection<?>>();
    toVisit.push((LazyCollection<?>) output);
    while (!toVisit.isEmpty()) {
        LazyCollection<?> current = toVisit.pop();
        if (!visited.add(current)) {
            // Reached again through another path before its first visit; already handled
            continue;
        }
        if (current.isMaterialized()) {
            continue;
        }
        DeferredOp op = current.getDeferredOp();
        if (op == null) {
            // Nothing produced this collection, so there is nothing further upstream
            continue;
        }
        if (op.getClass().equals(getClass) && !retOps.contains(op)) {
            // Found
            retOps.add(op);
        }
        // Add more nodes to visit
        if (op instanceof Flatten) {
            for (PCollection<?> input : ((Flatten<?>) op).getOrigins()) {
                LazyCollection<?> in = (LazyCollection<?>) input;
                if (!visited.contains(in)) {
                    toVisit.push(in);
                }
            }
        } else if (op instanceof OneToOneOp) {
            LazyCollection<?> input = (LazyCollection<?>) ((OneToOneOp<?, ?>) op).getOrigin();
            if (!visited.contains(input)) {
                toVisit.push(input);
            }
        } else if (op instanceof MultipleParallelDo) {
            MultipleParallelDo<?> mPDo = (MultipleParallelDo<?>) op;
            LazyCollection<?> input = (LazyCollection<?>) mPDo.getOrigin();
            if (!visited.contains(input)) {
                toVisit.push(input);
            }
        }
    }
    return retOps;
}
Aggregations