Search in sources :

Example 6 with MultipleParallelDo

use of com.tdunning.plume.local.lazy.op.MultipleParallelDo in project Plume by tdunning.

the class MSCRMapper method map.

/**
 * Maps one input record onto the output channel(s) of the current MSCR.
 *
 * <p>The method first finds which MSCR input the current file split belongs to by
 * comparing the split path against each input's file (inputs without a file are
 * assigned {@code tmpFolder + "/" + plumeId}). It then walks the down-ops of that
 * input:
 * <ul>
 *   <li>for a {@link MultipleParallelDo}, each destination's {@link DoFn} is applied
 *       and every emitted value is written tagged with that destination's channel;
 *       destinations that have no channel in this MSCR are skipped;</li>
 *   <li>for any other op, the record is written unchanged, tagged with the channel of
 *       the input collection (or of the Flatten's destination when the op is a
 *       {@link Flatten}).</li>
 * </ul>
 *
 * @param key     record key; only emitted when the input type is a {@link PTableType}
 * @param value   record value
 * @param context Hadoop mapper context used to resolve the split and write output
 * @throws IOException          if writing a pass-through record fails
 * @throws InterruptedException if writing a pass-through record is interrupted
 * @throws RuntimeException     if the split matches no MSCR input, or if a write
 *                              issued from inside a {@link DoFn} fails
 */
@SuppressWarnings("unchecked")
protected void map(WritableComparable key, WritableComparable value, final Mapper<WritableComparable, WritableComparable, PlumeObject, PlumeObject>.Context<WritableComparable, WritableComparable, PlumeObject, PlumeObject> context) throws IOException, InterruptedException {
    LazyCollection<?> l = null;
    FileSplit fS = FileInputSplitWrapper.getFileInputSplit(context);
    // The split path does not change while scanning the inputs, so compute it once
    String splitPath = fS.getPath().toString();
    // Get LazyCollection for this input (according to FileSplit)
    for (PCollection<?> input : mscr.getInputs()) {
        LazyCollection<?> thisL = (LazyCollection<?>) input;
        if (thisL.getFile() == null) {
            // Convention for intermediate results
            thisL.setFile(tmpFolder + "/" + thisL.getPlumeId());
        }
        if (splitPath.startsWith(thisL.getFile()) || splitPath.startsWith("file:" + thisL.getFile())) {
            l = thisL;
            break;
        }
    }
    if (l == null) {
        throw new RuntimeException("Unable to match input split with any MSCR input");
    }
    // If this collection is a table -> process Pair, otherwise process value
    PCollectionType type = l.getType();
    Object toProcess = value;
    if (type instanceof PTableType) {
        toProcess = Pair.create(key, value);
    }
    for (DeferredOp op : l.getDownOps()) {
        if (op instanceof MultipleParallelDo) {
            MultipleParallelDo mPDo = ((MultipleParallelDo) op);
            for (Object entry : mPDo.getDests().entrySet()) {
                Map.Entry<PCollection, DoFn> en = (Map.Entry<PCollection, DoFn>) entry;
                LazyCollection<?> lCol = (LazyCollection<?>) en.getKey();
                DeferredOp childOp = null;
                if (lCol.getDownOps() != null && lCol.getDownOps().size() > 0) {
                    childOp = lCol.getDownOps().get(0);
                }
                // instanceof is already false for null, so no separate null check is needed
                final Integer channel;
                if (childOp instanceof Flatten) {
                    channel = mscr.getNumberedChannels().get(((Flatten) childOp).getDest());
                } else if (childOp instanceof GroupByKey) {
                    channel = mscr.getNumberedChannels().get(((GroupByKey) childOp).getOrigin());
                } else {
                    // bypass channel?
                    channel = mscr.getNumberedChannels().get(en.getKey());
                }
                if (channel == null) {
                    // This destination is not for this MSCR - skip only this one and keep
                    // processing the remaining destinations and down-ops
                    continue;
                }
                // Call parallelDo function
                en.getValue().process(toProcess, new EmitFn() {

                    @Override
                    public void emit(Object v) {
                        // emit() cannot declare checked exceptions, so failures are rethrown
                        // unchecked instead of being printed and silently dropping the record
                        try {
                            if (v instanceof Pair) {
                                Pair p = (Pair) v;
                                context.write(new PlumeObject((WritableComparable) p.getKey(), channel), new PlumeObject((WritableComparable) p.getValue(), channel));
                            } else {
                                context.write(new PlumeObject((WritableComparable) v, channel), new PlumeObject((WritableComparable) v, channel));
                            }
                        } catch (IOException e) {
                            throw new RuntimeException("Unable to write emitted value to channel " + channel, e);
                        } catch (InterruptedException e) {
                            // Restore the interrupt flag before propagating
                            Thread.currentThread().interrupt();
                            throw new RuntimeException("Interrupted while writing emitted value to channel " + channel, e);
                        }
                    }
                });
            }
        } else {
            // Resolve the channel key per op; do not overwrite 'l', otherwise a Flatten
            // would change the collection used by the ops that follow it in this loop
            LazyCollection<?> target = l;
            if (op instanceof Flatten) {
                target = (LazyCollection) ((Flatten) op).getDest();
            }
            // NOTE: unboxing fails with a NullPointerException if no channel is registered for 'target'
            int channel = mscr.getNumberedChannels().get(target);
            if (toProcess instanceof Pair) {
                context.write(new PlumeObject(key, channel), new PlumeObject(value, channel));
            } else {
                context.write(new PlumeObject(value, channel), new PlumeObject(value, channel));
            }
        }
    }
}
Also used : MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) GroupByKey(com.tdunning.plume.local.lazy.op.GroupByKey) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) PTableType(com.tdunning.plume.types.PTableType) Flatten(com.tdunning.plume.local.lazy.op.Flatten) PCollectionType(com.tdunning.plume.types.PCollectionType) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) DeferredOp(com.tdunning.plume.local.lazy.op.DeferredOp) IOException(java.io.IOException) PCollection(com.tdunning.plume.PCollection) DoFn(com.tdunning.plume.DoFn) EmitFn(com.tdunning.plume.EmitFn) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) Map(java.util.Map) Pair(com.tdunning.plume.Pair)

Example 7 with MultipleParallelDo

use of com.tdunning.plume.local.lazy.op.MultipleParallelDo in project Plume by tdunning.

the class Optimizer method fuseSiblingParallelDos.

/**
 * Walks upstream from the given collection and, wherever an origin collection feeds
 * more than one ParallelDo, replaces those ParallelDos by a single multiple-output
 * {@link MultipleParallelDo}.
 * <p>
 * The walk stops at materialized collections and at collections that are already
 * produced by a {@link MultipleParallelDo}.
 *
 * @param arg  The original collection that may contain sibling do chains
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
<T> void fuseSiblingParallelDos(PCollection<T> arg) {
    LazyCollection<T> output = (LazyCollection<T>) arg;
    if (output.isMaterialized()) {
        // nothing upstream left to inspect: recursion ends here
        return;
    }
    DeferredOp producer = output.getDeferredOp();
    if (!(producer instanceof ParallelDo)) {
        if (producer instanceof OneToOneOp) {
            // single parent: keep walking up
            fuseSiblingParallelDos(((OneToOneOp) producer).getOrigin());
            return;
        } else if (producer instanceof Flatten) {
            // several parents: walk up through each of them
            Flatten<T> flatten = (Flatten) producer;
            for (PCollection<T> parent : flatten.getOrigins()) {
                fuseSiblingParallelDos(parent);
            }
            return;
        } else if (producer instanceof MultipleParallelDo) {
            return;
        }
    }
    ParallelDo pDo = (ParallelDo) producer;
    LazyCollection<T> origin = (LazyCollection<T>) pDo.getOrigin();
    // How many ParallelDos hang off the same origin?
    int siblingCount = 0;
    for (DeferredOp candidate : origin.getDownOps()) {
        if (candidate instanceof ParallelDo) {
            siblingCount++;
        }
    }
    if (siblingCount != 1) {
        // There are siblings to merge: build the fused op, starting with this ParallelDo
        MultipleParallelDo<T> fused = new MultipleParallelDo<T>(origin);
        fused.addDest(pDo.getFunction(), output);
        origin.downOps.remove(pDo);
        output.deferredOp = fused;
        // Move every remaining ParallelDo into the fused op, keep all other ops as they are
        List<DeferredOp> remaining = new ArrayList<DeferredOp>();
        for (DeferredOp candidate : origin.getDownOps()) {
            if (!(candidate instanceof ParallelDo)) {
                remaining.add(candidate);
                continue;
            }
            ParallelDo sibling = (ParallelDo) candidate;
            fused.addDest(sibling.getFunction(), sibling.getDest());
            LazyCollection siblingDest = (LazyCollection) sibling.getDest();
            siblingDest.deferredOp = fused;
        }
        remaining.add(fused);
        origin.downOps = remaining;
    }
    // In both cases, continue the walk from the origin collection
    fuseSiblingParallelDos(origin);
}
Also used : ParallelDo(com.tdunning.plume.local.lazy.op.ParallelDo) MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) Flatten(com.tdunning.plume.local.lazy.op.Flatten) ArrayList(java.util.ArrayList) OneToOneOp(com.tdunning.plume.local.lazy.op.OneToOneOp) DeferredOp(com.tdunning.plume.local.lazy.op.DeferredOp)

Example 8 with MultipleParallelDo

use of com.tdunning.plume.local.lazy.op.MultipleParallelDo in project Plume by tdunning.

the class OptimizerTools method getAll.

/**
 * Collects, without duplicates, every deferred op reachable upstream of
 * {@code output} whose runtime class is exactly {@code getClass}.
 * <p>
 * The traversal follows the origins of {@link Flatten}, {@link OneToOneOp} and
 * {@link MultipleParallelDo} ops and does not go past materialized collections.
 *
 * @param output    collection from which the upstream walk starts
 * @param getClass  exact class of the ops to collect
 * @return the matching ops, in the order they were first encountered
 */
static List<DeferredOp> getAll(PCollection<?> output, Class<? extends DeferredOp> getClass) {
    List<DeferredOp> matches = new ArrayList<DeferredOp>();
    Set<LazyCollection<?>> seen = new HashSet<LazyCollection<?>>();
    Stack<LazyCollection<?>> pending = new Stack<LazyCollection<?>>();
    pending.push((LazyCollection<?>) output);
    while (!pending.isEmpty()) {
        LazyCollection<?> node = pending.pop();
        seen.add(node);
        if (node.isMaterialized()) {
            // Materialized collections are leaves of the walk
            continue;
        }
        DeferredOp producer = node.getDeferredOp();
        if (producer.getClass().equals(getClass) && !matches.contains(producer)) {
            matches.add(producer);
        }
        // Schedule the not-yet-seen inputs of the producing op
        if (producer instanceof Flatten) {
            for (PCollection<?> origin : ((Flatten<?>) producer).getOrigins()) {
                LazyCollection<?> parent = (LazyCollection<?>) origin;
                if (!seen.contains(parent)) {
                    pending.push(parent);
                }
            }
        } else if (producer instanceof OneToOneOp) {
            LazyCollection<?> parent = (LazyCollection<?>) ((OneToOneOp<?, ?>) producer).getOrigin();
            if (!seen.contains(parent)) {
                pending.push(parent);
            }
        } else if (producer instanceof MultipleParallelDo) {
            MultipleParallelDo<?> multi = (MultipleParallelDo<?>) producer;
            LazyCollection<?> parent = (LazyCollection<?>) multi.getOrigin();
            if (!seen.contains(parent)) {
                pending.push(parent);
            }
        }
    }
    return matches;
}
Also used : MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) ArrayList(java.util.ArrayList) Flatten(com.tdunning.plume.local.lazy.op.Flatten) OneToOneOp(com.tdunning.plume.local.lazy.op.OneToOneOp) DeferredOp(com.tdunning.plume.local.lazy.op.DeferredOp) Stack(java.util.Stack) HashSet(java.util.HashSet)

Aggregations

DeferredOp (com.tdunning.plume.local.lazy.op.DeferredOp)8 MultipleParallelDo (com.tdunning.plume.local.lazy.op.MultipleParallelDo)8 Flatten (com.tdunning.plume.local.lazy.op.Flatten)7 PCollection (com.tdunning.plume.PCollection)5 ParallelDo (com.tdunning.plume.local.lazy.op.ParallelDo)5 OneToOneOp (com.tdunning.plume.local.lazy.op.OneToOneOp)4 ArrayList (java.util.ArrayList)4 DoFn (com.tdunning.plume.DoFn)3 GroupByKey (com.tdunning.plume.local.lazy.op.GroupByKey)3 Map (java.util.Map)3 EmitFn (com.tdunning.plume.EmitFn)2 Pair (com.tdunning.plume.Pair)2 HashSet (java.util.HashSet)2 Stack (java.util.Stack)2 PlumeObject (com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject)1 CombineValues (com.tdunning.plume.local.lazy.op.CombineValues)1 PCollectionType (com.tdunning.plume.types.PCollectionType)1 PTableType (com.tdunning.plume.types.PTableType)1 IOException (java.io.IOException)1 Iterator (java.util.Iterator)1