Example 1 with GroupByKey

use of com.tdunning.plume.local.lazy.op.GroupByKey in project Plume by tdunning.

the class MSCRMapper method map.

@SuppressWarnings("unchecked")
protected void map(WritableComparable key, WritableComparable value, final Mapper<WritableComparable, WritableComparable, PlumeObject, PlumeObject>.Context context) throws IOException, InterruptedException {
    LazyCollection<?> l = null;
    FileSplit fS = FileInputSplitWrapper.getFileInputSplit(context);
    // Get LazyCollection for this input (according to FileSplit)
    for (PCollection<?> input : mscr.getInputs()) {
        LazyCollection<?> thisL = (LazyCollection<?>) input;
        if (thisL.getFile() == null) {
            // Convention for intermediate results
            thisL.setFile(tmpFolder + "/" + thisL.getPlumeId());
        }
        if (fS.getPath().toString().startsWith(thisL.getFile()) || fS.getPath().toString().startsWith("file:" + thisL.getFile())) {
            l = thisL;
            break;
        }
    }
    if (l == null) {
        throw new RuntimeException("Unable to match input split with any MSCR input");
    }
    // If this collection is a table -> process Pair, otherwise process value
    PCollectionType type = l.getType();
    Object toProcess = value;
    if (type instanceof PTableType) {
        toProcess = Pair.create(key, value);
    }
    for (DeferredOp op : l.getDownOps()) {
        if (op instanceof MultipleParallelDo) {
            MultipleParallelDo mPDo = ((MultipleParallelDo) op);
            for (Object entry : mPDo.getDests().entrySet()) {
                Map.Entry<PCollection, DoFn> en = (Map.Entry<PCollection, DoFn>) entry;
                LazyCollection<?> lCol = (LazyCollection<?>) en.getKey();
                DeferredOp childOp = null;
                if (lCol.getDownOps() != null && lCol.getDownOps().size() > 0) {
                    childOp = lCol.getDownOps().get(0);
                }
                final Integer channel;
                if (childOp != null && childOp instanceof Flatten) {
                    channel = mscr.getNumberedChannels().get(((Flatten) childOp).getDest());
                } else if (childOp != null && childOp instanceof GroupByKey) {
                    channel = mscr.getNumberedChannels().get(((GroupByKey) childOp).getOrigin());
                } else {
                    // bypass channel?
                    channel = mscr.getNumberedChannels().get(en.getKey());
                }
                if (channel == null) {
                    // This is not for this MSCR - just skip it
                    return;
                }
                // Call parallelDo function
                en.getValue().process(toProcess, new EmitFn() {

                    @Override
                    public void emit(Object v) {
                        try {
                            if (v instanceof Pair) {
                                Pair p = (Pair) v;
                                context.write(new PlumeObject((WritableComparable) p.getKey(), channel), new PlumeObject((WritableComparable) p.getValue(), channel));
                            } else {
                                context.write(new PlumeObject((WritableComparable) v, channel), new PlumeObject((WritableComparable) v, channel));
                            }
                        } catch (Exception e) {
                            // TODO How to report this
                            e.printStackTrace();
                        }
                    }
                });
            }
        } else {
            if (op instanceof Flatten) {
                l = (LazyCollection) ((Flatten) op).getDest();
            }
            int channel = mscr.getNumberedChannels().get(l);
            if (toProcess instanceof Pair) {
                context.write(new PlumeObject(key, channel), new PlumeObject(value, channel));
            } else {
                context.write(new PlumeObject(value, channel), new PlumeObject(value, channel));
            }
        }
    }
}
Also used : MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) GroupByKey(com.tdunning.plume.local.lazy.op.GroupByKey) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) PTableType(com.tdunning.plume.types.PTableType) Flatten(com.tdunning.plume.local.lazy.op.Flatten) PCollectionType(com.tdunning.plume.types.PCollectionType) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) DeferredOp(com.tdunning.plume.local.lazy.op.DeferredOp) IOException(java.io.IOException) PCollection(com.tdunning.plume.PCollection) DoFn(com.tdunning.plume.DoFn) EmitFn(com.tdunning.plume.EmitFn) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) Map(java.util.Map) Pair(com.tdunning.plume.Pair)
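The routing above hinges on the DoFn/EmitFn contract: the mapper hands each record to the channel's DoFn together with an emitter, and every emit() call is wrapped into channel-tagged PlumeObjects and written to the Hadoop context. A minimal sketch of that contract in isolation, using the raw DoFn, EmitFn and Pair signatures visible in the listing; the word-splitting function itself is made up and not part of Plume:

import com.tdunning.plume.DoFn;
import com.tdunning.plume.EmitFn;
import com.tdunning.plume.Pair;

public class DoFnContractSketch {
    public static void main(String[] args) {
        // Hypothetical DoFn: splits a line into (word, 1) pairs.
        DoFn splitWords = new DoFn() {
            @Override
            public void process(Object v, EmitFn emitter) {
                for (String word : v.toString().split("\\s+")) {
                    emitter.emit(Pair.create(word, 1));
                }
            }
        };
        // Stand-in for the anonymous EmitFn in MSCRMapper: print instead of
        // writing channel-tagged PlumeObjects to the Hadoop context.
        splitWords.process("plume builds deferred plans", new EmitFn() {
            @Override
            public void emit(Object v) {
                Pair p = (Pair) v;
                System.out.println(p.getKey() + " -> " + p.getValue());
            }
        });
    }
}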

Example 2 with GroupByKey

use of com.tdunning.plume.local.lazy.op.GroupByKey in project Plume by tdunning.

the class OptimizerTools method getMSCRBlocks.

/**
 * This utility returns all the different MSCR blocks that can be created from this plan
 *
 * (pere) As of Oct/2010, I think this code can be simplified to be more like addRemainingTrivialMSCRs(), so a possible TODO would be
 *  to refactor it and make it more understandable. An open question is whether there is an easy way of finding all possible
 *  MSCRs (including trivial ones not related to GroupByKey operations) in a single, elegant loop.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
static Set<MSCR> getMSCRBlocks(List<PCollection> outputs) {
    // Get all GroupByKeys from the tree
    List<DeferredOp> groupBys = OptimizerTools.getAll(outputs, GroupByKey.class);
    int mscrId = 1;
    Set<MSCR> mscrs = new HashSet<MSCR>();
    // For all found GroupByKey blocks
    for (DeferredOp gBK : groupBys) {
        GroupByKey groupBy = (GroupByKey<?, ?>) gBK;
        // Gather all information needed for MSCR from this GBK
        Set<PCollection<?>> inputs = new HashSet<PCollection<?>>();
        Set<GroupByKey<?, ?>> outputChannels = new HashSet<GroupByKey<?, ?>>();
        Set<Flatten<?>> unGroupedOutputChannels = new HashSet<Flatten<?>>();
        Set<PCollection<?>> bypassChannels = new HashSet<PCollection<?>>();
        Stack<LazyCollection<?>> toVisit = new Stack<LazyCollection<?>>();
        Set<LazyCollection<?>> visited = new HashSet<LazyCollection<?>>();
        LazyCollection<?> origin = (LazyCollection<?>) groupBy.getOrigin();
        toVisit.push(origin);
        outputChannels.add(groupBy);
        while (!toVisit.isEmpty()) {
            LazyCollection<?> current = toVisit.pop();
            visited.add(current);
            if (current.isMaterialized()) {
                // condition for being a materialized input. This may change.
                inputs.add(current);
                continue;
            }
            DeferredOp op = current.getDeferredOp();
            if (op instanceof MultipleParallelDo) {
                // second condition for being an input
                MultipleParallelDo<?> mPDo = (MultipleParallelDo) current.getDeferredOp();
                if (((LazyCollection<?>) mPDo.getOrigin()).isMaterialized()) {
                    // will be done in Mapper
                    inputs.add(mPDo.getOrigin());
                } else if (op instanceof ParallelDo) {
                    // will be done in Reducer
                    inputs.add(current);
                } else {
                    // will be done in Mapper
                    inputs.add(mPDo.getOrigin());
                }
                // Check for bypass channels & output channels with no group-by
                for (Map.Entry entry : mPDo.getDests().entrySet()) {
                    LazyCollection coll = (LazyCollection) entry.getKey();
                    if (coll.getDownOps() == null || coll.getDownOps().size() == 0) {
                        // leaf node
                        bypassChannels.add(coll);
                    } else if (coll.getDownOps().get(0) instanceof MultipleParallelDo) {
                        bypassChannels.add(coll);
                    /*
             * Case of an output channel that Flattens with no Group By
             */
                    } else if (coll.getDownOps().get(0) instanceof Flatten) {
                        Flatten<?> thisFlatten = (Flatten<?>) coll.getDownOps().get(0);
                        LazyCollection ldest = (LazyCollection) thisFlatten.getDest();
                        if (ldest.getDownOps() == null || ldest.getDownOps().size() == 0 || ldest.getDownOps().get(0) instanceof MultipleParallelDo) {
                            unGroupedOutputChannels.add(thisFlatten);
                            // Add the rest of this flatten's origins to the stack in order to possibly discover more output channels
                            for (PCollection<?> col : thisFlatten.getOrigins()) {
                                if (!visited.contains(col)) {
                                    toVisit.push((LazyCollection<?>) col);
                                }
                            }
                        }
                    }
                }
                continue;
            }
            if (op instanceof GroupByKey) {
                // third condition for being an input - rare case when one GBK follows another
                inputs.add(current);
                continue;
            }
            if (op instanceof Flatten) {
                Flatten<?> flatten = (Flatten<?>) op;
                for (PCollection<?> input : flatten.getOrigins()) {
                    LazyCollection<?> in = (LazyCollection<?>) input;
                    if (!visited.contains(in)) {
                        toVisit.push(in);
                    }
                }
                continue;
            }
            if (op instanceof OneToOneOp) {
                LazyCollection<?> input = (LazyCollection<?>) ((OneToOneOp<?, ?>) op).getOrigin();
                if (!visited.contains(input)) {
                    toVisit.push(input);
                }
                continue;
            }
        }
        MSCR mscrToAdd = null;
        // Check if there is already one MSCR with at least one of this inputs
        for (MSCR mscr : mscrs) {
            for (PCollection<?> input : inputs) {
                if (mscr.hasInput(input)) {
                    mscrToAdd = mscr;
                    break;
                }
            }
        }
        if (mscrToAdd == null) {
            // otherwise create new MSCR
            mscrToAdd = new MSCR(mscrId);
            mscrId++;
        }
        // Add all missing input channels to current MSCR
        for (PCollection<?> input : inputs) {
            if (!mscrToAdd.hasInput(input)) {
                mscrToAdd.addInput(input);
            }
        }
        // Add all missing bypass outputs to current MSCR
        for (PCollection<?> col : bypassChannels) {
            if (!mscrToAdd.hasOutputChannel(col)) {
                // Create new by-pass channel
                MSCR.OutputChannel oC = new MSCR.OutputChannel(col);
                mscrToAdd.addOutputChannel(oC);
            }
        }
        // Add all missing flatten-with-no-groupby outputs to current MSCR
        for (Flatten flatten : unGroupedOutputChannels) {
            if (!mscrToAdd.hasOutputChannel(flatten.getDest())) {
                // Create new channel with flatten and nothing else
                MSCR.OutputChannel oC = new MSCR.OutputChannel(flatten.getDest());
                oC.output = flatten.getDest();
                oC.flatten = flatten;
                mscrToAdd.addOutputChannel(oC);
            }
        }
        // Add all missing output channels to current MSCR
        for (GroupByKey groupByKey : outputChannels) {
            if (!mscrToAdd.hasOutputChannel(groupByKey.getOrigin())) {
                // Create new channel with group by key. It might have combiner and reducer as well.
                MSCR.OutputChannel oC = new MSCR.OutputChannel(groupByKey);
                oC.output = groupByKey.getDest();
                if (groupByKey.getOrigin().getDeferredOp() instanceof Flatten) {
                    oC.flatten = (Flatten) groupByKey.getOrigin().getDeferredOp();
                }
                if (groupByKey.getDest().getDownOps() != null && groupByKey.getDest().getDownOps().size() == 1) {
                    DeferredOp op = (DeferredOp) groupByKey.getDest().getDownOps().get(0);
                    if (op instanceof CombineValues) {
                        oC.combiner = (CombineValues) op;
                        oC.output = oC.combiner.getDest();
                        LazyCollection dest = (LazyCollection) oC.combiner.getDest();
                        if (dest.getDownOps() != null && dest.getDownOps().size() == 1) {
                            op = (DeferredOp) dest.getDownOps().get(0);
                        }
                    }
                    if (op instanceof ParallelDo) {
                        oC.reducer = (ParallelDo) op;
                        oC.output = oC.reducer.getDest();
                    }
                }
                mscrToAdd.addOutputChannel(oC);
            }
        }
        // Add if needed
        mscrs.add(mscrToAdd);
    }
    return addRemainingTrivialMSCRs(outputs, mscrId, mscrs);
}
Also used : MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) ParallelDo(com.tdunning.plume.local.lazy.op.ParallelDo) MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) GroupByKey(com.tdunning.plume.local.lazy.op.GroupByKey) DeferredOp(com.tdunning.plume.local.lazy.op.DeferredOp) HashSet(java.util.HashSet) CombineValues(com.tdunning.plume.local.lazy.op.CombineValues) Flatten(com.tdunning.plume.local.lazy.op.Flatten) OneToOneOp(com.tdunning.plume.local.lazy.op.OneToOneOp) Stack(java.util.Stack) PCollection(com.tdunning.plume.PCollection) Map(java.util.Map)
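The discovery loop above is an iterative depth-first walk over the lazy execution graph: push the GroupByKey's origin, pop nodes, mark them visited, and either classify each one (materialized input, bypass channel, ungrouped output channel) or keep walking upstream. A stripped-down sketch of just that traversal pattern, over a hypothetical Node type rather than Plume's LazyCollection; ArrayDeque replaces java.util.Stack purely as the more common modern choice, the shape of the loop is the same:

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class TraversalSketch {
    // Hypothetical stand-in for LazyCollection: a node that knows its upstream
    // parents (a Flatten's origins, an op's origin, and so on).
    static class Node {
        final List<Node> parents;
        Node(List<Node> parents) { this.parents = parents; }
    }

    static Set<Node> collectUpstream(Node start) {
        Deque<Node> toVisit = new ArrayDeque<>();
        Set<Node> visited = new HashSet<>();
        toVisit.push(start);
        while (!toVisit.isEmpty()) {
            Node current = toVisit.pop();
            if (!visited.add(current)) {
                continue; // already seen, skip
            }
            // getMSCRBlocks classifies 'current' at this point; this sketch only walks.
            for (Node parent : current.parents) {
                if (!visited.contains(parent)) {
                    toVisit.push(parent);
                }
            }
        }
        return visited;
    }
}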

Example 3 with GroupByKey

use of com.tdunning.plume.local.lazy.op.GroupByKey in project Plume by tdunning.

the class LocalExecutor method execute.

/**
 * Executes a one-output flow.
 *
 * @param <T> the element type of the output collection
 * @param output the lazy collection to compute
 * @return the materialized contents of the collection
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <T> Iterable<T> execute(LazyCollection<T> output) {
    if (output.isMaterialized()) {
        // nothing else to execute
        return output.getData();
    } else {
        DeferredOp op = output.getDeferredOp();
        final List<T> result = Lists.newArrayList();
        // Flatten op
        if (op instanceof Flatten) {
            Flatten<T> flatten = (Flatten<T>) op;
            for (PCollection<T> col : flatten.getOrigins()) {
                Iterable<T> res = execute((LazyCollection<T>) col);
                result.addAll(Lists.newArrayList(res));
            }
            // done with it
            return result;
        }
        Iterable parent;
        EmitFn<T> emitter = new EmitFn<T>() {

            @Override
            public void emit(T v) {
                result.add(v);
            }
        };
        // ParallelDo
        if (op instanceof ParallelDo) {
            ParallelDo pDo = (ParallelDo) op;
            parent = execute((LazyCollection) pDo.getOrigin());
            for (Object obj : parent) {
                pDo.getFunction().process(obj, emitter);
            }
        // MultipleParallelDo -> parallel operations that read the same collection
        // In this version of executor, we will only compute the current collection, not its neighbors
        } else if (op instanceof MultipleParallelDo) {
            MultipleParallelDo mPDo = (MultipleParallelDo) op;
            parent = execute((LazyCollection) mPDo.getOrigin());
            // get the function that corresponds to this collection
            DoFn function = (DoFn) mPDo.getDests().get(output);
            for (Object obj : parent) {
                function.process(obj, emitter);
            }
        // GroupByKey
        } else if (op instanceof GroupByKey) {
            GroupByKey gBK = (GroupByKey) op;
            parent = execute(gBK.getOrigin());
            Map<Object, List> groupMap = Maps.newHashMap();
            // Perform in-memory group by operation
            for (Object obj : parent) {
                Pair p = (Pair) obj;
                List list = groupMap.get(p.getKey());
                if (list == null) {
                    list = new ArrayList();
                }
                list.add(p.getValue());
                groupMap.put(p.getKey(), list);
            }
            for (Map.Entry<Object, List> entry : groupMap.entrySet()) {
                result.add((T) new Pair(entry.getKey(), entry.getValue()));
            }
        }
        return result;
    }
}
Also used : ParallelDo(com.tdunning.plume.local.lazy.op.ParallelDo) MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) GroupByKey(com.tdunning.plume.local.lazy.op.GroupByKey) Flatten(com.tdunning.plume.local.lazy.op.Flatten) ArrayList(java.util.ArrayList) DeferredOp(com.tdunning.plume.local.lazy.op.DeferredOp) DoFn(com.tdunning.plume.DoFn) EmitFn(com.tdunning.plume.EmitFn) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Pair(com.tdunning.plume.Pair)
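The GroupByKey branch above performs the grouping in memory with a key-to-list map. A standalone sketch of that step with made-up data, using Pair as in the listing and computeIfAbsent in place of the explicit null check:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.tdunning.plume.Pair;

public class GroupBySketch {
    public static void main(String[] args) {
        List<Pair> pairs = new ArrayList<>();
        pairs.add(Pair.create("a", 1));
        pairs.add(Pair.create("b", 2));
        pairs.add(Pair.create("a", 3));
        // Same shape as LocalExecutor's in-memory group-by: key -> list of values.
        Map<Object, List<Object>> groupMap = new HashMap<>();
        for (Pair p : pairs) {
            groupMap.computeIfAbsent(p.getKey(), k -> new ArrayList<>()).add(p.getValue());
        }
        // LocalExecutor then emits one Pair(key, values) per entry; here we just print.
        groupMap.forEach((k, vs) -> System.out.println(k + " -> " + vs));
    }
}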

Example 4 with GroupByKey

use of com.tdunning.plume.local.lazy.op.GroupByKey in project Plume by tdunning.

the class Optimizer method fuseParallelDos.

/**
 * Fuses producer-consumer ParallelDos, as in: {Orig2 => p2 => Orig1 => p1 => Output} to {Orig2 => p1(p2) => Output}.
 * @param arg The collection that may contain fusable ParallelDo compositions internally.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
<T> void fuseParallelDos(PCollection<T> arg) {
    LazyCollection<T> output = (LazyCollection<T>) arg;
    if (output.isMaterialized()) {
        // stop condition for recursive algorithm
        return;
    }
    DeferredOp dOp = output.getDeferredOp();
    if (!(dOp instanceof ParallelDo)) {
        // not a ParallelDo
        if (dOp instanceof OneToOneOp) {
            // Recursively apply this function to parent
            fuseParallelDos(((OneToOneOp) dOp).getOrigin());
            return;
        }
        if (dOp instanceof Flatten) {
            Flatten<T> flatten = (Flatten) dOp;
            // Recursively apply this function to all parents
            for (PCollection<T> col : flatten.getOrigins()) {
                fuseParallelDos(col);
            }
            return;
        }
    }
    ParallelDo p1 = (ParallelDo) output.getDeferredOp();
    LazyCollection orig1 = (LazyCollection) p1.getOrigin();
    if (orig1.isMaterialized()) {
        return;
    }
    if (!(orig1.getDeferredOp() instanceof ParallelDo)) {
        // Recursively apply this function to parent node
        fuseParallelDos(orig1);
        return;
    }
    // At this point we know ParallelDo fusion can be done -> Perform it
    ParallelDo p2 = (ParallelDo) orig1.getDeferredOp();
    // Lift combine values
    if (p2 instanceof CombineValues) {
        LazyCollection lCol = (LazyCollection) p2.getOrigin();
        if (!lCol.isMaterialized() && lCol.getDeferredOp() instanceof GroupByKey) {
            // Upper parallel do is CombineValues and follows a GroupByKey -> don't join
            fuseParallelDos(orig1);
            return;
        }
    }
    final DoFn f1 = p1.getFunction();
    final DoFn f2 = p2.getFunction();
    // Define the joined function
    DoFn newFn = new DoFn() {

        @Override
        public void process(Object v, final EmitFn emitter) {
            f2.process(v, new EmitFn() {

                @Override
                public void emit(Object v) {
                    f1.process(v, emitter);
                }
            });
        }
    };
    LazyCollection orig2 = (LazyCollection) p2.getOrigin();
    ParallelDo newPDo = new ParallelDo(newFn, orig2, output);
    // Clean & change pointers
    orig2.downOps.remove(p2);
    orig1.downOps.remove(p1);
    orig2.addDownOp(newPDo);
    output.deferredOp = newPDo;
    // Recursively apply this function to the same node => TODO Beware infinite recursion, properly test
    fuseParallelDos(output);
}
Also used : CombineValues(com.tdunning.plume.local.lazy.op.CombineValues) ParallelDo(com.tdunning.plume.local.lazy.op.ParallelDo) MultipleParallelDo(com.tdunning.plume.local.lazy.op.MultipleParallelDo) GroupByKey(com.tdunning.plume.local.lazy.op.GroupByKey) Flatten(com.tdunning.plume.local.lazy.op.Flatten) OneToOneOp(com.tdunning.plume.local.lazy.op.OneToOneOp) DeferredOp(com.tdunning.plume.local.lazy.op.DeferredOp) DoFn(com.tdunning.plume.DoFn) EmitFn(com.tdunning.plume.EmitFn)
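The fusion above is plain function composition expressed through emitters: the fused DoFn runs f2 and pipes every emitted value into f1. The same pattern, extracted as a standalone helper sketch (compose is hypothetical, not a Plume method), using the raw DoFn/EmitFn types as in the listing:

import com.tdunning.plume.DoFn;
import com.tdunning.plume.EmitFn;

public class FusionSketch {
    // Mirrors the anonymous newFn in fuseParallelDos: every value emitted by the
    // upstream function f2 is pushed through the downstream function f1 before
    // reaching the final emitter.
    static DoFn compose(final DoFn f1, final DoFn f2) {
        return new DoFn() {
            @Override
            public void process(Object v, final EmitFn emitter) {
                f2.process(v, new EmitFn() {
                    @Override
                    public void emit(Object intermediate) {
                        f1.process(intermediate, emitter);
                    }
                });
            }
        };
    }
}

In terms of the javadoc's notation, {Orig2 => p1(p2) => Output} is what compose(p1.getFunction(), p2.getFunction()) would produce.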

Aggregations

DeferredOp (com.tdunning.plume.local.lazy.op.DeferredOp) 4
Flatten (com.tdunning.plume.local.lazy.op.Flatten) 4
GroupByKey (com.tdunning.plume.local.lazy.op.GroupByKey) 4
MultipleParallelDo (com.tdunning.plume.local.lazy.op.MultipleParallelDo) 4
DoFn (com.tdunning.plume.DoFn) 3
EmitFn (com.tdunning.plume.EmitFn) 3
Map (java.util.Map) 3
ParallelDo (com.tdunning.plume.local.lazy.op.ParallelDo) 3
PCollection (com.tdunning.plume.PCollection) 2
Pair (com.tdunning.plume.Pair) 2
CombineValues (com.tdunning.plume.local.lazy.op.CombineValues) 2
OneToOneOp (com.tdunning.plume.local.lazy.op.OneToOneOp) 2
PlumeObject (com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) 1
PCollectionType (com.tdunning.plume.types.PCollectionType) 1
PTableType (com.tdunning.plume.types.PTableType) 1
IOException (java.io.IOException) 1
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit) 1
ArrayList (java.util.ArrayList) 1
HashSet (java.util.HashSet) 1
List (java.util.List) 1