Use of org.apache.crunch.impl.mr.exec.CrunchJob in the Cloudera crunch project.
The following is the build method of the JobPrototype class.
/**
 * Assembles the Hadoop {@code Job} described by this prototype and wraps it
 * in a {@code CrunchJob}.
 *
 * <p>The method wires, in order: the job jar and working directory, one output
 * {@code DoNode} per {@code Target} (walking every node path back from it),
 * the mapper, then either a map-only configuration ({@code group == null}) or
 * the full shuffle path (reducer, optional combiner, grouping node), and
 * finally the serialized map-side nodes and their input sources.
 *
 * @param jarClass class used to locate the job jar via {@code setJarByClass}
 * @param conf     base configuration; note it is rebound to the Job's own
 *                 copy immediately, so later writes go to the job config
 * @param pipeline pipeline whose name seeds the generated job name
 * @return the configured job wrapped with its output path and handler
 * @throws IOException if node serialization to the working path fails
 */
private CrunchJob build(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
  Job job = new Job(conf);
  // Rebind to the Job's own Configuration so subsequent set() calls stick.
  conf = job.getConfiguration();
  conf.set(PlanningParameters.CRUNCH_WORKING_DIRECTORY, workingPath.toString());
  job.setJarByClass(jarClass);

  // Create one output DoNode per target; every node path for that target is
  // walked back from the shared output node.
  Set<DoNode> outputNodes = Sets.newHashSet();
  Path outputPath = new Path(workingPath, "output");
  MSCROutputHandler outputHandler = new MSCROutputHandler(job, outputPath, group == null);
  for (Target target : targetsToNodePaths.keySet()) {
    DoNode outputNode = null;
    for (NodePath nodePath : targetsToNodePaths.get(target)) {
      if (outputNode == null) {
        PCollectionImpl<?> tail = nodePath.tail();
        outputNode = DoNode.createOutputNode(target.toString(), tail.getPType());
        outputHandler.configureNode(outputNode, target);
      }
      outputNodes.add(walkPath(nodePath.descendingIterator(), outputNode));
    }
  }

  job.setMapperClass(CrunchMapper.class);
  List<DoNode> inputNodes;
  DoNode reduceNode = null;
  if (group == null) {
    // No grouping: map-only job, output nodes feed the map stage directly.
    job.setNumReduceTasks(0);
    inputNodes = Lists.newArrayList(outputNodes);
  } else {
    job.setReducerClass(CrunchReducer.class);
    List<DoNode> reduceNodes = Lists.newArrayList(outputNodes);
    serialize(reduceNodes, conf, workingPath, NodeContext.REDUCE);
    reduceNode = reduceNodes.get(0);

    if (combineFnTable != null) {
      // Combiner chain: combiner input -> combine fn -> grouping node.
      job.setCombinerClass(CrunchCombiner.class);
      DoNode combinerInputNode = group.createDoNode();
      DoNode combineNode = combineFnTable.createDoNode();
      combineNode.addChild(group.getGroupingNode());
      combinerInputNode.addChild(combineNode);
      serialize(ImmutableList.of(combinerInputNode), conf, workingPath, NodeContext.COMBINE);
    }

    group.configureShuffle(job);

    DoNode mapOutputNode = group.getGroupingNode();
    Set<DoNode> mapNodes = Sets.newHashSet();
    for (NodePath nodePath : mapNodePaths) {
      // Skip the path's tail (the PGroupedTableImpl): the grouping node has
      // already been configured above, so start walking one step in.
      Iterator<PCollectionImpl<?>> iter = nodePath.descendingIterator();
      iter.next();
      mapNodes.add(walkPath(iter, mapOutputNode));
    }
    inputNodes = Lists.newArrayList(mapNodes);
  }

  serialize(inputNodes, conf, workingPath, NodeContext.MAP);

  // A single input keeps the job's default input format (index -1); multiple
  // inputs are each tagged with their index and routed via CrunchInputFormat.
  if (inputNodes.size() == 1) {
    inputNodes.get(0).getSource().configureSource(job, -1);
  } else {
    for (int i = 0; i < inputNodes.size(); i++) {
      inputNodes.get(i).getSource().configureSource(job, i);
    }
    job.setInputFormatClass(CrunchInputFormat.class);
  }

  job.setJobName(createJobName(pipeline.getName(), inputNodes, reduceNode));
  return new CrunchJob(job, outputPath, outputHandler);
}
Aggregations