use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
the class JobGraphGenerator method preVisit.
/**
* This methods implements the pre-visiting during a depth-first traversal. It create the job
* vertex and sets local strategy.
*
* @param node The node that is currently processed.
* @return True, if the visitor should descend to the node's children, false if not.
* @see org.apache.flink.util.Visitor#preVisit(org.apache.flink.util.Visitable)
*/
@Override
public boolean preVisit(PlanNode node) {
// check if we have visited this node before. in non-tree graphs, this happens
if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
// return false to prevent further descend
return false;
}
// the vertex to be created for the current node
final JobVertex vertex;
try {
if (node instanceof SinkPlanNode) {
vertex = createDataSinkVertex((SinkPlanNode) node);
} else if (node instanceof SourcePlanNode) {
vertex = createDataSourceVertex((SourcePlanNode) node);
} else if (node instanceof BulkIterationPlanNode) {
BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
// for the bulk iteration, we skip creating anything for now. we create the graph
// for the step function in the post visit.
// check that the root of the step function has the same parallelism as the
// iteration.
// because the tail must have the same parallelism as the head, we can only merge
// the last
// operator with the tail, if they have the same parallelism. not merging is
// currently not
// implemented
PlanNode root = iterationNode.getRootOfStepFunction();
if (root.getParallelism() != node.getParallelism()) {
throw new CompilerException("Error: The final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
this.iterations.put(iterationNode, descr);
vertex = null;
} else if (node instanceof WorksetIterationPlanNode) {
WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
// we have the same constraints as for the bulk iteration
PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
if (nextWorkSet.getParallelism() != node.getParallelism()) {
throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
if (solutionSetDelta.getParallelism() != node.getParallelism()) {
throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
this.iterations.put(iterationNode, descr);
vertex = null;
} else if (node instanceof SingleInputPlanNode) {
vertex = createSingleInputVertex((SingleInputPlanNode) node);
} else if (node instanceof DualInputPlanNode) {
vertex = createDualInputVertex((DualInputPlanNode) node);
} else if (node instanceof NAryUnionPlanNode) {
// skip the union for now
vertex = null;
} else if (node instanceof BulkPartialSolutionPlanNode) {
// create a head node (or not, if it is merged into its successor)
vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
} else if (node instanceof SolutionSetPlanNode) {
// we adjust the joins / cogroups that go into the solution set here
for (Channel c : node.getOutgoingChannels()) {
DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
JobVertex accessingVertex = this.vertices.get(target);
TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
int inputNum = c == target.getInput1() ? 0 : c == target.getInput2() ? 1 : -1;
// sanity checks
if (inputNum == -1) {
throw new CompilerException();
}
// adjust the driver
if (conf.getDriver().equals(JoinDriver.class)) {
conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
} else if (conf.getDriver().equals(CoGroupDriver.class)) {
conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
} else {
throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
}
}
// make sure we do not visit this node again. for that, we add a 'already seen'
// entry into one of the sets
this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
vertex = null;
} else if (node instanceof WorksetPlanNode) {
// create the iteration head here
vertex = createWorksetIterationHead((WorksetPlanNode) node);
} else {
throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
}
} catch (Exception e) {
throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
}
// check if a vertex was created, or if it was chained or skipped
if (vertex != null) {
// set parallelism
int pd = node.getParallelism();
vertex.setParallelism(pd);
vertex.setMaxParallelism(pd);
vertex.setSlotSharingGroup(sharingGroup);
// check whether this vertex is part of an iteration step function
if (this.currentIteration != null) {
// check that the task has the same parallelism as the iteration as such
PlanNode iterationNode = (PlanNode) this.currentIteration;
if (iterationNode.getParallelism() < pd) {
throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, parallelism than the iteration operator.");
}
// store the id of the iterations the step functions participate in
IterationDescriptor descr = this.iterations.get(this.currentIteration);
new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
}
// store in the map
this.vertices.put(node, vertex);
}
// returning true causes deeper descend
return true;
}
use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
the class JavaApiPostPass method traverse.
protected void traverse(PlanNode node) {
if (!alreadyDone.add(node)) {
// already worked on that one
return;
}
// distinguish the node types
if (node instanceof SinkPlanNode) {
// descend to the input channel
SinkPlanNode sn = (SinkPlanNode) node;
Channel inchannel = sn.getInput();
traverseChannel(inchannel);
} else if (node instanceof SourcePlanNode) {
TypeInformation<?> typeInfo = getTypeInfoFromSource((SourcePlanNode) node);
((SourcePlanNode) node).setSerializer(createSerializer(typeInfo));
} else if (node instanceof BulkIterationPlanNode) {
BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
}
// utilities. Needed in case of intermediate termination criterion
if (iterationNode.getRootOfTerminationCriterion() != null) {
SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
traverseChannel(addMapper.getInput());
}
BulkIterationBase<?> operator = (BulkIterationBase<?>) iterationNode.getProgramOperator();
// set the serializer
iterationNode.setSerializerForIterationChannel(createSerializer(operator.getOperatorInfo().getOutputType()));
// done, we can now propagate our info down
traverseChannel(iterationNode.getInput());
traverse(iterationNode.getRootOfStepFunction());
} else if (node instanceof WorksetIterationPlanNode) {
WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
}
if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
}
DeltaIterationBase<?, ?> operator = (DeltaIterationBase<?, ?>) iterationNode.getProgramOperator();
// set the serializers and comparators for the workset iteration
iterationNode.setSolutionSetSerializer(createSerializer(operator.getOperatorInfo().getFirstInputType()));
iterationNode.setWorksetSerializer(createSerializer(operator.getOperatorInfo().getSecondInputType()));
iterationNode.setSolutionSetComparator(createComparator(operator.getOperatorInfo().getFirstInputType(), iterationNode.getSolutionSetKeyFields(), getSortOrders(iterationNode.getSolutionSetKeyFields(), null)));
// traverse the inputs
traverseChannel(iterationNode.getInput1());
traverseChannel(iterationNode.getInput2());
// traverse the step function
traverse(iterationNode.getSolutionSetDeltaPlanNode());
traverse(iterationNode.getNextWorkSetPlanNode());
} else if (node instanceof SingleInputPlanNode) {
SingleInputPlanNode sn = (SingleInputPlanNode) node;
if (!(sn.getOptimizerNode().getOperator() instanceof SingleInputOperator)) {
// Special case for delta iterations
if (sn.getOptimizerNode().getOperator() instanceof NoOpUnaryUdfOp) {
traverseChannel(sn.getInput());
return;
} else {
throw new RuntimeException("Wrong operator type found in post pass.");
}
}
SingleInputOperator<?, ?, ?> singleInputOperator = (SingleInputOperator<?, ?, ?>) sn.getOptimizerNode().getOperator();
// parameterize the node's driver strategy
for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
sn.setComparator(createComparator(singleInputOperator.getOperatorInfo().getInputType(), sn.getKeys(i), getSortOrders(sn.getKeys(i), sn.getSortOrders(i))), i);
}
// done, we can now propagate our info down
traverseChannel(sn.getInput());
// don't forget the broadcast inputs
for (Channel c : sn.getBroadcastInputs()) {
traverseChannel(c);
}
} else if (node instanceof DualInputPlanNode) {
DualInputPlanNode dn = (DualInputPlanNode) node;
if (!(dn.getOptimizerNode().getOperator() instanceof DualInputOperator)) {
throw new RuntimeException("Wrong operator type found in post pass.");
}
DualInputOperator<?, ?, ?, ?> dualInputOperator = (DualInputOperator<?, ?, ?, ?>) dn.getOptimizerNode().getOperator();
// parameterize the node's driver strategy
if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
dn.setComparator1(createComparator(dualInputOperator.getOperatorInfo().getFirstInputType(), dn.getKeysForInput1(), getSortOrders(dn.getKeysForInput1(), dn.getSortOrders())));
dn.setComparator2(createComparator(dualInputOperator.getOperatorInfo().getSecondInputType(), dn.getKeysForInput2(), getSortOrders(dn.getKeysForInput2(), dn.getSortOrders())));
dn.setPairComparator(createPairComparator(dualInputOperator.getOperatorInfo().getFirstInputType(), dualInputOperator.getOperatorInfo().getSecondInputType()));
}
traverseChannel(dn.getInput1());
traverseChannel(dn.getInput2());
// don't forget the broadcast inputs
for (Channel c : dn.getBroadcastInputs()) {
traverseChannel(c);
}
} else // catch the sources of the iterative step functions
if (node instanceof BulkPartialSolutionPlanNode || node instanceof SolutionSetPlanNode || node instanceof WorksetPlanNode) {
// Do nothing :D
} else if (node instanceof NAryUnionPlanNode) {
// Traverse to all child channels
for (Channel channel : node.getInputs()) {
traverseChannel(channel);
}
} else {
throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
}
}
use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
the class BulkIterationNode method instantiateCandidate.
@SuppressWarnings("unchecked")
@Override
protected void instantiateCandidate(OperatorDescriptorSingle dps, Channel in, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReq, RequestedLocalProperties locPropsReq) {
// NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
// Whenever we instantiate the iteration, we enumerate new candidates for the step function.
// That way, we make sure we have an appropriate plan for each candidate for the initial
// partial solution,
// we have a fitting candidate for the step function (often, work is pushed out of the step
// function).
// Among the candidates of the step function, we keep only those that meet the requested
// properties of the
// current candidate initial partial solution. That makes sure these properties exist at the
// beginning of
// the successive iteration.
// 1) Because we enumerate multiple times, we may need to clean the cached plans
// before starting another enumeration
this.nextPartialSolution.accept(PlanCacheCleaner.INSTANCE);
if (this.terminationCriterion != null) {
this.terminationCriterion.accept(PlanCacheCleaner.INSTANCE);
}
// 2) Give the partial solution the properties of the current candidate for the initial
// partial solution
this.partialSolution.setCandidateProperties(in.getGlobalProperties(), in.getLocalProperties(), in);
final BulkPartialSolutionPlanNode pspn = this.partialSolution.getCurrentPartialSolutionPlanNode();
// 3) Get the alternative plans
List<PlanNode> candidates = this.nextPartialSolution.getAlternativePlans(estimator);
// 4) Make sure that the beginning of the step function does not assume properties that
// are not also produced by the end of the step function.
{
List<PlanNode> newCandidates = new ArrayList<PlanNode>();
for (Iterator<PlanNode> planDeleter = candidates.iterator(); planDeleter.hasNext(); ) {
PlanNode candidate = planDeleter.next();
GlobalProperties atEndGlobal = candidate.getGlobalProperties();
LocalProperties atEndLocal = candidate.getLocalProperties();
FeedbackPropertiesMeetRequirementsReport report = candidate.checkPartialSolutionPropertiesMet(pspn, atEndGlobal, atEndLocal);
if (report == FeedbackPropertiesMeetRequirementsReport.NO_PARTIAL_SOLUTION) {
// depends only through broadcast variable on the partial solution
} else if (report == FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
// attach a no-op node through which we create the properties of the original
// input
Channel toNoOp = new Channel(candidate);
globPropsReq.parameterizeChannel(toNoOp, false, rootConnection.getDataExchangeMode(), false);
locPropsReq.parameterizeChannel(toNoOp);
NoOpUnaryUdfOp noOpUnaryUdfOp = new NoOpUnaryUdfOp<>();
noOpUnaryUdfOp.setInput(candidate.getProgramOperator());
UnaryOperatorNode rebuildPropertiesNode = new UnaryOperatorNode("Rebuild Partial Solution Properties", noOpUnaryUdfOp, true);
rebuildPropertiesNode.setParallelism(candidate.getParallelism());
SingleInputPlanNode rebuildPropertiesPlanNode = new SingleInputPlanNode(rebuildPropertiesNode, "Rebuild Partial Solution Properties", toNoOp, DriverStrategy.UNARY_NO_OP);
rebuildPropertiesPlanNode.initProperties(toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
estimator.costOperator(rebuildPropertiesPlanNode);
GlobalProperties atEndGlobalModified = rebuildPropertiesPlanNode.getGlobalProperties();
LocalProperties atEndLocalModified = rebuildPropertiesPlanNode.getLocalProperties();
if (!(atEndGlobalModified.equals(atEndGlobal) && atEndLocalModified.equals(atEndLocal))) {
FeedbackPropertiesMeetRequirementsReport report2 = candidate.checkPartialSolutionPropertiesMet(pspn, atEndGlobalModified, atEndLocalModified);
if (report2 != FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
newCandidates.add(rebuildPropertiesPlanNode);
}
}
planDeleter.remove();
}
}
candidates.addAll(newCandidates);
}
if (candidates.isEmpty()) {
return;
}
// function.
if (terminationCriterion == null) {
for (PlanNode candidate : candidates) {
BulkIterationPlanNode node = new BulkIterationPlanNode(this, this.getOperator().getName(), in, pspn, candidate);
GlobalProperties gProps = candidate.getGlobalProperties().clone();
LocalProperties lProps = candidate.getLocalProperties().clone();
node.initProperties(gProps, lProps);
target.add(node);
}
} else if (candidates.size() > 0) {
List<PlanNode> terminationCriterionCandidates = this.terminationCriterion.getAlternativePlans(estimator);
SingleRootJoiner singleRoot = (SingleRootJoiner) this.singleRoot;
for (PlanNode candidate : candidates) {
for (PlanNode terminationCandidate : terminationCriterionCandidates) {
if (singleRoot.areBranchCompatible(candidate, terminationCandidate)) {
BulkIterationPlanNode node = new BulkIterationPlanNode(this, "BulkIteration (" + this.getOperator().getName() + ")", in, pspn, candidate, terminationCandidate);
GlobalProperties gProps = candidate.getGlobalProperties().clone();
LocalProperties lProps = candidate.getLocalProperties().clone();
node.initProperties(gProps, lProps);
target.add(node);
}
}
}
}
}
use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
the class IterationsCompilerTest method testIterationPushingWorkOut.
@Test
public void testIterationPushingWorkOut() throws Exception {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);
// we do two join operations with input1 which is the partial solution
// it is cheaper to push the partitioning out so that the feedback channel and the
// initial input do the partitioning
doBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
assertEquals(1, op.getDataSinks().size());
assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);
BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
// check that work has been pushed out
for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
assertEquals(ShipStrategyType.FORWARD, c.getShipStrategy());
}
// the end of the step function has to produce the necessary properties
for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
}
assertEquals(ShipStrategyType.PARTITION_HASH, bipn.getInput().getShipStrategy());
new JobGraphGenerator().compileJobGraph(op);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
the class IterationsCompilerTest method testTwoIterationsDirectlyChained.
@Test
public void testTwoIterationsDirectlyChained() throws Exception {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);
depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
assertEquals(1, op.getDataSinks().size());
assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
BulkIterationPlanNode bipn = (BulkIterationPlanNode) wipn.getInput1().getSource();
// the hash partitioning has been pushed out of the delta iteration into the bulk
// iteration
assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());
// hash partitioning
for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
}
assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
new JobGraphGenerator().compileJobGraph(op);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
Aggregations