use of org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode in project flink by apache.
the class PageRankCompilerTest method testPageRank.
@Test
public void testPageRank() {
try {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// get input data
DataSet<Long> pagesInput = env.fromElements(1L);
@SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> linksInput = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
// assign initial rank to pages
DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.map(new RankAssigner((1.0d / 10)));
// build adjacency list from link input
DataSet<Tuple2<Long, Long[]>> adjacencyListInput = linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());
// set iterative data set
IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(10);
Configuration cfg = new Configuration();
cfg.setString(Optimizer.HINT_LOCAL_STRATEGY, Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
DataSet<Tuple2<Long, Double>> newRanks = iteration.join(adjacencyListInput).where(0).equalTo(0).withParameters(cfg)
.flatMap(new JoinVertexWithEdgesMatch())
.groupBy(0).aggregate(SUM, 1)
.map(new Dampener(0.85, 10));
DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(newRanks, newRanks.join(iteration).where(0).equalTo(0).filter(new EpsilonFilter()));
finalPageRanks.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());
// get the plan and compile it
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
SinkPlanNode sinkPlanNode = (SinkPlanNode) op.getDataSinks().iterator().next();
BulkIterationPlanNode iterPlanNode = (BulkIterationPlanNode) sinkPlanNode.getInput().getSource();
// check that the partitioning is pushed out of the iteration (it happens once, before the loop)
Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iterPlanNode.getInput().getShipStrategy());
Assert.assertEquals(LocalStrategy.NONE, iterPlanNode.getInput().getLocalStrategy());
BulkPartialSolutionPlanNode partSolPlanNode = iterPlanNode.getPartialSolutionPlanNode();
Assert.assertEquals(ShipStrategyType.FORWARD, partSolPlanNode.getOutgoingChannels().get(0).getShipStrategy());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
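For reference, compileNoStats comes from the optimizer test base and is not shown above; a minimal sketch of what it amounts to, assuming the Optimizer(DataStatistics, CostEstimator, Configuration) constructor and the DefaultCostEstimator from flink-optimizer, is:
// hedged sketch, not the actual test helper: compile the program plan without data statistics
Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
OptimizedPlan optimizedPlan = compiler.compile(p);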
use of org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode in project flink by apache.
the class BulkIterationNode method instantiateCandidate.
@SuppressWarnings("unchecked")
@Override
protected void instantiateCandidate(OperatorDescriptorSingle dps, Channel in, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReq, RequestedLocalProperties locPropsReq) {
// NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
// Whenever we instantiate the iteration, we enumerate new candidates for the step function.
// That way, we make sure that, for each candidate for the initial partial solution,
// we have a fitting candidate for the step function (often, work is pushed out of the step function).
// Among the candidates of the step function, we keep only those that meet the requested properties of the
// current candidate initial partial solution. That makes sure these properties exist at the beginning of
// the successive iteration.
// 1) Because we enumerate multiple times, we may need to clean the cached plans
// before starting another enumeration
this.nextPartialSolution.accept(PlanCacheCleaner.INSTANCE);
if (this.terminationCriterion != null) {
this.terminationCriterion.accept(PlanCacheCleaner.INSTANCE);
}
// 2) Give the partial solution the properties of the current candidate for the initial partial solution
this.partialSolution.setCandidateProperties(in.getGlobalProperties(), in.getLocalProperties(), in);
final BulkPartialSolutionPlanNode pspn = this.partialSolution.getCurrentPartialSolutionPlanNode();
// 3) Get the alternative plans
List<PlanNode> candidates = this.nextPartialSolution.getAlternativePlans(estimator);
// 4) Make sure that the beginning of the step function does not assume properties that
// are not also produced by the end of the step function.
{
List<PlanNode> newCandidates = new ArrayList<PlanNode>();
for (Iterator<PlanNode> planDeleter = candidates.iterator(); planDeleter.hasNext(); ) {
PlanNode candidate = planDeleter.next();
GlobalProperties atEndGlobal = candidate.getGlobalProperties();
LocalProperties atEndLocal = candidate.getLocalProperties();
FeedbackPropertiesMeetRequirementsReport report = candidate.checkPartialSolutionPropertiesMet(pspn, atEndGlobal, atEndLocal);
if (report == FeedbackPropertiesMeetRequirementsReport.NO_PARTIAL_SOLUTION) {
// the candidate depends on the partial solution only through a broadcast variable; keep it as is
} else if (report == FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
// attach a no-op node through which we create the properties of the original input
Channel toNoOp = new Channel(candidate);
globPropsReq.parameterizeChannel(toNoOp, false, rootConnection.getDataExchangeMode(), false);
locPropsReq.parameterizeChannel(toNoOp);
NoOpUnaryUdfOp noOpUnaryUdfOp = new NoOpUnaryUdfOp<>();
noOpUnaryUdfOp.setInput(candidate.getProgramOperator());
UnaryOperatorNode rebuildPropertiesNode = new UnaryOperatorNode("Rebuild Partial Solution Properties", noOpUnaryUdfOp, true);
rebuildPropertiesNode.setParallelism(candidate.getParallelism());
SingleInputPlanNode rebuildPropertiesPlanNode = new SingleInputPlanNode(rebuildPropertiesNode, "Rebuild Partial Solution Properties", toNoOp, DriverStrategy.UNARY_NO_OP);
rebuildPropertiesPlanNode.initProperties(toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
estimator.costOperator(rebuildPropertiesPlanNode);
GlobalProperties atEndGlobalModified = rebuildPropertiesPlanNode.getGlobalProperties();
LocalProperties atEndLocalModified = rebuildPropertiesPlanNode.getLocalProperties();
if (!(atEndGlobalModified.equals(atEndGlobal) && atEndLocalModified.equals(atEndLocal))) {
FeedbackPropertiesMeetRequirementsReport report2 = candidate.checkPartialSolutionPropertiesMet(pspn, atEndGlobalModified, atEndLocalModified);
if (report2 != FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
newCandidates.add(rebuildPropertiesPlanNode);
}
}
planDeleter.remove();
}
}
candidates.addAll(newCandidates);
}
if (candidates.isEmpty()) {
return;
}
// 5) Create a candidate for the Iteration Node for every remaining plan of the step function.
if (terminationCriterion == null) {
for (PlanNode candidate : candidates) {
BulkIterationPlanNode node = new BulkIterationPlanNode(this, this.getOperator().getName(), in, pspn, candidate);
GlobalProperties gProps = candidate.getGlobalProperties().clone();
LocalProperties lProps = candidate.getLocalProperties().clone();
node.initProperties(gProps, lProps);
target.add(node);
}
} else if (candidates.size() > 0) {
List<PlanNode> terminationCriterionCandidates = this.terminationCriterion.getAlternativePlans(estimator);
SingleRootJoiner singleRoot = (SingleRootJoiner) this.singleRoot;
for (PlanNode candidate : candidates) {
for (PlanNode terminationCandidate : terminationCriterionCandidates) {
if (singleRoot.areBranchCompatible(candidate, terminationCandidate)) {
BulkIterationPlanNode node = new BulkIterationPlanNode(this, "BulkIteration (" + this.getOperator().getName() + ")", in, pspn, candidate, terminationCandidate);
GlobalProperties gProps = candidate.getGlobalProperties().clone();
LocalProperties lProps = candidate.getLocalProperties().clone();
node.initProperties(gProps, lProps);
target.add(node);
}
}
}
}
}
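For illustration, a minimal bulk-iteration program (hypothetical, not taken from the snippet above) whose optimization runs through instantiateCandidate and yields a BulkIterationPlanNode with a BulkPartialSolutionPlanNode as the partial-solution head could look like this:
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// the initial partial solution
IterativeDataSet<Long> iteration = env.fromElements(1L, 2L, 3L).iterate(5);
// the step function: any transformation of the partial solution
DataSet<Long> stepResult = iteration.map(new MapFunction<Long, Long>() {
@Override
public Long map(Long value) {
return value + 1;
}
});
// close the loop and attach a sink, then create the plan that the optimizer enumerates
iteration.closeWith(stepResult).output(new DiscardingOutputFormat<Long>());
Plan plan = env.createProgramPlan();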
use of org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode in project flink by apache.
the class JobGraphGenerator method translateChannel.
private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex, TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {
final PlanNode inputPlanNode = input.getSource();
final Iterator<Channel> allInChannels;
if (inputPlanNode instanceof NAryUnionPlanNode) {
allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
// if the channel from the union is a BATCH exchange, push BATCH down to the union's own inputs as well, to avoid
// deadlocks when closing a branching flow at runtime.
for (Channel in : inputPlanNode.getInputs()) {
if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
in.setDataExchangeMode(DataExchangeMode.BATCH);
}
if (isBroadcast) {
in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
}
}
} else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
}
// also, set the index of the gate with the partial solution
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof WorksetPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
}
// also, set the index of the gate with the partial solution
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof SolutionSetPlanNode) {
// skip connections to the solution set node: the solution set is accessed as a local index
// rather than a vertex connection
return 0;
} else {
allInChannels = Collections.singletonList(input).iterator();
}
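// at this point, allInChannels holds either just the given input channel or, for unions and merged iteration heads, all channels feeding the union at the source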
// check that the type serializer is consistent
TypeSerializerFactory<?> typeSerFact = null;
// accounting for channels on the dynamic path
int numChannelsTotal = 0;
int numChannelsDynamicPath = 0;
int numDynamicSenderTasksTotal = 0;
// expand the channel to all the union channels, in case there is a union operator at its source
while (allInChannels.hasNext()) {
final Channel inConn = allInChannels.next();
// sanity check the common serializer
if (typeSerFact == null) {
typeSerFact = inConn.getSerializer();
} else if (!typeSerFact.equals(inConn.getSerializer())) {
throw new CompilerException("Conflicting types in union operator.");
}
final PlanNode sourceNode = inConn.getSource();
JobVertex sourceVertex = this.vertices.get(sourceNode);
TaskConfig sourceVertexConfig;
if (sourceVertex == null) {
// this predecessor is chained to another task or an iteration
final TaskInChain chainedTask;
final IterationDescriptor iteration;
if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
// push chained task
if (chainedTask.getContainingVertex() == null) {
throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
}
sourceVertex = chainedTask.getContainingVertex();
sourceVertexConfig = chainedTask.getTaskConfig();
} else if ((iteration = this.iterations.get(sourceNode)) != null) {
// predecessor is an iteration
sourceVertex = iteration.getHeadTask();
sourceVertexConfig = iteration.getHeadFinalResultConfig();
} else {
throw new CompilerException("Bug: Could not resolve source node for a channel.");
}
} else {
// predecessor is its own vertex
sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
}
DistributionPattern pattern = connectJobVertices(inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
// accounting on channels and senders
numChannelsTotal++;
if (inConn.isOnDynamicPath()) {
numChannelsDynamicPath++;
numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
}
}
// reject plans where this input is a union between nodes on the static and nodes on the dynamic path
if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
}
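// for inputs on the dynamic path, record per gate how many sender tasks contribute per superstep (presumably so the iterative task knows how many end-of-superstep events to await before advancing)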
if (numDynamicSenderTasksTotal > 0) {
if (isBroadcast) {
targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
} else {
targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
}
}
// the local strategy is added only once. in the non-union case that is the actual edge;
// in the union case, it is the edge between the union and the target node
addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
return 1;
}
use of org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode in project flink by apache.
the class JobGraphGenerator method createBulkIterationHead.
private JobVertex createBulkIterationHead(BulkPartialSolutionPlanNode pspn) {
// get the bulk iteration that corresponds to this partial solution node
final BulkIterationPlanNode iteration = pspn.getContainingIterationNode();
// check whether we need an individual vertex for the partial solution, or whether we
// attach ourselves to the vertex of the parent node. We can combine the head with a node of
// the step function, if
// 1) There is one parent that the partial solution connects to via a forward pattern and no
// local strategy
// 2) parallelism and the number of subtasks per instance do not change
// 3) That successor is not a union
// 4) That successor is not itself the last node of the step function
// 5) There is no local strategy on the edge for the initial partial solution, as
// this translates to a local strategy that would only be executed in the first iteration
final boolean merge;
if (mergeIterationAuxTasks && pspn.getOutgoingChannels().size() == 1) {
final Channel c = pspn.getOutgoingChannels().get(0);
final PlanNode successor = c.getTarget();
merge = c.getShipStrategy() == ShipStrategyType.FORWARD
&& c.getLocalStrategy() == LocalStrategy.NONE
&& c.getTempMode() == TempMode.NONE
&& successor.getParallelism() == pspn.getParallelism()
&& !(successor instanceof NAryUnionPlanNode)
&& successor != iteration.getRootOfStepFunction()
&& iteration.getInput().getLocalStrategy() == LocalStrategy.NONE;
} else {
merge = false;
}
// create or adopt the head vertex
final JobVertex toReturn;
final JobVertex headVertex;
final TaskConfig headConfig;
if (merge) {
final PlanNode successor = pspn.getOutgoingChannels().get(0).getTarget();
headVertex = this.vertices.get(successor);
if (headVertex == null) {
throw new CompilerException("Bug: Trying to merge solution set with its successor, but successor has not been created.");
}
// reset the vertex type to iteration head
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
toReturn = null;
} else {
// instantiate the head vertex and give it a no-op driver as the driver strategy.
// everything else happens in the post visit, after the input (the initial partial solution)
// is connected.
headVertex = new JobVertex("PartialSolution (" + iteration.getNodeName() + ")");
headVertex.setResources(iteration.getMinResources(), iteration.getPreferredResources());
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
headConfig.setDriver(NoOpDriver.class);
toReturn = headVertex;
}
// look up the iteration descriptor and register the head task with it
IterationDescriptor descr = this.iterations.get(iteration);
if (descr == null) {
throw new CompilerException("Bug: Iteration descriptor was not created at when translating the iteration node.");
}
descr.setHeadTask(headVertex, headConfig);
return toReturn;
}
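A quick way to observe the merge decision (a sketch under assumptions, not part of the Flink sources): when merge is true this method returns null and no separate head vertex is created; otherwise the JobGraph contains a vertex named "PartialSolution (...)". Listing the vertex names of a compiled graph makes that visible, assuming compileJobGraph(OptimizedPlan) as the entry point and the getVertices()/getName() accessors:
JobGraphGenerator generator = new JobGraphGenerator();
JobGraph jobGraph = generator.compileJobGraph(optimizedPlan);
for (JobVertex vertex : jobGraph.getVertices()) {
System.out.println(vertex.getName());
}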
use of org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode in project flink by apache.
the class JobGraphGenerator method preVisit.
/**
* This method implements the pre-visiting during a depth-first traversal. It creates the job vertex and
* sets the local strategy.
*
* @param node
* The node that is currently processed.
* @return True, if the visitor should descend to the node's children, false if not.
* @see org.apache.flink.util.Visitor#preVisit(org.apache.flink.util.Visitable)
*/
@Override
public boolean preVisit(PlanNode node) {
// check if we have visited this node before. in non-tree graphs, this happens
if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
// return false to prevent further descent
return false;
}
// the vertex to be created for the current node
final JobVertex vertex;
try {
if (node instanceof SinkPlanNode) {
vertex = createDataSinkVertex((SinkPlanNode) node);
} else if (node instanceof SourcePlanNode) {
vertex = createDataSourceVertex((SourcePlanNode) node);
} else if (node instanceof BulkIterationPlanNode) {
BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
// for the bulk iteration, we skip creating anything for now. we create the graph
// for the step function in the post visit.
// check that the root of the step function has the same parallelism as the iteration.
// because the tail must have the same parallelism as the head, we can only merge the last
// operator with the tail, if they have the same parallelism. not merging is currently not
// implemented
PlanNode root = iterationNode.getRootOfStepFunction();
if (root.getParallelism() != node.getParallelism()) {
throw new CompilerException("Error: The final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
this.iterations.put(iterationNode, descr);
vertex = null;
} else if (node instanceof WorksetIterationPlanNode) {
WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
// we have the same constraints as for the bulk iteration
PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
if (nextWorkSet.getParallelism() != node.getParallelism()) {
throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
if (solutionSetDelta.getParallelism() != node.getParallelism()) {
throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
this.iterations.put(iterationNode, descr);
vertex = null;
} else if (node instanceof SingleInputPlanNode) {
vertex = createSingleInputVertex((SingleInputPlanNode) node);
} else if (node instanceof DualInputPlanNode) {
vertex = createDualInputVertex((DualInputPlanNode) node);
} else if (node instanceof NAryUnionPlanNode) {
// skip the union for now
vertex = null;
} else if (node instanceof BulkPartialSolutionPlanNode) {
// create a head node (or not, if it is merged into its successor)
vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
} else if (node instanceof SolutionSetPlanNode) {
// we adjust the joins / cogroups that go into the solution set here
for (Channel c : node.getOutgoingChannels()) {
DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
JobVertex accessingVertex = this.vertices.get(target);
TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
int inputNum = c == target.getInput1() ? 0 : c == target.getInput2() ? 1 : -1;
// sanity checks
if (inputNum == -1) {
throw new CompilerException();
}
// adjust the driver
if (conf.getDriver().equals(JoinDriver.class)) {
conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
} else if (conf.getDriver().equals(CoGroupDriver.class)) {
conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
} else {
throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
}
}
// make sure we do not visit this node again. for that, we add an 'already seen' entry into one of the sets
this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
vertex = null;
} else if (node instanceof WorksetPlanNode) {
// create the iteration head here
vertex = createWorksetIterationHead((WorksetPlanNode) node);
} else {
throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
}
} catch (Exception e) {
throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
}
// check if a vertex was created, or if it was chained or skipped
if (vertex != null) {
// set parallelism
int pd = node.getParallelism();
vertex.setParallelism(pd);
vertex.setMaxParallelism(pd);
vertex.setSlotSharingGroup(sharingGroup);
// check whether this vertex is part of an iteration step function
if (this.currentIteration != null) {
// check that the task has the same parallelism as the iteration as such
PlanNode iterationNode = (PlanNode) this.currentIteration;
if (iterationNode.getParallelism() < pd) {
throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, parallelism than the iteration operator.");
}
// store the id of the iterations the step functions participate in
IterationDescriptor descr = this.iterations.get(this.currentIteration);
new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
}
// store in the map
this.vertices.put(node, vertex);
}
// returning true causes deeper descent
return true;
}
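Note that preVisit and the matching postVisit are not called directly; the generator implements Visitor<PlanNode> and is driven by the optimized plan's traversal, which starts at the data sinks. Conceptually (a hedged sketch; the exact internal call is an assumption) compileJobGraph boils down to:
// preVisit(...) is invoked on the way down, postVisit(...) on the way back up
optimizedPlan.accept(jobGraphGenerator);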