use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestMapReduce.
@Test
public void forwardFieldsTestMapReduce() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Integer, Integer>> set = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
set = set.map(new MockMapper()).withForwardedFields("*").groupBy(0).reduce(new MockReducer()).withForwardedFields("f0->f1").map(new MockMapper()).withForwardedFields("*").groupBy(1).reduce(new MockReducer()).withForwardedFields("*");
set.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileWithStats(plan);
oPlan.accept(new Visitor<PlanNode>() {
@Override
public boolean preVisit(PlanNode visitable) {
if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof ReduceOperatorBase) {
for (Channel input : visitable.getInputs()) {
GlobalProperties gprops = visitable.getGlobalProperties();
LocalProperties lprops = visitable.getLocalProperties();
Assert.assertTrue("Reduce should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.isPartitionedOnFields(new FieldSet(1)));
Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
Assert.assertTrue("Wrong LocalProperties on Reducer", lprops.getGroupedFields().contains(1));
}
}
if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof MapOperatorBase) {
for (Channel input : visitable.getInputs()) {
GlobalProperties gprops = visitable.getGlobalProperties();
LocalProperties lprops = visitable.getLocalProperties();
Assert.assertTrue("Map should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.isPartitionedOnFields(new FieldSet(1)));
Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
Assert.assertTrue("Wrong LocalProperties on Mapper", lprops.getGroupedFields().contains(1));
}
return false;
}
return true;
}
@Override
public void postVisit(PlanNode visitable) {
}
});
}
use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class JobGraphGenerator method compileJobGraph.
public JobGraph compileJobGraph(OptimizedPlan program, JobID jobId) {
if (program == null) {
throw new NullPointerException("Program is null, did you called " + "ExecutionEnvironment.execute()");
}
if (jobId == null) {
jobId = JobID.generate();
}
this.vertices = new HashMap<PlanNode, JobVertex>();
this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
this.chainedTasksInSequence = new ArrayList<TaskInChain>();
this.auxVertices = new ArrayList<JobVertex>();
this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
this.iterationStack = new ArrayList<IterationPlanNode>();
this.sharingGroup = new SlotSharingGroup();
// this starts the traversal that generates the job graph
program.accept(this);
// sanity check that we are not somehow in an iteration at the end
if (this.currentIteration != null) {
throw new CompilerException("The graph translation ended prematurely, leaving an unclosed iteration.");
}
// finalize the iterations
for (IterationDescriptor iteration : this.iterations.values()) {
if (iteration.getIterationNode() instanceof BulkIterationPlanNode) {
finalizeBulkIteration(iteration);
} else if (iteration.getIterationNode() instanceof WorksetIterationPlanNode) {
finalizeWorksetIteration(iteration);
} else {
throw new CompilerException();
}
}
// parents' configurations
for (TaskInChain tic : this.chainedTasksInSequence) {
TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
}
// ----- attach the additional info to the job vertices, for display in the runtime monitor
attachOperatorNamesAndDescriptions();
// ----------- finalize the job graph -----------
// create the job graph object
JobGraph graph = new JobGraph(jobId, program.getJobName());
try {
graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
} catch (IOException e) {
throw new CompilerException("Could not serialize the ExecutionConfig." + "This indicates that non-serializable types (like custom serializers) were registered");
}
graph.setAllowQueuedScheduling(false);
graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());
// add vertices to the graph
for (JobVertex vertex : this.vertices.values()) {
graph.addVertex(vertex);
}
for (JobVertex vertex : this.auxVertices) {
graph.addVertex(vertex);
vertex.setSlotSharingGroup(sharingGroup);
}
// add registered cache file into job configuration
for (Entry<String, DistributedCacheEntry> e : program.getOriginalPlan().getCachedFiles()) {
DistributedCache.writeFileInfoToConfig(e.getKey(), e.getValue(), graph.getJobConfiguration());
}
// release all references again
this.vertices = null;
this.chainedTasks = null;
this.chainedTasksInSequence = null;
this.auxVertices = null;
this.iterations = null;
this.iterationStack = null;
// return job graph
return graph;
}
use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class JobGraphGenerator method finalizeWorksetIteration.
private void finalizeWorksetIteration(IterationDescriptor descr) {
final WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) descr.getIterationNode();
final JobVertex headVertex = descr.getHeadTask();
final TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
final TaskConfig headFinalOutputConfig = descr.getHeadFinalResultConfig();
// ------------ finalize the head config with the final outputs and the sync gate ------------
{
final int numStepFunctionOuts = headConfig.getNumOutputs();
final int numFinalOuts = headFinalOutputConfig.getNumOutputs();
if (numStepFunctionOuts == 0) {
throw new CompilerException("The workset iteration has no operation on the workset inside the step function.");
}
headConfig.setIterationHeadFinalOutputConfig(headFinalOutputConfig);
headConfig.setIterationHeadIndexOfSyncOutput(numStepFunctionOuts + numFinalOuts);
final double relativeMemory = iterNode.getRelativeMemoryPerSubTask();
if (relativeMemory <= 0) {
throw new CompilerException("Bug: No memory has been assigned to the workset iteration.");
}
headConfig.setIsWorksetIteration();
headConfig.setRelativeBackChannelMemory(relativeMemory / 2);
headConfig.setRelativeSolutionSetMemory(relativeMemory / 2);
// set the solution set serializer and comparator
headConfig.setSolutionSetSerializer(iterNode.getSolutionSetSerializer());
headConfig.setSolutionSetComparator(iterNode.getSolutionSetComparator());
}
// --------------------------- create the sync task ---------------------------
final TaskConfig syncConfig;
{
final JobVertex sync = new JobVertex("Sync (" + iterNode.getNodeName() + ")");
sync.setResources(iterNode.getMinResources(), iterNode.getPreferredResources());
sync.setInvokableClass(IterationSynchronizationSinkTask.class);
sync.setParallelism(1);
sync.setMaxParallelism(1);
this.auxVertices.add(sync);
syncConfig = new TaskConfig(sync.getConfiguration());
syncConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, headVertex.getParallelism());
// set the number of iteration / convergence criterion for the sync
final int maxNumIterations = iterNode.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
if (maxNumIterations < 1) {
throw new CompilerException("Cannot create workset iteration with unspecified maximum number of iterations.");
}
syncConfig.setNumberOfIterations(maxNumIterations);
// connect the sync task
sync.connectNewDataSetAsInput(headVertex, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
}
// ----------------------------- create the iteration tails -----------------------------
// ----------------------- for next workset and solution set delta-----------------------
{
// we have three possible cases:
// 1) Two tails, one for workset update, one for solution set update
// 2) One tail for workset update, solution set update happens in an intermediate task
// 3) One tail for solution set update, workset update happens in an intermediate task
final PlanNode nextWorksetNode = iterNode.getNextWorkSetPlanNode();
final PlanNode solutionDeltaNode = iterNode.getSolutionSetDeltaPlanNode();
final boolean hasWorksetTail = nextWorksetNode.getOutgoingChannels().isEmpty();
final boolean hasSolutionSetTail = (!iterNode.isImmediateSolutionSetUpdate()) || (!hasWorksetTail);
{
// get the vertex for the workset update
final TaskConfig worksetTailConfig;
JobVertex nextWorksetVertex = this.vertices.get(nextWorksetNode);
if (nextWorksetVertex == null) {
// nextWorksetVertex is chained
TaskInChain taskInChain = this.chainedTasks.get(nextWorksetNode);
if (taskInChain == null) {
throw new CompilerException("Bug: Next workset node not found as vertex or chained task.");
}
nextWorksetVertex = taskInChain.getContainingVertex();
worksetTailConfig = taskInChain.getTaskConfig();
} else {
worksetTailConfig = new TaskConfig(nextWorksetVertex.getConfiguration());
}
// mark the node to perform workset updates
worksetTailConfig.setIsWorksetIteration();
worksetTailConfig.setIsWorksetUpdate();
if (hasWorksetTail) {
nextWorksetVertex.setInvokableClass(IterationTailTask.class);
worksetTailConfig.setOutputSerializer(iterNode.getWorksetSerializer());
}
}
{
final TaskConfig solutionDeltaConfig;
JobVertex solutionDeltaVertex = this.vertices.get(solutionDeltaNode);
if (solutionDeltaVertex == null) {
// last op is chained
TaskInChain taskInChain = this.chainedTasks.get(solutionDeltaNode);
if (taskInChain == null) {
throw new CompilerException("Bug: Solution Set Delta not found as vertex or chained task.");
}
solutionDeltaVertex = taskInChain.getContainingVertex();
solutionDeltaConfig = taskInChain.getTaskConfig();
} else {
solutionDeltaConfig = new TaskConfig(solutionDeltaVertex.getConfiguration());
}
solutionDeltaConfig.setIsWorksetIteration();
solutionDeltaConfig.setIsSolutionSetUpdate();
if (hasSolutionSetTail) {
solutionDeltaVertex.setInvokableClass(IterationTailTask.class);
solutionDeltaConfig.setOutputSerializer(iterNode.getSolutionSetSerializer());
// tell the head that it needs to wait for the solution set updates
headConfig.setWaitForSolutionSetUpdate();
} else {
// no tail, intermediate update. must be immediate update
if (!iterNode.isImmediateSolutionSetUpdate()) {
throw new CompilerException("A solution set update without dedicated tail is not set to perform immediate updates.");
}
solutionDeltaConfig.setIsSolutionSetUpdateWithoutReprobe();
}
}
}
// ------------------- register the aggregators -------------------
AggregatorRegistry aggs = iterNode.getIterationNode().getIterationContract().getAggregators();
Collection<AggregatorWithName<?>> allAggregators = aggs.getAllRegisteredAggregators();
for (AggregatorWithName<?> agg : allAggregators) {
if (agg.getName().equals(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME)) {
throw new CompilerException("User defined aggregator used the same name as built-in workset " + "termination check aggregator: " + WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME);
}
}
headConfig.addIterationAggregators(allAggregators);
syncConfig.addIterationAggregators(allAggregators);
String convAggName = aggs.getConvergenceCriterionAggregatorName();
ConvergenceCriterion<?> convCriterion = aggs.getConvergenceCriterion();
if (convCriterion != null || convAggName != null) {
if (convCriterion == null) {
throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
}
if (convAggName == null) {
throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
}
syncConfig.setConvergenceCriterion(convAggName, convCriterion);
}
headConfig.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new LongSumAggregator());
syncConfig.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new LongSumAggregator());
syncConfig.setImplicitConvergenceCriterion(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new WorksetEmptyConvergenceCriterion());
}
use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class JobGraphGenerator method translateChannel.
private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex, TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {
final PlanNode inputPlanNode = input.getSource();
final Iterator<Channel> allInChannels;
if (inputPlanNode instanceof NAryUnionPlanNode) {
allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
// deadlocks when closing a branching flow at runtime.
for (Channel in : inputPlanNode.getInputs()) {
if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
in.setDataExchangeMode(DataExchangeMode.BATCH);
}
if (isBroadcast) {
in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
}
}
} else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
}
// also, set the index of the gate with the partial solution
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof WorksetPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
}
// also, set the index of the gate with the partial solution
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof SolutionSetPlanNode) {
// rather than a vertex connection
return 0;
} else {
allInChannels = Collections.singletonList(input).iterator();
}
// check that the type serializer is consistent
TypeSerializerFactory<?> typeSerFact = null;
// accounting for channels on the dynamic path
int numChannelsTotal = 0;
int numChannelsDynamicPath = 0;
int numDynamicSenderTasksTotal = 0;
// expand the channel to all the union channels, in case there is a union operator at its source
while (allInChannels.hasNext()) {
final Channel inConn = allInChannels.next();
// sanity check the common serializer
if (typeSerFact == null) {
typeSerFact = inConn.getSerializer();
} else if (!typeSerFact.equals(inConn.getSerializer())) {
throw new CompilerException("Conflicting types in union operator.");
}
final PlanNode sourceNode = inConn.getSource();
JobVertex sourceVertex = this.vertices.get(sourceNode);
TaskConfig sourceVertexConfig;
if (sourceVertex == null) {
// this predecessor is chained to another task or an iteration
final TaskInChain chainedTask;
final IterationDescriptor iteration;
if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
// push chained task
if (chainedTask.getContainingVertex() == null) {
throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
}
sourceVertex = chainedTask.getContainingVertex();
sourceVertexConfig = chainedTask.getTaskConfig();
} else if ((iteration = this.iterations.get(sourceNode)) != null) {
// predecessor is an iteration
sourceVertex = iteration.getHeadTask();
sourceVertexConfig = iteration.getHeadFinalResultConfig();
} else {
throw new CompilerException("Bug: Could not resolve source node for a channel.");
}
} else {
// predecessor is its own vertex
sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
}
DistributionPattern pattern = connectJobVertices(inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
// accounting on channels and senders
numChannelsTotal++;
if (inConn.isOnDynamicPath()) {
numChannelsDynamicPath++;
numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
}
}
// is a union between nodes on the static and nodes on the dynamic path
if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
}
if (numDynamicSenderTasksTotal > 0) {
if (isBroadcast) {
targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
} else {
targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
}
}
// the local strategy is added only once. in non-union case that is the actual edge,
// in the union case, it is the edge between union and the target node
addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
return 1;
}
use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class JobGraphGenerator method createBulkIterationHead.
private JobVertex createBulkIterationHead(BulkPartialSolutionPlanNode pspn) {
// get the bulk iteration that corresponds to this partial solution node
final BulkIterationPlanNode iteration = pspn.getContainingIterationNode();
// check whether we need an individual vertex for the partial solution, or whether we
// attach ourselves to the vertex of the parent node. We can combine the head with a node of
// the step function, if
// 1) There is one parent that the partial solution connects to via a forward pattern and no
// local strategy
// 2) parallelism and the number of subtasks per instance does not change
// 3) That successor is not a union
// 4) That successor is not itself the last node of the step function
// 5) There is no local strategy on the edge for the initial partial solution, as
// this translates to a local strategy that would only be executed in the first iteration
final boolean merge;
if (mergeIterationAuxTasks && pspn.getOutgoingChannels().size() == 1) {
final Channel c = pspn.getOutgoingChannels().get(0);
final PlanNode successor = c.getTarget();
merge = c.getShipStrategy() == ShipStrategyType.FORWARD && c.getLocalStrategy() == LocalStrategy.NONE && c.getTempMode() == TempMode.NONE && successor.getParallelism() == pspn.getParallelism() && !(successor instanceof NAryUnionPlanNode) && successor != iteration.getRootOfStepFunction() && iteration.getInput().getLocalStrategy() == LocalStrategy.NONE;
} else {
merge = false;
}
// create or adopt the head vertex
final JobVertex toReturn;
final JobVertex headVertex;
final TaskConfig headConfig;
if (merge) {
final PlanNode successor = pspn.getOutgoingChannels().get(0).getTarget();
headVertex = this.vertices.get(successor);
if (headVertex == null) {
throw new CompilerException("Bug: Trying to merge solution set with its successor, but successor has not been created.");
}
// reset the vertex type to iteration head
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
toReturn = null;
} else {
// instantiate the head vertex and give it a no-op driver as the driver strategy.
// everything else happens in the post visit, after the input (the initial partial solution)
// is connected.
headVertex = new JobVertex("PartialSolution (" + iteration.getNodeName() + ")");
headVertex.setResources(iteration.getMinResources(), iteration.getPreferredResources());
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
headConfig.setDriver(NoOpDriver.class);
toReturn = headVertex;
}
// create the iteration descriptor and the iteration to it
IterationDescriptor descr = this.iterations.get(iteration);
if (descr == null) {
throw new CompilerException("Bug: Iteration descriptor was not created at when translating the iteration node.");
}
descr.setHeadTask(headVertex, headConfig);
return toReturn;
}
Aggregations