use of org.apache.flink.optimizer.plan.WorksetIterationPlanNode in project flink by apache.
the class IterationsCompilerTest method testTwoWorksetIterationsDirectlyChained.
@Test
public void testTwoWorksetIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> firstResult = doDeltaIteration(verticesWithInitialId, edges);
        DataSet<Tuple2<Long, Long>> secondResult = doDeltaIteration(firstResult, edges);

        secondResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
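The doDeltaIteration helper is defined elsewhere in the test class and not shown in this snippet. A minimal sketch of such a helper, assuming the usual org.apache.flink.api.java imports and a step function that joins the workset with the edges, could look like this (hypothetical, not the original test code):

// Hypothetical helper: runs a delta iteration whose step function joins the
// workset with the edges and feeds the result back as both the next workset
// and the solution set delta.
public static DataSet<Tuple2<Long, Long>> doDeltaIteration(
        DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

    // iterate at most 100 times, keyed on field 0 of the solution set
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            vertices.iterateDelta(vertices, 100, 0);

    DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> vertex, Tuple2<Long, Long> edge) {
                    // propagate the vertex value to the edge target
                    return new Tuple2<Long, Long>(edge.f1, vertex.f1);
                }
            });

    // the delta serves as both the solution set update and the next workset
    return iteration.closeWith(delta, delta);
}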
use of org.apache.flink.optimizer.plan.WorksetIterationPlanNode in project flink by apache.
the class WorksetIterationCornerCasesTest method testWorksetIterationNotDependingOnSolutionSet.
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);

        DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long, Long>>());
        iteration.closeWith(iterEnd, iterEnd).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());

        JobGraphGenerator jgg = new JobGraphGenerator();
        jgg.compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
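Duplicator and TestMapper are small helpers from the test sources that are not shown here. Minimal versions consistent with the types used above might look like the following (a sketch, not necessarily the original definitions):

// Turns a value into a Tuple2 with the value in both fields,
// so that generateSequence(1, 100) yields DataSet<Tuple2<Long, Long>>.
public static class Duplicator<T> implements MapFunction<T, Tuple2<T, T>> {
    @Override
    public Tuple2<T, T> map(T value) {
        return new Tuple2<T, T>(value, value);
    }
}

// Identity map; its only purpose is to place an operator on the workset.
public static class TestMapper<T> implements MapFunction<T, T> {
    @Override
    public T map(T value) {
        return value;
    }
}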
use of org.apache.flink.optimizer.plan.WorksetIterationPlanNode in project flink by apache.
the class JobGraphGenerator method compileJobGraph.
public JobGraph compileJobGraph(OptimizedPlan program, JobID jobId) {
    if (program == null) {
        throw new NullPointerException("Program is null. Did you call ExecutionEnvironment.execute()?");
    }
    if (jobId == null) {
        jobId = JobID.generate();
    }

    this.vertices = new HashMap<PlanNode, JobVertex>();
    this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
    this.chainedTasksInSequence = new ArrayList<TaskInChain>();
    this.auxVertices = new ArrayList<JobVertex>();
    this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
    this.iterationStack = new ArrayList<IterationPlanNode>();
    this.sharingGroup = new SlotSharingGroup();

    // this starts the traversal that generates the job graph
    program.accept(this);

    // sanity check that we are not somehow in an iteration at the end
    if (this.currentIteration != null) {
        throw new CompilerException("The graph translation ended prematurely, leaving an unclosed iteration.");
    }

    // finalize the iterations
    for (IterationDescriptor iteration : this.iterations.values()) {
        if (iteration.getIterationNode() instanceof BulkIterationPlanNode) {
            finalizeBulkIteration(iteration);
        } else if (iteration.getIterationNode() instanceof WorksetIterationPlanNode) {
            finalizeWorksetIteration(iteration);
        } else {
            throw new CompilerException("Bug: Unrecognized iteration plan node type.");
        }
    }

    // now that the traversal is done, have the chained tasks write their configs
    // into their parents' configurations
    for (TaskInChain tic : this.chainedTasksInSequence) {
        TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
        t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
    }

    // ----- attach the additional info to the job vertices, for display in the runtime monitor -----
    attachOperatorNamesAndDescriptions();

    // ----------- finalize the job graph -----------

    // create the job graph object
    JobGraph graph = new JobGraph(jobId, program.getJobName());
    try {
        graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
    } catch (IOException e) {
        throw new CompilerException("Could not serialize the ExecutionConfig. " +
                "This indicates that non-serializable types (like custom serializers) were registered.");
    }
    graph.setAllowQueuedScheduling(false);
    graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());

    // add vertices to the graph
    for (JobVertex vertex : this.vertices.values()) {
        graph.addVertex(vertex);
    }
    for (JobVertex vertex : this.auxVertices) {
        graph.addVertex(vertex);
        vertex.setSlotSharingGroup(sharingGroup);
    }

    // add registered cache files to the job configuration
    for (Entry<String, DistributedCacheEntry> e : program.getOriginalPlan().getCachedFiles()) {
        DistributedCache.writeFileInfoToConfig(e.getKey(), e.getValue(), graph.getJobConfiguration());
    }

    // release all references again
    this.vertices = null;
    this.chainedTasks = null;
    this.chainedTasksInSequence = null;
    this.auxVertices = null;
    this.iterations = null;
    this.iterationStack = null;

    // return the job graph
    return graph;
}
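For context, here is how compileJobGraph typically fits into the batch compilation pipeline. The Optimizer construction below is an assumption (one of its public constructors), not part of the snippet above, and env stands for any ExecutionEnvironment:

// Sketch: from program plan to executable JobGraph.
Plan plan = env.createProgramPlan();

// optimize the plan (the constructor arguments shown are one common variant)
Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
OptimizedPlan optimizedPlan = optimizer.compile(plan);

// translate into a JobGraph; with the single-argument overload, a fresh JobID is generated
JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);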
use of org.apache.flink.optimizer.plan.WorksetIterationPlanNode in project flink by apache.
the class JobGraphGenerator method finalizeWorksetIteration.
private void finalizeWorksetIteration(IterationDescriptor descr) {
    final WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) descr.getIterationNode();
    final JobVertex headVertex = descr.getHeadTask();
    final TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
    final TaskConfig headFinalOutputConfig = descr.getHeadFinalResultConfig();

    // ------------ finalize the head config with the final outputs and the sync gate ------------
    {
        final int numStepFunctionOuts = headConfig.getNumOutputs();
        final int numFinalOuts = headFinalOutputConfig.getNumOutputs();

        if (numStepFunctionOuts == 0) {
            throw new CompilerException("The workset iteration has no operation on the workset inside the step function.");
        }

        headConfig.setIterationHeadFinalOutputConfig(headFinalOutputConfig);
        headConfig.setIterationHeadIndexOfSyncOutput(numStepFunctionOuts + numFinalOuts);

        final double relativeMemory = iterNode.getRelativeMemoryPerSubTask();
        if (relativeMemory <= 0) {
            throw new CompilerException("Bug: No memory has been assigned to the workset iteration.");
        }

        headConfig.setIsWorksetIteration();
        headConfig.setRelativeBackChannelMemory(relativeMemory / 2);
        headConfig.setRelativeSolutionSetMemory(relativeMemory / 2);

        // set the solution set serializer and comparator
        headConfig.setSolutionSetSerializer(iterNode.getSolutionSetSerializer());
        headConfig.setSolutionSetComparator(iterNode.getSolutionSetComparator());
    }

    // --------------------------- create the sync task ---------------------------
    final TaskConfig syncConfig;
    {
        final JobVertex sync = new JobVertex("Sync (" + iterNode.getNodeName() + ")");
        sync.setResources(iterNode.getMinResources(), iterNode.getPreferredResources());
        sync.setInvokableClass(IterationSynchronizationSinkTask.class);
        sync.setParallelism(1);
        sync.setMaxParallelism(1);
        this.auxVertices.add(sync);

        syncConfig = new TaskConfig(sync.getConfiguration());
        syncConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, headVertex.getParallelism());

        // set the number of iterations / the convergence criterion for the sync
        final int maxNumIterations = iterNode.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
        if (maxNumIterations < 1) {
            throw new CompilerException("Cannot create workset iteration with unspecified maximum number of iterations.");
        }
        syncConfig.setNumberOfIterations(maxNumIterations);

        // connect the sync task
        sync.connectNewDataSetAsInput(headVertex, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    }

    // ----------------------------- create the iteration tails -----------------------------
    // ------------------ for the next workset and the solution set delta ------------------
    {
        // we have three possible cases:
        // 1) Two tails, one for the workset update, one for the solution set update
        // 2) One tail for the workset update; the solution set update happens in an intermediate task
        // 3) One tail for the solution set update; the workset update happens in an intermediate task
        final PlanNode nextWorksetNode = iterNode.getNextWorkSetPlanNode();
        final PlanNode solutionDeltaNode = iterNode.getSolutionSetDeltaPlanNode();

        final boolean hasWorksetTail = nextWorksetNode.getOutgoingChannels().isEmpty();
        final boolean hasSolutionSetTail = (!iterNode.isImmediateSolutionSetUpdate()) || (!hasWorksetTail);

        {
            // get the vertex for the workset update
            final TaskConfig worksetTailConfig;
            JobVertex nextWorksetVertex = this.vertices.get(nextWorksetNode);
            if (nextWorksetVertex == null) {
                // the next workset node is chained
                TaskInChain taskInChain = this.chainedTasks.get(nextWorksetNode);
                if (taskInChain == null) {
                    throw new CompilerException("Bug: Next workset node not found as vertex or chained task.");
                }
                nextWorksetVertex = taskInChain.getContainingVertex();
                worksetTailConfig = taskInChain.getTaskConfig();
            } else {
                worksetTailConfig = new TaskConfig(nextWorksetVertex.getConfiguration());
            }

            // mark the node to perform workset updates
            worksetTailConfig.setIsWorksetIteration();
            worksetTailConfig.setIsWorksetUpdate();

            if (hasWorksetTail) {
                nextWorksetVertex.setInvokableClass(IterationTailTask.class);
                worksetTailConfig.setOutputSerializer(iterNode.getWorksetSerializer());
            }
        }
        {
            final TaskConfig solutionDeltaConfig;
            JobVertex solutionDeltaVertex = this.vertices.get(solutionDeltaNode);
            if (solutionDeltaVertex == null) {
                // the last operator is chained
                TaskInChain taskInChain = this.chainedTasks.get(solutionDeltaNode);
                if (taskInChain == null) {
                    throw new CompilerException("Bug: Solution Set Delta not found as vertex or chained task.");
                }
                solutionDeltaVertex = taskInChain.getContainingVertex();
                solutionDeltaConfig = taskInChain.getTaskConfig();
            } else {
                solutionDeltaConfig = new TaskConfig(solutionDeltaVertex.getConfiguration());
            }

            solutionDeltaConfig.setIsWorksetIteration();
            solutionDeltaConfig.setIsSolutionSetUpdate();

            if (hasSolutionSetTail) {
                solutionDeltaVertex.setInvokableClass(IterationTailTask.class);
                solutionDeltaConfig.setOutputSerializer(iterNode.getSolutionSetSerializer());

                // tell the head that it needs to wait for the solution set updates
                headConfig.setWaitForSolutionSetUpdate();
            } else {
                // no tail, intermediate update: must be an immediate update
                if (!iterNode.isImmediateSolutionSetUpdate()) {
                    throw new CompilerException("A solution set update without dedicated tail is not set to perform immediate updates.");
                }
                solutionDeltaConfig.setIsSolutionSetUpdateWithoutReprobe();
            }
        }
    }

    // ------------------- register the aggregators -------------------
    AggregatorRegistry aggs = iterNode.getIterationNode().getIterationContract().getAggregators();
    Collection<AggregatorWithName<?>> allAggregators = aggs.getAllRegisteredAggregators();

    for (AggregatorWithName<?> agg : allAggregators) {
        if (agg.getName().equals(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME)) {
            throw new CompilerException("User defined aggregator used the same name as built-in workset " +
                    "termination check aggregator: " + WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME);
        }
    }
    headConfig.addIterationAggregators(allAggregators);
    syncConfig.addIterationAggregators(allAggregators);

    String convAggName = aggs.getConvergenceCriterionAggregatorName();
    ConvergenceCriterion<?> convCriterion = aggs.getConvergenceCriterion();
    if (convCriterion != null || convAggName != null) {
        if (convCriterion == null) {
            throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
        }
        if (convAggName == null) {
            throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
        }
        syncConfig.setConvergenceCriterion(convAggName, convCriterion);
    }

    headConfig.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new LongSumAggregator());
    syncConfig.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new LongSumAggregator());
    syncConfig.setImplicitConvergenceCriterion(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new WorksetEmptyConvergenceCriterion());
}
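The reserved-name check above is visible at the API level: user aggregators are registered on the iteration and must not collide with the built-in workset-empty aggregator that this method adds last. A minimal registration sketch (the aggregator name here is hypothetical):

// Register a user-defined aggregator on a delta iteration; any name works
// except WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, which the
// check in finalizeWorksetIteration rejects.
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        input.iterateDelta(input, 100, 0);
iteration.registerAggregator("custom.long.sum", new LongSumAggregator());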
use of org.apache.flink.optimizer.plan.WorksetIterationPlanNode in project flink by apache.
the class JobGraphGenerator method translateChannel.
private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex,
        TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {

    final PlanNode inputPlanNode = input.getSource();
    final Iterator<Channel> allInChannels;

    if (inputPlanNode instanceof NAryUnionPlanNode) {
        allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();

        // if the union materializes (batch exchange), propagate that mode to the union's
        // inputs as well; otherwise this can cause deadlocks when closing a branching flow at runtime
        for (Channel in : inputPlanNode.getInputs()) {
            if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
                in.setDataExchangeMode(DataExchangeMode.BATCH);
            }
            if (isBroadcast) {
                in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
            }
        }
    } else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head
            final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
            final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();

            // check if the iteration's input is a union
            if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
            }

            // also, set the index of the gate with the partial solution
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof WorksetPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head
            final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
            final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();

            // check if the iteration's input is a union
            if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
            }

            // also, set the index of the gate with the workset
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof SolutionSetPlanNode) {
        // connections to the solution set are a local index access
        // rather than a vertex connection
        return 0;
    } else {
        allInChannels = Collections.singletonList(input).iterator();
    }

    // check that the type serializer is consistent
    TypeSerializerFactory<?> typeSerFact = null;

    // accounting for channels on the dynamic path
    int numChannelsTotal = 0;
    int numChannelsDynamicPath = 0;
    int numDynamicSenderTasksTotal = 0;

    // expand the channel to all the union channels, in case there is a union operator at its source
    while (allInChannels.hasNext()) {
        final Channel inConn = allInChannels.next();

        // sanity check the common serializer
        if (typeSerFact == null) {
            typeSerFact = inConn.getSerializer();
        } else if (!typeSerFact.equals(inConn.getSerializer())) {
            throw new CompilerException("Conflicting types in union operator.");
        }

        final PlanNode sourceNode = inConn.getSource();
        JobVertex sourceVertex = this.vertices.get(sourceNode);
        TaskConfig sourceVertexConfig;

        if (sourceVertex == null) {
            // this predecessor is chained to another task or an iteration
            final TaskInChain chainedTask;
            final IterationDescriptor iteration;

            if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
                // predecessor is a chained task
                if (chainedTask.getContainingVertex() == null) {
                    throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
                }
                sourceVertex = chainedTask.getContainingVertex();
                sourceVertexConfig = chainedTask.getTaskConfig();
            } else if ((iteration = this.iterations.get(sourceNode)) != null) {
                // predecessor is an iteration
                sourceVertex = iteration.getHeadTask();
                sourceVertexConfig = iteration.getHeadFinalResultConfig();
            } else {
                throw new CompilerException("Bug: Could not resolve source node for a channel.");
            }
        } else {
            // predecessor is its own vertex
            sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
        }

        DistributionPattern pattern = connectJobVertices(
                inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);

        // accounting on channels and senders
        numChannelsTotal++;
        if (inConn.isOnDynamicPath()) {
            numChannelsDynamicPath++;
            numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(
                    pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
        }
    }

    // a mixed count means this logical input is a union between nodes on the static
    // and nodes on the dynamic path, which is not supported
    if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
        throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
    }

    if (numDynamicSenderTasksTotal > 0) {
        if (isBroadcast) {
            targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        } else {
            targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        }
    }

    // the local strategy is added only once: in the non-union case that is the actual edge,
    // in the union case it is the edge between the union and the target node
    addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
    return 1;
}
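The dynamic-path accounting above calls getNumberOfSendersPerReceiver, which is not included in this snippet. A sketch of the logic one would expect, assuming only ALL_TO_ALL and POINTWISE patterns and that pointwise connections inside an iteration either keep the parallelism unchanged or fan in/out from parallelism 1:

// Sketch, not the original source: how many sender tasks feed each receiver task.
private static int getNumberOfSendersPerReceiver(DistributionPattern pattern, int numSenders, int numReceivers) {
    if (pattern == DistributionPattern.ALL_TO_ALL) {
        // every sender reaches every receiver
        return numSenders;
    } else if (pattern == DistributionPattern.POINTWISE) {
        if (numSenders == numReceivers) {
            return 1; // one-to-one wiring
        } else if (numReceivers == 1) {
            return numSenders; // fan-in
        } else if (numSenders == 1) {
            return 1; // fan-out
        } else {
            throw new CompilerException("Changing the parallelism within an iteration is not supported.");
        }
    } else {
        throw new CompilerException("Unknown distribution pattern for channels: " + pattern);
    }
}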