use of org.apache.flink.optimizer.dag.TwoInputNode in project flink by apache.
the class PipelineBreakingTest method testBranchingPlanNotReJoined.
/**
 * Verifies that no connection is flagged as a pipeline breaker when a plan
 * branches but the branches are never brought back together.
 *
 * <pre>
 *                 /---> (filter) -> (sink)
 *                /
 *               /
 * (source) -> (map) -----------------\
 *               \                  (join) -> (sink)
 *                \    (source) ----/
 *                 \
 *                  \
 *                   \-> (sink)
 * </pre>
 */
@Test
public void testBranchingPlanNotReJoined() {
    try {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // shared prefix of all three branches: (source) -> (map)
        final DataSet<String> lines = env.readTextFile("/never/accessed");
        final MapFunction<String, Integer> toZero = new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) {
                return 0;
            }
        };
        final DataSet<Integer> mapped = lines.map(toZero);

        // branch 1: a filter in front of the sink
        final FilterFunction<Integer> rejectAll = new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return false;
            }
        };
        mapped.filter(rejectAll).output(new DiscardingOutputFormat<Integer>());

        // branch 2: a join with a second source in front of the sink
        final DataSet<Integer> secondSource = env.fromElements(1, 2, 3, 4);
        final DataSet<Tuple2<Integer, Integer>> joined = mapped.join(secondSource)
                .where(new IdentityKeyExtractor<Integer>())
                .equalTo(new IdentityKeyExtractor<Integer>());
        joined.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        // branch 3: the mapped data goes straight to a sink
        mapped.output(new DiscardingOutputFormat<Integer>());

        final List<DataSinkNode> sinkNodes = convertPlan(env.createProgramPlan());

        // pick the optimizer DAG nodes; the indices assume the sinks are listed
        // in the order in which the outputs were declared above
        final DataSinkNode filterSink = sinkNodes.get(0);
        final DataSinkNode joinSink = sinkNodes.get(1);
        final DataSinkNode directSink = sinkNodes.get(2);

        final SingleInputNode filter = (SingleInputNode) filterSink.getPredecessorNode();
        final SingleInputNode mapper = (SingleInputNode) filter.getPredecessorNode();

        final TwoInputNode join = (TwoInputNode) joinSink.getPredecessorNode();
        final SingleInputNode joinSecondInput = (SingleInputNode) join.getSecondPredecessorNode();

        // none of the connections may break the pipeline
        assertFalse(filterSink.getInputConnection().isBreakingPipeline());
        assertFalse(joinSink.getInputConnection().isBreakingPipeline());
        assertFalse(directSink.getInputConnection().isBreakingPipeline());

        assertFalse(filter.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapper.getIncomingConnection().isBreakingPipeline());

        assertFalse(join.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(join.getSecondIncomingConnection().isBreakingPipeline());
        assertFalse(joinSecondInput.getIncomingConnection().isBreakingPipeline());

        // additionally make sure the DAG has the shape this test assumes
        final SingleInputNode joinFirstInput = (SingleInputNode) join.getFirstPredecessorNode();
        assertEquals(mapper, joinFirstInput.getPredecessorNode());
        assertEquals(mapper, directSink.getPredecessorNode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.optimizer.dag.TwoInputNode in project flink by apache.
the class InterestingProperties method filterByCodeAnnotations.
/**
 * Creates a new {@code InterestingProperties} object that holds those requested global and
 * local properties of this object that are still non-trivial after being filtered by the
 * semantic properties of the given node.
 *
 * @param node the optimizer node whose semantic properties are used for filtering; only
 *             single-input and two-input nodes are asked for them, any other node is
 *             treated as having empty semantic properties
 * @param input the input index that is passed on to the property filters
 * @return a new object containing the non-null, non-trivial filtered properties
 */
public InterestingProperties filterByCodeAnnotations(OptimizerNode node, int input) {
    final InterestingProperties result = new InterestingProperties();

    final boolean singleOrTwoInput = node instanceof SingleInputNode || node instanceof TwoInputNode;
    final SemanticProperties semantics = singleOrTwoInput
            ? node.getSemanticProperties()
            : new SemanticProperties.EmptySemanticProperties();

    // global properties first ...
    for (RequestedGlobalProperties requested : this.globalProps) {
        final RequestedGlobalProperties surviving = requested.filterBySemanticProperties(semantics, input);
        if (surviving == null || surviving.isTrivial()) {
            continue;
        }
        result.addGlobalProperties(surviving);
    }

    // ... then the local ones, with the same rule
    for (RequestedLocalProperties requested : this.localProps) {
        final RequestedLocalProperties surviving = requested.filterBySemanticProperties(semantics, input);
        if (surviving == null || surviving.isTrivial()) {
            continue;
        }
        result.addLocalProperties(surviving);
    }

    return result;
}
use of org.apache.flink.optimizer.dag.TwoInputNode in project flink by apache.
the class GenericFlatTypePostPass method traverse.
/**
 * Processes one node of the plan during the post pass: merges the schema handed in by the
 * calling consumer into the schema kept at the node, adds the node's own field information,
 * optionally creates the runtime utilities (serializers and comparators) and finally passes
 * the resulting schema on to the node's input channels via {@code propagateToChannel}.
 *
 * <p>Nodes other than sinks, sources and unions keep their schema in a {@code postPassHelper}
 * field and count how often they were reached. Processing only continues once the count is no
 * longer smaller than the number of outgoing channels; earlier calls return after recording
 * their contribution.
 *
 * @param node the plan node to process
 * @param parentSchema the schema passed down by the consumer that triggered this call
 * @param createUtilities {@code true} to also create serializers and comparators,
 *                        {@code false} to only build up the schemas
 * @throws CompilerPostPassException if type information is conflicting or missing, or if the
 *                                   node type is not known to this method
 * @throws CompilerException if the result of an iteration's step function is produced by a
 *                           union node, or if an iteration-internal schema is missing after
 *                           the first traversal of the step function
 */
@SuppressWarnings("unchecked")
protected void traverse(PlanNode node, T parentSchema, boolean createUtilities) {
    // distinguish the node types
    if (node instanceof SinkPlanNode) {
        SinkPlanNode sn = (SinkPlanNode) node;
        Channel inchannel = sn.getInput();

        // a sink always starts from a fresh schema
        T schema = createEmptySchema();
        sn.postPassHelper = schema;

        // add the sink's information to the schema
        try {
            getSinkSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type information for the data sink '"
                    + sn.getSinkNode().getOperator().getName() + "'.", e);
        }

        // descend to the input channel
        try {
            propagateToChannel(schema, inchannel, createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Missing type information for the channel that inputs to the data sink '"
                    + sn.getSinkNode().getOperator().getName() + "'.", ex);
        }
    } else if (node instanceof SourcePlanNode) {
        // the source has no input and no strategy itself, so the serializer is all there is to set
        if (createUtilities) {
            ((SourcePlanNode) node).setSerializer(createSerializer(parentSchema, node));
        }
    } else if (node instanceof BulkIterationPlanNode) {
        BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;

        // get the node's current schema, creating it on the first visit
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();

        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }

        // not all outgoing channels have contributed yet: stop here and continue on a later call
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }

        if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
        }

        // traverse the termination criterion for the first time. create schema only, no utilities.
        // Needed in case of intermediate termination criterion
        traverseTerminationCriterion(iterationNode, false);

        // traverse the step function for the first time. create schema only, no utilities
        traverse(iterationNode.getRootOfStepFunction(), schema, false);

        T pss = (T) iterationNode.getPartialSolutionPlanNode().postPassHelper;
        if (pss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Partial solution schema is null after first traversal of the step function.");
        }

        // traverse the step function for the second time, taking the schema of the partial solution
        traverse(iterationNode.getRootOfStepFunction(), pss, createUtilities);

        // second pass over the termination criterion, now with the caller's utility flag
        traverseTerminationCriterion(iterationNode, createUtilities);

        // take the schema from the partial solution node and add its fields to the iteration result schema.
        // input and output schema need to be identical, so this is essentially a sanity check
        addSchemaToSchema(pss, schema, iterationNode.getProgramOperator().getName());

        // set the serializer
        if (createUtilities) {
            iterationNode.setSerializerForIterationChannel(createSerializer(pss, iterationNode.getPartialSolutionPlanNode()));
        }

        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, iterationNode.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '"
                    + iterationNode.getProgramOperator().getName()
                    + "'. Missing type information for key field " + e.getFieldNumber(), e);
        }
    } else if (node instanceof WorksetIterationPlanNode) {
        WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;

        // get the node's current schema, creating it on the first visit
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();

        // add the parent schema to the schema (which refers to the solution set schema)
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }

        // not all outgoing channels have contributed yet: stop here and continue on a later call
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }

        if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
        }
        if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
        }

        // traverse the step function
        // pass an empty schema to the next workset and the parent schema to the solution set delta
        // these first traversals are schema only
        traverse(iterationNode.getNextWorkSetPlanNode(), createEmptySchema(), false);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), schema, false);

        T wss = (T) iterationNode.getWorksetPlanNode().postPassHelper;
        T sss = (T) iterationNode.getSolutionSetPlanNode().postPassHelper;
        if (wss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Workset schema is null after first traversal of the step function.");
        }
        if (sss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Solution set schema is null after first traversal of the step function.");
        }

        // make the second pass and instantiate the utilities
        traverse(iterationNode.getNextWorkSetPlanNode(), wss, createUtilities);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), sss, createUtilities);

        // the solution set input and the result must have the same schema, this acts as a sanity check.
        try {
            for (Map.Entry<Integer, X> entry : sss) {
                Integer pos = entry.getKey();
                schema.addType(pos, entry.getValue());
            }
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type information for field " + e.getFieldNumber()
                    + " in node '" + iterationNode.getProgramOperator().getName()
                    + "'. Contradicting types between the " + "result of the iteration and the solution set schema: "
                    + e.getPreviousType() + " and " + e.getNewType()
                    + ". Most probable cause: Invalid constant field annotations.", e);
        }

        // set the serializers and comparators
        if (createUtilities) {
            WorksetIterationNode optNode = iterationNode.getIterationNode();
            iterationNode.setWorksetSerializer(createSerializer(wss, iterationNode.getWorksetPlanNode()));
            iterationNode.setSolutionSetSerializer(createSerializer(sss, iterationNode.getSolutionSetPlanNode()));
            try {
                iterationNode.setSolutionSetComparator(createComparator(optNode.getSolutionSetKeyFields(), null, sss));
            } catch (MissingFieldTypeInfoException ex) {
                throw new CompilerPostPassException("Could not set up the solution set for workset iteration '"
                        + optNode.getOperator().getName()
                        + "'. Missing type information for key field " + ex.getFieldNumber() + '.', ex);
            }
        }

        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, iterationNode.getInitialSolutionSetInput(), createUtilities);
            propagateToChannel(wss, iterationNode.getInitialWorksetInput(), createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '"
                    + iterationNode.getProgramOperator().getName()
                    + "'. Missing type information for key field " + ex.getFieldNumber(), ex);
        }
    } else if (node instanceof SingleInputPlanNode) {
        SingleInputPlanNode sn = (SingleInputPlanNode) node;

        // get the node's current schema, creating it on the first visit
        T schema;
        if (sn.postPassHelper == null) {
            schema = createEmptySchema();
            sn.postPassHelper = schema;
        } else {
            schema = (T) sn.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        SingleInputNode optNode = sn.getSingleInputNode();

        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, optNode, 0);
        }

        // not all outgoing channels have contributed yet: stop here and continue on a later call
        if (schema.getNumConnectionsThatContributed() < sn.getOutgoingChannels().size()) {
            return;
        }

        // add the node's local information
        try {
            getSingleInputNodeSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()), e);
        }

        if (createUtilities) {
            // parameterize the node's driver strategy: one comparator per required slot
            for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
                try {
                    sn.setComparator(createComparator(sn.getKeys(i), sn.getSortOrders(i), schema), i);
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '"
                            + optNode.getOperator().getName()
                            + "'. Missing type information for key field " + e.getFieldNumber(), e);
                }
            }
        }

        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, sn.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '"
                    + optNode.getOperator().getName()
                    + "'. Missing type information for field " + e.getFieldNumber(), e);
        }

        // don't forget the broadcast inputs; each of them starts from an empty schema
        for (Channel c : sn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '"
                        + optNode.getOperator().getName()
                        + "'. Missing type information for field " + e.getFieldNumber(), e);
            }
        }
    } else if (node instanceof DualInputPlanNode) {
        DualInputPlanNode dn = (DualInputPlanNode) node;

        // get the node's current schemas (one per input), creating both on the first visit
        T schema1;
        T schema2;
        if (dn.postPassHelper1 == null) {
            schema1 = createEmptySchema();
            schema2 = createEmptySchema();
            dn.postPassHelper1 = schema1;
            dn.postPassHelper2 = schema2;
        } else {
            schema1 = (T) dn.postPassHelper1;
            schema2 = (T) dn.postPassHelper2;
        }
        schema1.increaseNumConnectionsThatContributed();
        schema2.increaseNumConnectionsThatContributed();
        TwoInputNode optNode = dn.getTwoInputNode();

        // add the parent schema to both input schemas
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema1, optNode, 0);
            addSchemaToSchema(parentSchema, schema2, optNode, 1);
        }

        // not all outgoing channels have contributed yet: stop here and continue on a later call.
        // both counters are increased together above, so checking the first schema is enough
        if (schema1.getNumConnectionsThatContributed() < dn.getOutgoingChannels().size()) {
            return;
        }

        // add the node's local information
        try {
            getDualInputNodeSchema(dn, schema1, schema2);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()), e);
        }

        // parameterize the node's driver strategy
        if (createUtilities) {
            if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
                // set the individual comparators
                try {
                    dn.setComparator1(createComparator(dn.getKeysForInput1(), dn.getSortOrders(), schema1));
                    dn.setComparator2(createComparator(dn.getKeysForInput2(), dn.getSortOrders(), schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '"
                            + optNode.getOperator().getName()
                            + "'. Missing type information for field " + e.getFieldNumber(), e);
                }

                // set the pair comparator
                try {
                    dn.setPairComparator(createPairComparator(dn.getKeysForInput1(), dn.getKeysForInput2(), dn.getSortOrders(), schema1, schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '"
                            + optNode.getOperator().getName()
                            + "'. Missing type information for field " + e.getFieldNumber(), e);
                }
            }
        }

        // done, we can now propagate our info down
        try {
            propagateToChannel(schema1, dn.getInput1(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the first input channel to node '"
                    + optNode.getOperator().getName()
                    + "'. Missing type information for field " + e.getFieldNumber(), e);
        }
        try {
            propagateToChannel(schema2, dn.getInput2(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the second input channel to node '"
                    + optNode.getOperator().getName()
                    + "'. Missing type information for field " + e.getFieldNumber(), e);
        }

        // don't forget the broadcast inputs; each of them starts from an empty schema
        for (Channel c : dn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '"
                        + optNode.getOperator().getName()
                        + "'. Missing type information for field " + e.getFieldNumber(), e);
            }
        }
    } else if (node instanceof NAryUnionPlanNode) {
        // only propagate the info down: every input receives the parent schema unchanged
        try {
            for (Channel channel : node.getInputs()) {
                propagateToChannel(parentSchema, channel, createUtilities);
            }
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the input channel to "
                    + " a union node. Missing type information for field " + ex.getFieldNumber(), ex);
        }
    } else if (node instanceof BulkPartialSolutionPlanNode
            || node instanceof SolutionSetPlanNode
            || node instanceof WorksetPlanNode) {
        // catch the sources of the iterative step functions

        // get the node's current schema, creating it on the first visit
        T schema;
        String name;
        if (node instanceof BulkPartialSolutionPlanNode) {
            BulkPartialSolutionPlanNode psn = (BulkPartialSolutionPlanNode) node;
            if (psn.postPassHelper == null) {
                schema = createEmptySchema();
                psn.postPassHelper = schema;
            } else {
                schema = (T) psn.postPassHelper;
            }
            name = "partial solution of bulk iteration '"
                    + psn.getPartialSolutionNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof SolutionSetPlanNode) {
            SolutionSetPlanNode ssn = (SolutionSetPlanNode) node;
            if (ssn.postPassHelper == null) {
                schema = createEmptySchema();
                ssn.postPassHelper = schema;
            } else {
                schema = (T) ssn.postPassHelper;
            }
            name = "solution set of workset iteration '"
                    + ssn.getSolutionSetNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof WorksetPlanNode) {
            WorksetPlanNode wsn = (WorksetPlanNode) node;
            if (wsn.postPassHelper == null) {
                schema = createEmptySchema();
                wsn.postPassHelper = schema;
            } else {
                schema = (T) wsn.postPassHelper;
            }
            name = "workset of workset iteration '"
                    + wsn.getWorksetNode().getIterationNode().getOperator().getName() + "'";
        } else {
            // cannot happen given the enclosing condition; kept so 'schema' and 'name' are definitely assigned
            throw new CompilerException("Unexpected node type for the source of an iterative step function: "
                    + node.getClass().getName());
        }
        schema.increaseNumConnectionsThatContributed();

        // add the parent schema to the schema; these nodes have no input to propagate to
        addSchemaToSchema(parentSchema, schema, name);
    } else {
        throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
    }
}

/**
 * Traverses the plan below the root of a bulk iteration's termination criterion, starting
 * from an empty schema, and then sets a serializer created from an empty schema on the
 * root's input channel. Does nothing if the iteration has no termination criterion.
 *
 * @param iterationNode the bulk iteration whose termination criterion is processed
 * @param createUtilities passed on to {@code traverse}
 */
private void traverseTerminationCriterion(BulkIterationPlanNode iterationNode, boolean createUtilities) {
    if (iterationNode.getRootOfTerminationCriterion() == null) {
        return;
    }
    SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
    traverse(addMapper.getInput().getSource(), createEmptySchema(), createUtilities);
    try {
        addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
    } catch (MissingFieldTypeInfoException e) {
        throw new RuntimeException(e);
    }
}
use of org.apache.flink.optimizer.dag.TwoInputNode in project flink by apache.
the class PipelineBreakingTest method testReJoinedBranches.
/**
 * Verifies that pipeline breakers are placed on branches that are joined back together.
 *
 * <pre>
 *                                       /-> (sink)
 *                                      /
 *                        /-> (reduce) -+          /-> (flatmap) -> (sink)
 *                       /               \        /
 *    (source) -> (map) -                (join) -+-----\
 *                       \               /              \
 *                        \-> (filter) -+                \
 *                                       \               (co group) -> (sink)
 *                                        \              /
 *                                         \-> (reduce) -/
 * </pre>
 */
@Test
public void testReJoinedBranches() {
    try {
        // assemble the program sketched above
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<Long> numbers = env.fromElements(33L, 44L);
        final DataSet<Tuple2<Long, Long>> pairs = numbers.map(new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        });

        // upper branch: reduce, which also feeds its own sink
        final DataSet<Tuple2<Long, Long>> reduced =
                pairs.groupBy(0).reduce(new SelectOneReducer<Tuple2<Long, Long>>());
        reduced.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // lower branch: filter
        final DataSet<Tuple2<Long, Long>> filtered = pairs.filter(new FilterFunction<Tuple2<Long, Long>>() {
            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return false;
            }
        });

        // the two branches meet again in the join
        final DataSet<Tuple2<Long, Long>> joined = reduced.join(filtered)
                .where(1)
                .equalTo(1)
                .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        // the join result goes through a flat map into the second sink ...
        joined.flatMap(new IdentityFlatMapper<Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // ... and is co-grouped with a group-reduced version of the filtered data for the third sink
        final DataSet<Tuple2<Long, Long>> topOfFiltered =
                filtered.groupBy(1).reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());
        joined.coGroup(topOfFiltered)
                .where(0)
                .equalTo(0)
                .with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        final List<DataSinkNode> sinkNodes = convertPlan(env.createProgramPlan());

        // pick the optimizer DAG nodes; the indices assume the sinks are listed
        // in the order in which the outputs were declared above
        final DataSinkNode reduceSink = sinkNodes.get(0);
        final DataSinkNode flatMapSink = sinkNodes.get(1);
        final DataSinkNode coGroupSink = sinkNodes.get(2);

        final SingleInputNode reduce = (SingleInputNode) reduceSink.getPredecessorNode();
        final SingleInputNode mapper = (SingleInputNode) reduce.getPredecessorNode();

        final SingleInputNode flatMap = (SingleInputNode) flatMapSink.getPredecessorNode();
        final TwoInputNode join = (TwoInputNode) flatMap.getPredecessorNode();
        final SingleInputNode filter = (SingleInputNode) join.getSecondPredecessorNode();

        final TwoInputNode coGroup = (TwoInputNode) coGroupSink.getPredecessorNode();
        final SingleInputNode secondReduce = (SingleInputNode) coGroup.getSecondPredecessorNode();

        // first make sure the DAG has the shape this test assumes
        assertEquals(reduce, join.getFirstPredecessorNode());
        assertEquals(mapper, filter.getPredecessorNode());
        assertEquals(join, coGroup.getFirstPredecessorNode());
        assertEquals(filter, secondReduce.getPredecessorNode());

        // connections that must stay pipelined
        assertFalse(reduceSink.getInputConnection().isBreakingPipeline());
        assertFalse(flatMapSink.getInputConnection().isBreakingPipeline());
        assertFalse(coGroupSink.getInputConnection().isBreakingPipeline());

        assertFalse(mapper.getIncomingConnection().isBreakingPipeline());
        assertFalse(flatMap.getIncomingConnection().isBreakingPipeline());

        assertFalse(join.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(coGroup.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(coGroup.getSecondIncomingConnection().isBreakingPipeline());

        // connections that are expected to break the pipeline
        assertTrue(reduce.getIncomingConnection().isBreakingPipeline());
        assertTrue(filter.getIncomingConnection().isBreakingPipeline());
        assertTrue(secondReduce.getIncomingConnection().isBreakingPipeline());

        assertTrue(join.getSecondIncomingConnection().isBreakingPipeline());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations