use of org.apache.flink.api.common.operators.base.FlatMapOperatorBase in project flink by apache.
the class UnionPropertyPropagationTest method testUnion2.
@Test
public void testUnion2() {
final int NUM_INPUTS = 4;
// construct the plan it will be multiple flat maps, all unioned
// and the "unioned" inputDataSet will be grouped
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> source = env.readTextFile(IN_FILE);
DataSet<Tuple2<String, Integer>> lastUnion = source.flatMap(new DummyFlatMap());
for (int i = 1; i < NUM_INPUTS; i++) {
lastUnion = lastUnion.union(source.flatMap(new DummyFlatMap()));
}
DataSet<Tuple2<String, Integer>> result = lastUnion.groupBy(0).aggregate(Aggregations.SUM, 1);
result.writeAsText(OUT_FILE);
// return the plan
Plan plan = env.createProgramPlan("Test union on new java-api");
OptimizedPlan oPlan = compileNoStats(plan);
JobGraphGenerator jobGen = new JobGraphGenerator();
// Compile plan to verify that no error is thrown
jobGen.compileJobGraph(oPlan);
oPlan.accept(new Visitor<PlanNode>() {
@Override
public boolean preVisit(PlanNode visitable) {
/* Test on the union output connections
* It must be under the GroupOperator and the strategy should be forward
*/
if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
final Channel inConn = ((SingleInputPlanNode) visitable).getInput();
Assert.assertTrue("Union should just forward the Partitioning", inConn.getShipStrategy() == ShipStrategyType.FORWARD);
Assert.assertTrue("Union Node should be under Group operator", inConn.getSource() instanceof NAryUnionPlanNode);
}
/* Test on the union input connections
* Must be NUM_INPUTS input connections, all FlatMapOperators with a own partitioning strategy (probably hash)
*/
if (visitable instanceof NAryUnionPlanNode) {
int numberInputs = 0;
for (Iterator<Channel> inputs = visitable.getInputs().iterator(); inputs.hasNext(); numberInputs++) {
final Channel inConn = inputs.next();
PlanNode inNode = inConn.getSource();
Assert.assertTrue("Input of Union should be FlatMapOperators", inNode.getProgramOperator() instanceof FlatMapOperatorBase);
Assert.assertTrue("Shipment strategy under union should partition the data", inConn.getShipStrategy() == ShipStrategyType.PARTITION_HASH);
}
Assert.assertTrue("NAryUnion should have " + NUM_INPUTS + " inputs", numberInputs == NUM_INPUTS);
return false;
}
return true;
}
@Override
public void postVisit(PlanNode visitable) {
// DO NOTHING
}
});
}
use of org.apache.flink.api.common.operators.base.FlatMapOperatorBase in project flink by apache.
the class GraphCreatingVisitor method preVisit.
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
// check if we have been here before
if (this.con2node.containsKey(c)) {
return false;
}
final OptimizerNode n;
// create a node for the operator (or sink or source) if we have not been here before
if (c instanceof GenericDataSinkBase) {
DataSinkNode dsn = new DataSinkNode((GenericDataSinkBase<?>) c);
this.sinks.add(dsn);
n = dsn;
} else if (c instanceof GenericDataSourceBase) {
n = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
} else if (c instanceof MapOperatorBase) {
n = new MapNode((MapOperatorBase<?, ?, ?>) c);
} else if (c instanceof MapPartitionOperatorBase) {
n = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
} else if (c instanceof FlatMapOperatorBase) {
n = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
} else if (c instanceof FilterOperatorBase) {
n = new FilterNode((FilterOperatorBase<?, ?>) c);
} else if (c instanceof ReduceOperatorBase) {
n = new ReduceNode((ReduceOperatorBase<?, ?>) c);
} else if (c instanceof GroupCombineOperatorBase) {
n = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
} else if (c instanceof GroupReduceOperatorBase) {
n = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
} else if (c instanceof InnerJoinOperatorBase) {
n = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof OuterJoinOperatorBase) {
n = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupOperatorBase) {
n = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupRawOperatorBase) {
n = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CrossOperatorBase) {
n = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof BulkIterationBase) {
n = new BulkIterationNode((BulkIterationBase<?>) c);
} else if (c instanceof DeltaIterationBase) {
n = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
} else if (c instanceof Union) {
n = new BinaryUnionNode((Union<?>) c);
} else if (c instanceof PartitionOperatorBase) {
n = new PartitionNode((PartitionOperatorBase<?>) c);
} else if (c instanceof SortPartitionOperatorBase) {
n = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
} else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final BulkIterationBase.PartialSolutionPlaceHolder<?> holder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
final BulkIterationBase<?> enclosingIteration = holder.getContainingBulkIteration();
final BulkIterationNode containingIterationNode = (BulkIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
BulkPartialSolutionNode p = new BulkPartialSolutionNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.WorksetPlaceHolder<?> holder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
WorksetNode p = new WorksetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.SolutionSetPlaceHolder<?> holder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
SolutionSetNode p = new SolutionSetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else {
throw new IllegalArgumentException("Unknown operator type: " + c);
}
this.con2node.put(c, n);
// key-less reducer (all-reduce)
if (n.getParallelism() < 1) {
// set the parallelism
int par = c.getParallelism();
if (n instanceof BinaryUnionNode) {
// Keep parallelism of union undefined for now.
// It will be determined based on the parallelism of its successor.
par = -1;
} else if (par > 0) {
if (this.forceParallelism && par != this.defaultParallelism) {
par = this.defaultParallelism;
Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
}
} else {
par = this.defaultParallelism;
}
n.setParallelism(par);
}
return true;
}
Aggregations