use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class UnionPropertyPropagationTest method testUnion2.
/**
 * Unions {@code NUM_INPUTS} flat-mapped branches of the same text source, groups the union on
 * field 0 and sums field 1, then inspects the optimized plan.
 *
 * <p>Checked properties: the group-reduce node reads directly from the n-ary union through a
 * FORWARD channel, and each of the union's inputs is a flat map shipped with PARTITION_HASH.
 */
@Test
public void testUnion2() {
    final int NUM_INPUTS = 4;

    // construct the plan: multiple flat maps, all unioned,
    // and the "unioned" inputDataSet will be grouped
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> source = env.readTextFile(IN_FILE);
    DataSet<Tuple2<String, Integer>> lastUnion = source.flatMap(new DummyFlatMap());
    for (int i = 1; i < NUM_INPUTS; i++) {
        lastUnion = lastUnion.union(source.flatMap(new DummyFlatMap()));
    }
    DataSet<Tuple2<String, Integer>> result = lastUnion.groupBy(0).aggregate(Aggregations.SUM, 1);
    result.writeAsText(OUT_FILE);

    // create and optimize the plan
    Plan plan = env.createProgramPlan("Test union on new java-api");
    OptimizedPlan oPlan = compileNoStats(plan);

    // Compile plan to verify that no error is thrown
    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);

    oPlan.accept(new Visitor<PlanNode>() {
        @Override
        public boolean preVisit(PlanNode visitable) {
            /* Test on the union output connections:
             * the union must be directly under the GroupOperator and the strategy should be forward.
             */
            if (visitable instanceof SingleInputPlanNode
                    && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
                final Channel inConn = ((SingleInputPlanNode) visitable).getInput();
                // assertEquals (instead of assertTrue on ==) reports the actual strategy on failure
                Assert.assertEquals("Union should just forward the Partitioning",
                        ShipStrategyType.FORWARD, inConn.getShipStrategy());
                Assert.assertTrue("Union Node should be under Group operator",
                        inConn.getSource() instanceof NAryUnionPlanNode);
            }

            /* Test on the union input connections:
             * there must be NUM_INPUTS input connections, all FlatMapOperators with their own
             * partitioning strategy (probably hash).
             */
            if (visitable instanceof NAryUnionPlanNode) {
                int numberInputs = 0;
                for (Channel inConn : visitable.getInputs()) {
                    PlanNode inNode = inConn.getSource();
                    Assert.assertTrue("Input of Union should be FlatMapOperators",
                            inNode.getProgramOperator() instanceof FlatMapOperatorBase);
                    Assert.assertEquals("Shipment strategy under union should partition the data",
                            ShipStrategyType.PARTITION_HASH, inConn.getShipStrategy());
                    numberInputs++;
                }
                Assert.assertEquals("NAryUnion should have " + NUM_INPUTS + " inputs",
                        NUM_INPUTS, numberInputs);
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // DO NOTHING
        }
    });
}
use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class UnionPropertyPropagationTest method testUnion1.
/**
 * Builds two sequence sources, group-reduces each on all fields, unions the results and
 * group-reduces the union again, then inspects the optimized plan.
 *
 * <p>Checked property: every input channel of the first group-reduce node the visitor
 * encounters uses the FORWARD ship strategy.
 */
@Test
public void testUnion1() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> sourceA = env.generateSequence(0, 1);
    DataSet<Long> sourceB = env.generateSequence(0, 1);

    DataSet<Long> redA = sourceA.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
    DataSet<Long> redB = sourceB.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());

    redA.union(redB)
            .groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>())
            .output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    // Compile plan to verify that no error is thrown
    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);

    oPlan.accept(new Visitor<PlanNode>() {
        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof SingleInputPlanNode
                    && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
                for (Channel inConn : visitable.getInputs()) {
                    // assertEquals (instead of assertTrue on ==) reports the actual strategy on failure
                    Assert.assertEquals(
                            "Reduce should just forward the input if it is already partitioned",
                            ShipStrategyType.FORWARD, inConn.getShipStrategy());
                }
                // just check latest ReduceNode
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // DO NOTHING
        }
    });
}
use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestJoin.
/**
 * Reads two three-column CSV inputs, maps each with all fields forwarded, then groups and
 * reduces them (input 1 on field 0 forwarding {@code f0->f1}, input 2 on field 1 forwarding
 * {@code f1->f2}) and joins them on field 1 of the first and field 2 of the second.
 *
 * <p>Checked property: both input channels of the inner-join node in the optimized plan use
 * the FORWARD ship strategy.
 */
@Test
public void forwardFieldsTestJoin() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> in1 =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> in2 =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    in1 = in1.map(new MockMapper()).withForwardedFields("*")
            .groupBy(0)
            .reduce(new MockReducer()).withForwardedFields("f0->f1");
    in2 = in2.map(new MockMapper()).withForwardedFields("*")
            .groupBy(1)
            .reduce(new MockReducer()).withForwardedFields("f1->f2");

    DataSet<Tuple3<Integer, Integer, Integer>> out =
            in1.join(in2).where(1).equalTo(2).with(new MockJoin());
    out.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    oPlan.accept(new Visitor<PlanNode>() {
        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof DualInputPlanNode
                    && visitable.getProgramOperator() instanceof InnerJoinOperatorBase) {
                DualInputPlanNode node = (DualInputPlanNode) visitable;
                final Channel inConn1 = node.getInput1();
                final Channel inConn2 = node.getInput2();

                // assertEquals (instead of assertTrue on ==) reports the actual strategy on failure
                Assert.assertEquals(
                        "Join should just forward the input if it is already partitioned",
                        ShipStrategyType.FORWARD, inConn1.getShipStrategy());
                Assert.assertEquals(
                        "Join should just forward the input if it is already partitioned",
                        ShipStrategyType.FORWARD, inConn2.getShipStrategy());
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // nothing to do after visiting a node
        }
    });
}
Aggregations