use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class GroupOrderTest method testReduceWithGroupOrder.
/**
 * Compiles a group-reduce that groups on field 1 and sorts each group descending on field 3,
 * then checks the ship and local strategies chosen for the reducer's input.
 */
@Test
public void testReduceWithGroupOrder() {
    // program: CSV source -> sorted group-reduce ("Reduce") -> discarding sink ("Sink")
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple4<Long, Long, Long, Long>> source =
            env.readCsvFile("/tmp/fake.csv").types(Long.class, Long.class, Long.class, Long.class);

    DataSet<Tuple4<Long, Long, Long, Long>> reduced = source
            .groupBy(1)
            .sortGroup(3, Order.DESCENDING)
            .reduceGroup(new IdentityGroupReducer<Tuple4<Long, Long, Long, Long>>())
            .name("Reduce");
    reduced.output(new DiscardingOutputFormat<Tuple4<Long, Long, Long, Long>>()).name("Sink");

    Plan program = env.createProgramPlan();

    OptimizedPlan optimized;
    try {
        optimized = compileNoStats(program);
    } catch (CompilerException compileFailure) {
        compileFailure.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // fail() always throws; the return only lets javac see 'optimized' as definitely assigned
        return;
    }

    // look up the two named plan nodes
    OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);
    SinkPlanNode sink = nodes.getNode("Sink");
    SingleInputPlanNode reduceNode = nodes.getNode("Reduce");
    Channel reduceInput = reduceNode.getInput();

    // shipping: the sink is fed locally, the reducer input is hash partitioned
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reduceInput.getShipStrategy());

    // local strategy: a sort on the reducer input
    Assert.assertEquals(LocalStrategy.SORT, reduceInput.getLocalStrategy());

    // partitioning happens on the grouping key only, the sort covers grouping key plus sort field
    FieldList expectedShipKeys = new FieldList(1);
    FieldList expectedLocalKeys = new FieldList(1, 3);
    Assert.assertEquals(expectedShipKeys, reduceInput.getShipStrategyKeys());
    Assert.assertEquals(expectedLocalKeys, reduceInput.getLocalStrategyKeys());

    // the direction of the first sort key must agree with what the reducer node reports
    Assert.assertTrue(reduceInput.getLocalStrategySortOrder()[0] == reduceNode.getSortOrders(0)[0]);

    // the second sort key (field 3) must be sorted descending
    Assert.assertEquals(false, reduceInput.getLocalStrategySortOrder()[1]);
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithIncreasingGlobalParallelism2.
/**
 * Simple job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * <p>The parallelism is doubled between the 2nd map and the 2nd reduce, so the hash partitioning
 * established for the 1st reduce is not reusable. The optimizer is expected to re-establish a
 * hash partitioning between the 2nd map and the 2nd reduce.
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
    final int p = DEFAULT_PARALLELISM;
    final int doubled = p * 2;

    // build the program stage by stage; only Reduce2 and the sink run with the doubled parallelism
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);

    DataSet<Long> sequence = env.generateSequence(0, 1).setParallelism(p);

    DataSet<Long> firstMap = sequence
            .map(new IdentityMapper<Long>()).withForwardedFields("*")
            .setParallelism(p).name("Map1");

    DataSet<Long> firstReduce = firstMap
            .groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*")
            .setParallelism(p).name("Reduce1");

    DataSet<Long> secondMap = firstReduce
            .map(new IdentityMapper<Long>()).withForwardedFields("*")
            .setParallelism(p).name("Map2");

    DataSet<Long> secondReduce = secondMap
            .groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*")
            .setParallelism(doubled).name("Reduce2");

    secondReduce.output(new DiscardingOutputFormat<Long>()).setParallelism(doubled).name("Sink");

    // hand the program to the optimizer
    OptimizedPlan optimized = compileNoStats(env.createProgramPlan());

    // walk backwards from the sink: sink <- Reduce2 <- Map2
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    SingleInputPlanNode secondReduceNode = (SingleInputPlanNode) sink.getPredecessor();
    SingleInputPlanNode secondMapNode = (SingleInputPlanNode) secondReduceNode.getPredecessor();

    // Map2 has the same parallelism as Reduce1, so its input is expected to be forwarded.
    // Reduce2 has twice the parallelism, so its input is expected to be hash partitioned again.
    ShipStrategyType intoSecondMap = secondMapNode.getInput().getShipStrategy();
    ShipStrategyType intoSecondReduce = secondReduceNode.getInput().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, intoSecondMap);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, intoSecondReduce);
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestMapReduce.
/**
 * Compiles map -> reduce (on field 0) -> map -> reduce (on field 1) with forwarded-field
 * annotations and inspects the reduce and map plan nodes reached by the visitor.
 */
@Test
public void forwardFieldsTestMapReduce() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> source =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    // the first reduce forwards f0 into f1, which is the key of the second reduce
    DataSet<Tuple3<Integer, Integer, Integer>> result = source
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(0)
            .reduce(new MockReducer()).withForwardedFields("f0->f1")
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(1)
            .reduce(new MockReducer()).withForwardedFields("*");
    result.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    OptimizedPlan optimized = compileWithStats(env.createProgramPlan());

    optimized.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode node) {
            final boolean singleInput = node instanceof SingleInputPlanNode;

            // reduce nodes: verify, then fall through so the traversal continues
            if (singleInput && node.getProgramOperator() instanceof ReduceOperatorBase) {
                for (Channel in : node.getInputs()) {
                    GlobalProperties global = node.getGlobalProperties();
                    LocalProperties local = node.getLocalProperties();
                    Assert.assertTrue("Reduce should just forward the input if it is already partitioned", in.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", global.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", global.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue("Wrong LocalProperties on Reducer", local.getGroupedFields().contains(1));
                }
            }

            // anything that is not a map node needs no further checks
            if (!(singleInput && node.getProgramOperator() instanceof MapOperatorBase)) {
                return true;
            }

            for (Channel in : node.getInputs()) {
                GlobalProperties global = node.getGlobalProperties();
                LocalProperties local = node.getLocalProperties();
                Assert.assertTrue("Map should just forward the input if it is already partitioned", in.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue("Wrong GlobalProperties on Mapper", global.isPartitionedOnFields(new FieldSet(1)));
                Assert.assertTrue("Wrong GlobalProperties on Mapper", global.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                Assert.assertTrue("Wrong LocalProperties on Mapper", local.getGroupedFields().contains(1));
            }
            // NOTE(review): presumably returning false stops the visitor from descending past
            // this map node, so operators further upstream are not checked - confirm against Visitor
            return false;
        }

        @Override
        public void postVisit(PlanNode node) {
            // nothing to do after visiting
        }
    });
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class SortPartialReuseTest method testPartialPartitioningReuse.
/**
 * Hash-partitions on field 0, then group-reduces on fields (0, 1) and again on field 0, and
 * checks that the second reducer needs neither a re-partitioning nor a local strategy.
 */
@Test
public void testPartialPartitioningReuse() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

        // partition on field 0 and pass all fields through an identity map
        DataSet<Tuple3<Long, Long, Long>> partitioned = input
                .partitionByHash(0)
                .map(new IdentityMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2");

        // first reduce on (0, 1), forwarding every field
        DataSet<Tuple3<Long, Long, Long>> firstReduce = partitioned
                .groupBy(0, 1)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2");

        // second reduce on field 0 only, then discard
        firstReduce
                .groupBy(0)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

        OptimizedPlan optimized = compileNoStats(env.createProgramPlan());

        // walk backwards from the sink: sink <- reducer2 <- reducer1
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode secondReducer = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode firstReducer = (SingleInputPlanNode) secondReducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());

        // the second reducer should be locally forwarding, reusing sort and partitioning
        assertEquals(ShipStrategyType.FORWARD, secondReducer.getInput().getShipStrategy());
        assertEquals(LocalStrategy.NONE, secondReducer.getInput().getLocalStrategy());

        // the first reducer gets its input forwarded and applies a combining sort
        assertEquals(ShipStrategyType.FORWARD, firstReducer.getInput().getShipStrategy());
        assertEquals(LocalStrategy.COMBININGSORT, firstReducer.getInput().getLocalStrategy());
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class SortPartialReuseTest method testCustomPartitioningNotReused.
/**
 * Custom-partitions on field 0, then group-reduces on fields (0, 1) and again on field 1, and
 * checks that the second reducer is fed by a combiner followed by a fresh hash partitioning.
 */
@Test
public void testCustomPartitioningNotReused() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

        // custom partitioner that assigns every key to partition 0
        Partitioner<Long> everythingToZero = new Partitioner<Long>() {
            @Override
            public int partition(Long key, int numPartitions) {
                return 0;
            }
        };

        // custom-partition on field 0 and pass all fields through an identity map
        DataSet<Tuple3<Long, Long, Long>> partitioned = input
                .partitionCustom(everythingToZero, 0)
                .map(new IdentityMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2");

        // first reduce on (0, 1), forwarding every field
        DataSet<Tuple3<Long, Long, Long>> firstReduce = partitioned
                .groupBy(0, 1)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2");

        // second reduce on field 1, then discard
        firstReduce
                .groupBy(1)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

        OptimizedPlan optimized = compileNoStats(env.createProgramPlan());

        // walk backwards from the sink: sink <- reducer2 <- combiner <- reducer1
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode secondReducer = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode combinerNode = (SingleInputPlanNode) secondReducer.getInput().getSource();
        SingleInputPlanNode firstReducer = (SingleInputPlanNode) combinerNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());

        // the second reducer must not reuse the existing partitioning:
        // its input is hash partitioned and combine-sorted
        assertEquals(ShipStrategyType.PARTITION_HASH, secondReducer.getInput().getShipStrategy());
        assertEquals(LocalStrategy.COMBININGSORT, secondReducer.getInput().getLocalStrategy());

        // the combiner in front of it is fed locally without any local strategy
        assertEquals(ShipStrategyType.FORWARD, combinerNode.getInput().getShipStrategy());
        assertEquals(LocalStrategy.NONE, combinerNode.getInput().getLocalStrategy());

        // the first reducer gets its input forwarded and applies a combining sort
        assertEquals(ShipStrategyType.FORWARD, firstReducer.getInput().getShipStrategy());
        assertEquals(LocalStrategy.COMBININGSORT, firstReducer.getInput().getLocalStrategy());
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
Aggregations