Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class HardPlansCompilationTest, method testTicket158.
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce -> Sink
 * |--------------------------/                   /
 * |----------------------------------------------/
 *
 * The first cross has the SameKeyFirst output contract.
 */
@Test
public void testTicket158() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);
    set1.map(new IdentityMapper<Long>()).name("Map1")
        .groupBy("*")
        .reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .cross(set1).with(new IdentityCrosser<Long>())
        .withForwardedFieldsFirst("*").name("Cross1")
        .groupBy("*")
        .reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
        .cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
        .groupBy("*")
        .reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
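The identity functions wired into this plan (IdentityMapper, IdentityCrosser, IdentityGroupReducer) are Flink optimizer test utilities whose sources are not shown on this page. Below is a minimal sketch of plausible shapes, assuming the standard DataSet function interfaces; this is a hypothetical reconstruction, not the actual test classes.

import org.apache.flink.api.common.functions.CrossFunction;
import org.apache.flink.api.common.functions.MapFunction;

// Hypothetical reconstruction of the identity helpers referenced above.
public class IdentityMapper<T> implements MapFunction<T, T> {
    @Override
    public T map(T value) {
        return value; // forward every record unchanged
    }
}

public class IdentityCrosser<T> implements CrossFunction<T, T, T> {
    @Override
    public T cross(T first, T second) {
        // Emitting the first input unchanged is what makes the
        // withForwardedFieldsFirst("*") hint on Cross1 valid.
        return first;
    }
}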
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class UnionReplacementTest, method testConsecutiveUnionsWithRangePartitioning.
/**
 * Checks that a plan with consecutive UNIONs followed by PARTITION_RANGE is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> PartitionByRange -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the range partitioning must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithRangePartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByRange(1)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check that the sink input is range partitioned
    assertEquals("Sink input should be range partitioned.",
        PartitioningProperty.RANGE_PARTITIONED,
        sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be range partitioned on field 1.",
        new Ordering(1, null, Order.ASCENDING),
        sink.getInput().getGlobalProperties().getPartitioningOrdering());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be range partitioned.",
        PartitioningProperty.RANGE_PARTITIONED,
        partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be range partitioned on field 1.",
        new Ordering(1, null, Order.ASCENDING),
        partitioner.getInput().getGlobalProperties().getPartitioningOrdering());
    assertEquals("Partitioner input channel should be forwarding.",
        ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be range partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be range partitioned.",
            PartitioningProperty.RANGE_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be forwarding.",
            ShipStrategyType.FORWARD, c.getShipStrategy());
        // range partitioning is executed as custom partitioning with prior sampling
        SingleInputPlanNode partitionMap = (SingleInputPlanNode) c.getSource();
        assertEquals(DriverStrategy.MAP, partitionMap.getDriverStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionMap.getInput().getShipStrategy());
    }
}
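As the last two assertions show, partitionByRange is physically executed as a MAP driver plus a custom partitioning ship strategy, with the range boundaries obtained by a prior sampling step. The following is a rough sketch of that idea using the public partitionCustom API; the hard-coded boundaries stand in for the sampled ones and are purely illustrative.

import org.apache.flink.api.common.functions.Partitioner;

// Illustrative only: real range partitioning derives the boundaries by sampling.
Partitioner<Long> rangePartitioner = new Partitioner<Long>() {
    private final long[] boundaries = {100L, 200L, 300L}; // hypothetical values

    @Override
    public int partition(Long key, int numPartitions) {
        for (int i = 0; i < boundaries.length; i++) {
            if (key < boundaries[i]) {
                return i % numPartitions;
            }
        }
        return boundaries.length % numPartitions;
    }
};

// Partition on tuple field 1, the same key as partitionByRange(1) above.
DataSet<Tuple2<Long, Long>> ranged = union123.partitionCustom(rangePartitioner, 1);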
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class UnionReplacementTest, method testConsecutiveUnionsWithBroadcast.
/**
 * Checks that a plan with consecutive UNIONs followed by a broadcast-fwd JOIN is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 --> bc-fwd-Join -> Output
 * Src3 ----------------/             /
 *                                   /
 * Src4 ----------------------------/
 *
 * In the resulting plan, the broadcasting must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithBroadcast() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.join(src4, JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST)
        .where(0).equalTo(0).name("join")
        .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
        .name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    DualInputPlanNode join = resolver.getNode("join");
    // check that the first input of the join is broadcast
    assertEquals("First join input should be fully replicated.",
        PartitioningProperty.FULL_REPLICATION,
        join.getInput1().getGlobalProperties().getPartitioning());
    NAryUnionPlanNode union = (NAryUnionPlanNode) join.getInput1().getSource();
    // check that all union inputs are broadcast
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be fully replicated.",
            PartitioningProperty.FULL_REPLICATION, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be broadcasting.",
            ShipStrategyType.BROADCAST, c.getShipStrategy());
    }
}
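The same physical strategy can also be requested through the size-hint shortcuts of the DataSet API: joinWithHuge declares its argument to be the large input, so the first input is broadcast, which corresponds to JoinHint.BROADCAST_HASH_FIRST. A sketch of the equivalent formulation for the program above:

// Equivalent to join(src4, JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST):
// src4 is marked as the huge input, so union123 is broadcast to its partitions.
union123.joinWithHuge(src4)
    .where(0).equalTo(0).name("join")
    .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
    .name("out");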
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning.
/**
 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> PartitionByHash -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the hash partitioning (ShipStrategyType.PARTITION_HASH) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByHash(1)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check that the sink input is hash partitioned
    assertEquals("Sink input should be hash partitioned.",
        PartitioningProperty.HASH_PARTITIONED,
        sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on field 1.",
        new FieldList(1),
        sink.getInput().getGlobalProperties().getPartitioningFields());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.",
        PartitioningProperty.HASH_PARTITIONED,
        partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on field 1.",
        new FieldList(1),
        partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding.",
        ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned.",
            PartitioningProperty.HASH_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be hash partitioning.",
            ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be a data source.",
            c.getSource() instanceof SourcePlanNode);
    }
}
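The push-down that this test verifies is semantically safe because a union in the DataSet API is purely logical: no operator materializes the combined result, and the target partition of each record is computed from field 1 alone. Hash partitioning each input individually therefore yields the same record-to-partition assignment as partitioning the unioned result. An illustrative sketch (not part of the test) using the datasets defined above:

// Both variants place every record on the same partition, because the hash
// of field 1 is computed per record regardless of where the union sits.
DataSet<Tuple2<Long, Long>> partitionedAfterUnion =
    src1.union(src2).union(src3).partitionByHash(1);
DataSet<Tuple2<Long, Long>> partitionedBeforeUnion =
    src1.partitionByHash(1)
        .union(src2.partitionByHash(1))
        .union(src3.partitionByHash(1));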
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class ReduceAllTest, method testReduce.
@Test
public void testReduce() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);
    set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");
    Plan plan = env.createProgramPlan();
    try {
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly");
    }
}
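IdentityGroupReducer, used here and in the tests above, is another Flink test utility. A plausible minimal shape, assuming the standard GroupReduceFunction interface (a hypothetical reconstruction, not the actual test class):

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.util.Collector;

// Hypothetical reconstruction: forwards every record of the group unchanged.
public class IdentityGroupReducer<T> implements GroupReduceFunction<T, T> {
    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        for (T value : values) {
            out.collect(value);
        }
    }
}

Note that reduceGroup without a preceding groupBy, as in this test, treats the whole DataSet as a single group, so the reduce executes as a non-parallel operator.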