Example 26 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

The class HardPlansCompilationTest, method testTicket158.

/**
	 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce -> Sink
	 * |--------------------------/                  /
	 * |--------------------------------------------/
	 * 
	 * The first Cross has the SameKeyFirst output contract.
	 */
@Test
public void testTicket158() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);
    set1.map(new IdentityMapper<Long>()).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
        .cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityCrosser(org.apache.flink.optimizer.testfunctions.IdentityCrosser) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) Test(org.junit.Test)
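
For context, DiscardingOutputFormat is essentially a no-op sink: a DataSet program must end in at least one sink before a plan can be created, and this format satisfies that requirement while dropping every record. The sketch below paraphrases that behavior against the public OutputFormat interface; the class name NoOpOutputFormat is made up for illustration, and this is not Flink's exact source.

import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.configuration.Configuration;

public class NoOpOutputFormat<T> implements OutputFormat<T> {

    @Override
    public void configure(Configuration parameters) {
        // Nothing to configure.
    }

    @Override
    public void open(int taskNumber, int numTasks) {
        // No file, socket, or connection to open.
    }

    @Override
    public void writeRecord(T record) {
        // Drop the record; the sink exists only to terminate the plan.
    }

    @Override
    public void close() {
        // Nothing to release.
    }
}

It would be used exactly as the tests use DiscardingOutputFormat: dataSet.output(new NoOpOutputFormat<Long>()).name("Sink").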

Example 27 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

The class UnionReplacementTest, method testConsecutiveUnionsWithRangePartitioning.

/**
	 *
	 * Checks that a plan with consecutive UNIONs followed by PARTITION_RANGE is correctly translated.
	 *
	 * The program can be illustrated as follows:
	 *
	 * Src1 -\
	 *        >-> Union12--<
	 * Src2 -/              \
	 *                       >-> Union123 -> PartitionByRange -> Output
	 * Src3 ----------------/
	 *
	 * In the resulting plan, the range partitioning must be
	 * pushed to the inputs of the unions (Src1, Src2, Src3).
	 *
	 */
@Test
public void testConsecutiveUnionsWithRangePartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByRange(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be range partitioned.", PartitioningProperty.RANGE_PARTITIONED, sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be range partitioned on 1", new Ordering(1, null, Order.ASCENDING), sink.getInput().getGlobalProperties().getPartitioningOrdering());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be range partitioned.", PartitioningProperty.RANGE_PARTITIONED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be range partitioned on 1", new Ordering(1, null, Order.ASCENDING), partitioner.getInput().getGlobalProperties().getPartitioningOrdering());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be range partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be range partitioned", PartitioningProperty.RANGE_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be forwarded", ShipStrategyType.FORWARD, c.getShipStrategy());
        // range partitioning is executed as custom partitioning with prior sampling
        SingleInputPlanNode partitionMap = (SingleInputPlanNode) c.getSource();
        assertEquals(DriverStrategy.MAP, partitionMap.getDriverStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionMap.getInput().getShipStrategy());
    }
}
Also used: SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Channel(org.apache.flink.optimizer.plan.Channel) Ordering(org.apache.flink.api.common.operators.Ordering) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
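
Note that compileNoStats and getOptimizerPlanNodeResolver come from Flink's optimizer test harness (CompilerTestBase). Outside that harness, the same push-down can be inspected with the public API alone: ExecutionEnvironment.getExecutionPlan() returns the optimized plan as a JSON string. A minimal sketch under that assumption (class name and data are illustrative):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;

public class UnionRangePlanSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
        DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(1L, 1L));
        src1.union(src2)
            .partitionByRange(1)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
            .name("out");
        // Prints the optimized plan as JSON; the range partitioning should
        // appear on the union's inputs, mirroring what the test asserts
        // through the plan-node API.
        System.out.println(env.getExecutionPlan());
    }
}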

Example 28 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

The class UnionReplacementTest, method testConsecutiveUnionsWithBroadcast.

/**
	 *
	 * Checks that a plan with consecutive UNIONs followed by broadcast-fwd JOIN is correctly translated.
	 *
	 * The program can be illustrated as follows:
	 *
	 * Src1 -\
	 *        >-> Union12--<
	 * Src2 -/              \
	 *                       >-> Union123 --> bc-fwd-Join -> Output
	 * Src3 ----------------/             /
	 *                                   /
	 * Src4 ----------------------------/
	 *
	 * In the resulting plan, the broadcasting must be
	 * pushed to the inputs of the unions (Src1, Src2, Src3).
	 *
	 */
@Test
public void testConsecutiveUnionsWithBroadcast() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.join(src4, JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST)
        .where(0).equalTo(0).name("join")
        .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
        .name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    DualInputPlanNode join = resolver.getNode("join");
    // check input of join is broadcasted
    assertEquals("First join input should be fully replicated.", PartitioningProperty.FULL_REPLICATION, join.getInput1().getGlobalProperties().getPartitioning());
    NAryUnionPlanNode union = (NAryUnionPlanNode) join.getInput1().getSource();
    // check that all union inputs are broadcasted
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be fully replicated", PartitioningProperty.FULL_REPLICATION, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be broadcasting", ShipStrategyType.BROADCAST, c.getShipStrategy());
    }
}
Also used: DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Channel(org.apache.flink.optimizer.plan.Channel) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
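
To see the join hint in isolation, here is a self-contained sketch of the same pattern with made-up data (class name illustrative); it runs locally and prints the joined pairs:

import org.apache.flink.api.common.operators.base.JoinOperatorBase;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class BroadcastJoinSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, String>> first = env.fromElements(
                new Tuple2<>(0L, "a"), new Tuple2<>(1L, "b"));
        DataSet<Tuple2<Long, String>> second = env.fromElements(
                new Tuple2<>(0L, "x"), new Tuple2<>(1L, "y"));
        // BROADCAST_HASH_FIRST replicates the entire first input to every
        // parallel join task and builds a hash table from it; the second
        // input keeps its partitioning. When the first input is a union,
        // the optimizer broadcasts each union input individually, which is
        // what the test above verifies.
        first.join(second, JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST)
            .where(0)
            .equalTo(0)
            .print();
    }
}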

Example 29 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

The class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning.

/**
	 *
	 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly translated.
	 *
	 * The program can be illustrated as follows:
	 *
	 * Src1 -\
	 *        >-> Union12--<
	 * Src2 -/              \
	 *                       >-> Union123 -> PartitionByHash -> Output
	 * Src3 ----------------/
	 *
	 * In the resulting plan, the hash partitioning (ShipStrategyType.PARTITION_HASH) must be
	 * pushed to the inputs of the unions (Src1, Src2, Src3).
	 *
	 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByHash(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on 1.", new FieldList(1), sink.getInput().getGlobalProperties().getPartitioningFields());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on 1.", new FieldList(1), partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned", PartitioningProperty.HASH_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be hash partitioning", ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be data source", c.getSource() instanceof SourcePlanNode);
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)
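
The push-down is legal because hash partitioning is a pure function of the key: equal keys are routed to the same partition regardless of which union input produced them, so partitioning each input before the union yields the same distribution as partitioning the unioned result. A toy illustration of that invariant (the partition function is a stand-in, not Flink's actual partitioner):

public class HashPushDownSketch {

    // Any deterministic key-to-partition function gives the same guarantee.
    static int partition(long key, int parallelism) {
        return Math.floorMod(Long.hashCode(key), parallelism);
    }

    public static void main(String[] args) {
        int parallelism = 4;
        long[] input1 = {1L, 2L, 3L};
        long[] input2 = {2L, 3L, 4L};
        // Equal keys from both inputs map to the same partition, so
        // "partition then union" equals "union then partition".
        for (long k : input1) {
            System.out.println("input1 key " + k + " -> partition " + partition(k, parallelism));
        }
        for (long k : input2) {
            System.out.println("input2 key " + k + " -> partition " + partition(k, parallelism));
        }
    }
}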

Example 30 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

The class ReduceAllTest, method testReduce.

@Test
public void testReduce() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);
    set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1").output(new DiscardingOutputFormat<Long>()).name("Sink");
    Plan plan = env.createProgramPlan();
    try {
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly");
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) Test(org.junit.Test)

Aggregations (classes co-occurring with DiscardingOutputFormat across all examples, with usage counts)

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 39
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 39
Test (org.junit.Test): 35
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 33
Plan (org.apache.flink.api.common.Plan): 28
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 28
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 27
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 27
FieldList (org.apache.flink.api.common.operators.util.FieldList): 21
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 17
DataSet (org.apache.flink.api.java.DataSet): 11
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 11
Graph (org.apache.flink.graph.Graph): 10
Channel (org.apache.flink.optimizer.plan.Channel): 8
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 7
NullValue (org.apache.flink.types.NullValue): 7
MapFunction (org.apache.flink.api.common.functions.MapFunction): 6
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 6
LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator): 5
Edge (org.apache.flink.graph.Edge): 5