Example 71 with FieldList

Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

The class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning.

/**
 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly
 * translated.
 *
 * <p>The program can be illustrated as follows:
 *
 * <p>Src1 -\
 *           >-> Union12--<
 * Src2 -/               \
 *                        >-> Union123 -> PartitionByHash -> Output
 * Src3 ----------------/
 *
 * <p>In the resulting plan, the hash partitioning (ShippingStrategy.PARTITION_HASH) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByHash(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on 1.", new FieldList(1), sink.getInput().getGlobalProperties().getPartitioningFields());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals("Partitioner should be a unary no-op.", DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on 1.", new FieldList(1), partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned", PartitioningProperty.HASH_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be hash partitioning", ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be data source", c.getSource() instanceof SourcePlanNode);
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)
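For context on these assertions: FieldList is the ordered counterpart of FieldSet, so new FieldList(1) matches a hash partitioning on exactly field 1, and composite keys must match in order. A minimal standalone sketch of that distinction; the expected results reflect the documented ordered-vs-unordered semantics, not output copied from a Flink test:

import org.apache.flink.api.common.operators.util.FieldList;
import org.apache.flink.api.common.operators.util.FieldSet;

public class FieldListSemanticsSketch {

    public static void main(String[] args) {
        // FieldList is ordered: (3, 0) and (0, 3) are different key orders.
        FieldList orderedA = new FieldList(3, 0);
        FieldList orderedB = new FieldList(0, 3);
        System.out.println(orderedA.equals(orderedB)); // expected: false

        // FieldSet is unordered: the same two indexes compare equal.
        FieldSet setA = new FieldSet(3, 0);
        FieldSet setB = new FieldSet(0, 3);
        System.out.println(setA.equals(setB)); // expected: true

        // Instances are immutable; addField returns a new, extended list.
        FieldList extended = orderedA.addField(5); // fields 3, 0, 5 in that order
        System.out.println(extended.size()); // expected: 3
    }
}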

Example 72 with FieldList

Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

The class GroupOrderTest, method testCoGroupWithGroupOrder.

@Test
public void testCoGroupWithGroupOrder() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple7<Long, Long, Long, Long, Long, Long, Long>> set1 = env.readCsvFile("/tmp/fake1.csv").types(Long.class, Long.class, Long.class, Long.class, Long.class, Long.class, Long.class);
    DataSet<Tuple7<Long, Long, Long, Long, Long, Long, Long>> set2 = env.readCsvFile("/tmp/fake2.csv").types(Long.class, Long.class, Long.class, Long.class, Long.class, Long.class, Long.class);
    set1.coGroup(set2).where(3, 0).equalTo(6, 0)
        .sortFirstGroup(5, Order.DESCENDING)
        .sortSecondGroup(1, Order.DESCENDING)
        .sortSecondGroup(4, Order.ASCENDING)
        .with(new IdentityCoGrouper<Tuple7<Long, Long, Long, Long, Long, Long, Long>>())
        .name("CoGroup")
        .output(new DiscardingOutputFormat<Tuple7<Long, Long, Long, Long, Long, Long, Long>>())
        .name("Sink");
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // silence the compiler
        return;
    }
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    SinkPlanNode sinkNode = resolver.getNode("Sink");
    DualInputPlanNode coGroupNode = resolver.getNode("CoGroup");
    // verify the strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput1().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
    Channel c1 = coGroupNode.getInput1();
    Channel c2 = coGroupNode.getInput2();
    Assert.assertEquals(LocalStrategy.SORT, c1.getLocalStrategy());
    Assert.assertEquals(LocalStrategy.SORT, c2.getLocalStrategy());
    FieldList ship1 = new FieldList(3, 0);
    FieldList ship2 = new FieldList(6, 0);
    FieldList local1 = new FieldList(3, 0, 5);
    FieldList local2 = new FieldList(6, 0, 1, 4);
    Assert.assertEquals(ship1, c1.getShipStrategyKeys());
    Assert.assertEquals(ship2, c2.getShipStrategyKeys());
    Assert.assertEquals(local1, c1.getLocalStrategyKeys());
    Assert.assertEquals(local2, c2.getLocalStrategyKeys());
    Assert.assertTrue(c1.getLocalStrategySortOrder()[0] == coGroupNode.getSortOrders()[0]);
    Assert.assertTrue(c1.getLocalStrategySortOrder()[1] == coGroupNode.getSortOrders()[1]);
    Assert.assertTrue(c2.getLocalStrategySortOrder()[0] == coGroupNode.getSortOrders()[0]);
    Assert.assertTrue(c2.getLocalStrategySortOrder()[1] == coGroupNode.getSortOrders()[1]);
    // check that the local group orderings are correct
    Assert.assertEquals(false, c1.getLocalStrategySortOrder()[2]);
    Assert.assertEquals(false, c2.getLocalStrategySortOrder()[2]);
    Assert.assertEquals(true, c2.getLocalStrategySortOrder()[3]);
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple7(org.apache.flink.api.java.tuple.Tuple7) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) IdentityCoGrouper(org.apache.flink.optimizer.testfunctions.IdentityCoGrouper) Test(org.junit.Test)
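The relationship between the ship keys and local keys asserted above is plain list concatenation: the local sort keys are the partitioning keys with the group-order fields appended in call order. A small sketch, assuming FieldList.addFields returns a new extended list (which matches the immutable-builder style of the class):

import org.apache.flink.api.common.operators.util.FieldList;

public class GroupOrderKeysSketch {

    public static void main(String[] args) {
        // ship keys = the coGroup key fields
        FieldList ship1 = new FieldList(3, 0);
        FieldList ship2 = new FieldList(6, 0);

        // local keys = ship keys + group-order fields, in call order
        FieldList local1 = ship1.addFields(5);    // from sortFirstGroup(5, ...)
        FieldList local2 = ship2.addFields(1, 4); // sortSecondGroup(1, ...) then (4, ...)

        System.out.println(local1.equals(new FieldList(3, 0, 5)));    // expected: true
        System.out.println(local2.equals(new FieldList(6, 0, 1, 4))); // expected: true
    }
}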

Example 73 with FieldList

Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

The class PartitionPushdownTest, method testPartitioningReused.

@Test
public void testPartitioningReused() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
        input.groupBy(0).sum(1).groupBy(0, 1).sum(2).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode agg1Reducer = (SingleInputPlanNode) agg2Reducer.getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, agg2Reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, agg1Reducer.getInput().getShipStrategy());
        assertEquals(new FieldList(0), agg1Reducer.getInput().getShipStrategyKeys());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
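The pushdown only works in this direction: hash partitioning on field 0 also co-locates all records that agree on (0, 1), but partitioning on (0, 1) does not co-locate all records that agree on field 0 alone. A hedged sketch of the reversed program, in the style of the test above and relying on the same test-harness helpers (compileNoStats and friends); the same test class has a companion testPartitioningNotReused along these lines, but the exact assertions here are my expectation, not quoted code:

@Test
public void testPartitioningNotReusedSketch() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
        // grouping on the larger key set first: its hash partitioning on (0, 1)
        // cannot be reused for the later grouping on field 0 alone
        input.groupBy(0, 1).sum(2).groupBy(0).sum(1).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource();
        // expected: a second PARTITION_HASH, keyed on field 0 only
        assertEquals(ShipStrategyType.PARTITION_HASH, agg2Reducer.getInput().getShipStrategy());
        assertEquals(new FieldList(0), agg2Reducer.getInput().getShipStrategyKeys());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}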

Example 74 with FieldList

Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

The class DistinctCompilationTest, method testDistinctPlain.

@Test
public void testDistinctPlain() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.distinct().name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0, 1), reduceNode.getKeys(0));
        assertEquals(new FieldList(0, 1), combineNode.getKeys(0));
        assertEquals(new FieldList(0, 1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
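A hypothetical variant (not one of the quoted tests): distinct on a field-position subset keys the reduce on exactly those positions, so the FieldList assertions shrink accordingly. A sketch under that assumption, replacing the distinct() call in the test above:

        // distinct on field 0 only, instead of all fields
        data.distinct(0).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        // ... same plan traversal as above, but the expected keys become:
        // assertEquals(new FieldList(0), reduceNode.getKeys(0));
        // assertEquals(new FieldList(0), combineNode.getKeys(0));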

Example 75 with FieldList

Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

The class DistinctCompilationTest, method testDistinctWithSelectorFunctionKey.

@Test
public void testDistinctWithSelectorFunctionKey() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.distinct(new KeySelector<Tuple2<String, Double>, String>() {

            @Override
            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
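Why the assertions above use new FieldList(0) even though the selector returns f0: with a KeySelector, the plan inserts a key-extractor node that wraps each record as (key, record), so downstream key references address position 0 of the wrapper tuple, not the original record; the key projector before the sink unwraps again. A conceptual sketch of that wrapping (my illustration of the mechanism, not the optimizer's actual internal mapper):

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;

public class KeyExtractionSketch {

    public static void main(String[] args) throws Exception {
        KeySelector<Tuple2<String, Double>, String> selector =
                new KeySelector<Tuple2<String, Double>, String>() {

                    @Override
                    public String getKey(Tuple2<String, Double> value) {
                        return value.f0;
                    }
                };

        Tuple2<String, Double> record = new Tuple2<>("a", 1.0);

        // the key extractor conceptually emits (key, record) ...
        Tuple2<String, Tuple2<String, Double>> wrapped =
                new Tuple2<>(selector.getKey(record), record);

        // ... so the grouping key is field 0 of the wrapper: FieldList(0)
        System.out.println(wrapped.f0); // expected: a

        // the key projector after the reduce unwraps back to the record
        Tuple2<String, Double> unwrapped = wrapped.f1;
        System.out.println(unwrapped); // expected: (a,1.0)
    }
}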

Aggregations

FieldList (org.apache.flink.api.common.operators.util.FieldList): 80
Test (org.junit.Test): 70
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 31
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 28
Plan (org.apache.flink.api.common.Plan): 26
SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties): 26
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 25
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 25
Ordering (org.apache.flink.api.common.operators.Ordering): 24
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 23
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 19
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 18
FieldSet (org.apache.flink.api.common.operators.util.FieldSet): 14
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 12
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 12
Channel (org.apache.flink.optimizer.plan.Channel): 11
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 9
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties): 7
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 7
Edge (org.apache.flink.graph.Edge): 6