Search in sources :

Example 1 with IdentityGroupReducerCombinable

use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable in project flink by apache.

the class PartitionOperatorTest method testRangePartitionOperatorPreservesFields2.

/**
 * Compiles a program in which a single source is consumed three ways and checks the ship
 * strategies and data exchange modes chosen by the optimizer.
 *
 * <p>The program range-partitions the input on field 1 and (a) group-reduces the partitioned
 * data on field 1, (b) aggregates the un-partitioned input grouped on field 0 and maps the
 * result, and (c) filters the range-partitioned data. Each branch ends in a discarding sink.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testRangePartitionOperatorPreservesFields2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));

        // Branch (a): range-partition on field 1, then group-reduce on the same field.
        PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);
        rangePartitioned.groupBy(1).reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // Branch (b): consumes the original (un-partitioned) data set.
        data.groupBy(0).aggregate(Aggregations.SUM, 1).map(new MapFunction<Tuple2<Long, Long>, Long>() {

            @Override
            public Long map(Tuple2<Long, Long> value) throws Exception {
                return value.f1;
            }
        }).output(new DiscardingOutputFormat<Long>());

        // Branch (c): a second consumer of the range-partitioned data; keeps records with even f0.
        rangePartitioned.filter(new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return value.f0 % 2 == 0;
            }
        }).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        // Walk backwards from the first sink the plan returns.
        // NOTE(review): this presumes the first sink is the one behind the group-reduce of
        // branch (a) - confirm that the iteration order of getDataSinks() is stable.
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();

        // The reducer and the node before it are expected to forward; the custom partitioning
        // is expected on the input of the node two steps upstream of the reducer.
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());

        // The source is expected to have three outgoing channels, all forwarding; the first two
        // pipelined and the third in batch mode.
        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(3, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());

        // The node feeding the reducer is expected to have two outgoing channels, both
        // forwarding and pipelined.
        List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
        assertEquals(2, partitionOutputChannels.size());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows its full stack
        // trace; failing with only e.getMessage() dropped the trace and could be null.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Channel(org.apache.flink.optimizer.plan.Channel) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)

Example 2 with IdentityGroupReducerCombinable

use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable in project flink by apache.

the class CustomPartitioningGlobalOptimizationTest method testJoinReduceCombination.

/**
 * Compiles a join followed by a group-reduce, both configured with the same custom partitioner,
 * and checks that the reduce consumes the join output directly over a forwarding channel.
 *
 * <p>The join matches field 1 of the first input with field 0 of the second input; the reduce
 * groups the joined tuples on field 1.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testJoinReduceCombination() {
    try {
        // The same partitioner instance is handed to both the join and the grouping.
        final Partitioner<Long> partitioner = new TestPartitionerLong();
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
        DataSet<Tuple3<Long, Long, Long>> joined = input1.join(input2).where(1).equalTo(0).projectFirst(0, 1).<Tuple3<Long, Long, Long>>projectSecond(2).withPartitioner(partitioner);
        joined.groupBy(1).withPartitioner(partitioner).reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();

        // The reducer's direct predecessor must be the join itself; check before casting.
        assertTrue("Reduce is not chained, property reuse does not happen", reducer.getInput().getSource() instanceof DualInputPlanNode);
        DualInputPlanNode join = (DualInputPlanNode) reducer.getInput().getSource();

        // Both join inputs are expected to be custom-partitioned with the supplied partitioner.
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
        assertEquals(partitioner, join.getInput1().getPartitioner());
        assertEquals(partitioner, join.getInput2().getPartitioner());

        // The reducer is expected to forward from the join rather than re-partition.
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows its full stack
        // trace; failing with only e.getMessage() dropped the trace and could be null.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 3 with IdentityGroupReducerCombinable

use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable in project flink by apache.

the class CoGroupCustomPartitioningTest method testIncompatibleHashAndCustomPartitioning.

/**
 * Compiles a co-group whose two inputs both derive from a custom-partitioned data set and
 * checks the ship strategies chosen for the co-group inputs.
 *
 * <p>The first co-group input is the custom-partitioned data after {@code distinct(0, 1)}, a
 * grouping on field 1 sorted on field 0, and a group-reduce; the second input is the
 * custom-partitioned data itself. Both sides are co-grouped on field 0.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testIncompatibleHashAndCustomPartitioning() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

        // Custom-partition on field 0 (every key goes to partition 0), then apply an identity
        // map that declares fields 0, 1 and 2 as forwarded.
        DataSet<Tuple3<Long, Long, Long>> partitioned = input.partitionCustom(new Partitioner<Long>() {

            @Override
            public int partition(Long key, int numPartitions) {
                return 0;
            }
        }, 0).map(new IdentityMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2");

        DataSet<Tuple3<Long, Long, Long>> grouped = partitioned.distinct(0, 1).groupBy(1).sortGroup(0, Order.ASCENDING).reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1");
        grouped.coGroup(partitioned).where(0).equalTo(0).with(new DummyCoGroupFunction<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();

        // First input must be hash-partitioned; the second may be hash-partitioned or forwarded.
        assertEquals(ShipStrategyType.PARTITION_HASH, coGroup.getInput1().getShipStrategy());
        assertTrue(coGroup.getInput2().getShipStrategy() == ShipStrategyType.PARTITION_HASH || coGroup.getInput2().getShipStrategy() == ShipStrategyType.FORWARD);
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows its full stack
        // trace; failing with only e.getMessage() dropped the trace and could be null.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) DummyCoGroupFunction(org.apache.flink.optimizer.testfunctions.DummyCoGroupFunction) Test(org.junit.Test)

Example 4 with IdentityGroupReducerCombinable

use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable in project flink by apache.

the class GroupingTupleTranslationTest method testCustomPartitioningTupleGroupReduce.

/**
 * Compiles a group-reduce on a tuple data set grouped on field 0 with a custom partitioner and
 * checks the ship strategies along the sink, reducer and the node feeding the reducer.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testCustomPartitioningTupleGroupReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Rebalance the single-element input and set parallelism 4 on the rebalance operator.
        DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0)).rebalance().setParallelism(4);
        data.groupBy(0).withPartitioner(new TestPartitionerInt()).reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Integer, Integer>>()).output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        // Walk backwards: sink <- reducer <- node feeding the reducer (treated as the combiner).
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        // Only the reducer's input is expected to use the custom partitioning; the channels
        // before and after it are expected to forward.
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows its full stack
        // trace; failing with only e.getMessage() dropped the trace and could be null.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 5 with IdentityGroupReducerCombinable

use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable in project flink by apache.

the class SortPartialReuseTest method testPartialPartitioningReuse.

/**
 * Compiles two consecutive group-reduces downstream of a hash partitioning on field 0 and
 * checks that neither reduce re-partitions its input.
 *
 * <p>The first reduce groups on fields (0, 1) and the second on field 0; the identity map and
 * the first reduce both declare fields 0, 1 and 2 as forwarded.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testPartialPartitioningReuse() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

        input.partitionByHash(0)
                .map(new IdentityMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2")
                .groupBy(0, 1)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2")
                .groupBy(0)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        // Walk backwards: sink <- second reduce <- first reduce.
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer2 = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode reducer1 = (SingleInputPlanNode) reducer2.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // should be locally forwarding, reusing sort and partitioning
        assertEquals(ShipStrategyType.FORWARD, reducer2.getInput().getShipStrategy());
        assertEquals(LocalStrategy.NONE, reducer2.getInput().getLocalStrategy());

        // The first reduce is expected to forward as well, with a combining sort as its local strategy.
        assertEquals(ShipStrategyType.FORWARD, reducer1.getInput().getShipStrategy());
        assertEquals(LocalStrategy.COMBININGSORT, reducer1.getInput().getLocalStrategy());
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows its full stack
        // trace; failing with only e.getMessage() dropped the trace and could be null.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) Tuple3(org.apache.flink.api.java.tuple.Tuple3) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Aggregations

Plan (org.apache.flink.api.common.Plan)16 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)16 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)16 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)16 IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable)16 Test (org.junit.Test)16 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)14 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)10 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)7 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)7 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)4 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)3 Channel (org.apache.flink.optimizer.plan.Channel)2 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)2 FilterFunction (org.apache.flink.api.common.functions.FilterFunction)1 MapFunction (org.apache.flink.api.common.functions.MapFunction)1 Partitioner (org.apache.flink.api.common.functions.Partitioner)1 JoinHint (org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint)1 Tuple4 (org.apache.flink.api.java.tuple.Tuple4)1 DummyCoGroupFunction (org.apache.flink.optimizer.testfunctions.DummyCoGroupFunction)1