Search in sources :

Example 56 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInputBehindMultiMaps.

/**
	 * Tests join program with replicated data source behind multiple map ops.
	 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMultiMaps() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.filter(new NoFilter()).mapPartition(new IdPMap()).flatMap(new IdFlatMap()).map(new IdMap()).join(source2).where("*").equalTo("*").writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 57 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestJoin.

@Test
public void forwardFieldsTestJoin() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Integer, Integer>> in1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> in2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    in1 = in1.map(new MockMapper()).withForwardedFields("*").groupBy(0).reduce(new MockReducer()).withForwardedFields("f0->f1");
    in2 = in2.map(new MockMapper()).withForwardedFields("*").groupBy(1).reduce(new MockReducer()).withForwardedFields("f1->f2");
    DataSet<Tuple3<Integer, Integer, Integer>> out = in1.join(in2).where(1).equalTo(2).with(new MockJoin());
    out.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof DualInputPlanNode && visitable.getProgramOperator() instanceof InnerJoinOperatorBase) {
                DualInputPlanNode node = ((DualInputPlanNode) visitable);
                final Channel inConn1 = node.getInput1();
                final Channel inConn2 = node.getInput2();
                Assert.assertTrue("Join should just forward the input if it is already partitioned", inConn1.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue("Join should just forward the input if it is already partitioned", inConn2.getShipStrategy() == ShipStrategyType.FORWARD);
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple3(org.apache.flink.api.java.tuple.Tuple3) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Test(org.junit.Test)

Example 58 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class BinaryCustomPartitioningCompatibilityTest method testCompatiblePartitioningJoin.

@Test
public void testCompatiblePartitioningJoin() {
    try {
        final Partitioner<Long> partitioner = new Partitioner<Long>() {

            @Override
            public int partition(Long key, int numPartitions) {
                return 0;
            }
        };
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
        input1.partitionCustom(partitioner, 1).join(input2.partitionCustom(partitioner, 0)).where(1).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitioner1 = (SingleInputPlanNode) join.getInput1().getSource();
        SingleInputPlanNode partitioner2 = (SingleInputPlanNode) join.getInput2().getSource();
        assertEquals(ShipStrategyType.FORWARD, join.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, join.getInput2().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner1.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner2.getInput().getShipStrategy());
        assertEquals(partitioner, partitioner1.getInput().getPartitioner());
        assertEquals(partitioner, partitioner2.getInput().getPartitioner());
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Partitioner(org.apache.flink.api.common.functions.Partitioner) Test(org.junit.Test)

Example 59 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class BinaryCustomPartitioningCompatibilityTest method testCompatiblePartitioningCoGroup.

@Test
public void testCompatiblePartitioningCoGroup() {
    try {
        final Partitioner<Long> partitioner = new Partitioner<Long>() {

            @Override
            public int partition(Long key, int numPartitions) {
                return 0;
            }
        };
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
        input1.partitionCustom(partitioner, 1).coGroup(input2.partitionCustom(partitioner, 0)).where(1).equalTo(0).with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitioner1 = (SingleInputPlanNode) coGroup.getInput1().getSource();
        SingleInputPlanNode partitioner2 = (SingleInputPlanNode) coGroup.getInput2().getSource();
        assertEquals(ShipStrategyType.FORWARD, coGroup.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, coGroup.getInput2().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner1.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner2.getInput().getShipStrategy());
        assertEquals(partitioner, partitioner1.getInput().getPartitioner());
        assertEquals(partitioner, partitioner2.getInput().getPartitioner());
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Partitioner(org.apache.flink.api.common.functions.Partitioner) DummyCoGroupFunction(org.apache.flink.optimizer.testfunctions.DummyCoGroupFunction) Test(org.junit.Test)

Example 60 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class CoGroupCustomPartitioningTest method testCoGroupWithKeySelectors.

@Test
public void testCoGroupWithKeySelectors() {
    try {
        final Partitioner<Integer> partitioner = new TestPartitionerInt();
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
        DataSet<Pojo3> input2 = env.fromElements(new Pojo3());
        input1.coGroup(input2).where(new Pojo2KeySelector()).equalTo(new Pojo3KeySelector()).withPartitioner(partitioner).with(new DummyCoGroupFunction<Pojo2, Pojo3>()).output(new DiscardingOutputFormat<Tuple2<Pojo2, Pojo3>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
        assertEquals(partitioner, join.getInput1().getPartitioner());
        assertEquals(partitioner, join.getInput2().getPartitioner());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) DummyCoGroupFunction(org.apache.flink.optimizer.testfunctions.DummyCoGroupFunction) Test(org.junit.Test)

Aggregations

DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)96 Test (org.junit.Test)86 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)81 Plan (org.apache.flink.api.common.Plan)76 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)67 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)65 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)36 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)31 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)27 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)19 Channel (org.apache.flink.optimizer.plan.Channel)14 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)13 FieldList (org.apache.flink.api.common.operators.util.FieldList)12 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)11 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)11 PlanNode (org.apache.flink.optimizer.plan.PlanNode)11 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)10 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)10 ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType)10 ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat)8