Example 1 with IdentityKeyExtractor

use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.

the class PipelineBreakingTest method testBranchingPlanNotReJoined.

/**
 * Tests that branching plans, where the branches are not re-joined,
 * do not place pipeline breakers.
 *
 * <pre>
 *                      /---> (filter) -> (sink)
 *                     /
 *                    /
 * (source) -> (map) -----------------\
 *                    \               (join) -> (sink)
 *                     \   (source) --/
 *                      \
 *                       \
 *                        \-> (sink)
 * </pre>
 */
@Test
public void testBranchingPlanNotReJoined() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> data = env.readTextFile("/never/accessed").map(new MapFunction<String, Integer>() {

            @Override
            public Integer map(String value) {
                return 0;
            }
        });
        // output 1
        data.filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).output(new DiscardingOutputFormat<Integer>());
        // output 2 goes through a join before reaching its sink
        data.join(env.fromElements(1, 2, 3, 4))
                .where(new IdentityKeyExtractor<Integer>())
                .equalTo(new IdentityKeyExtractor<Integer>())
                .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
        // output 3 is direct
        data.output(new DiscardingOutputFormat<Integer>());
        List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());
        // gather the optimizer DAG nodes
        DataSinkNode sinkAfterFilter = sinks.get(0);
        DataSinkNode sinkAfterJoin = sinks.get(1);
        DataSinkNode sinkDirect = sinks.get(2);
        SingleInputNode filterNode = (SingleInputNode) sinkAfterFilter.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();
        TwoInputNode joinNode = (TwoInputNode) sinkAfterJoin.getPredecessorNode();
        SingleInputNode joinInput = (SingleInputNode) joinNode.getSecondPredecessorNode();
        // verify the non-pipeline breaking status
        assertFalse(sinkAfterFilter.getInputConnection().isBreakingPipeline());
        assertFalse(sinkAfterJoin.getInputConnection().isBreakingPipeline());
        assertFalse(sinkDirect.getInputConnection().isBreakingPipeline());
        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getSecondIncomingConnection().isBreakingPipeline());
        assertFalse(joinInput.getIncomingConnection().isBreakingPipeline());
        // some other sanity checks on the plan construction (cannot hurt)
        assertEquals(mapNode, ((SingleInputNode) joinNode.getFirstPredecessorNode()).getPredecessorNode());
        assertEquals(mapNode, sinkDirect.getPredecessorNode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) IdentityKeyExtractor(org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TwoInputNode(org.apache.flink.optimizer.dag.TwoInputNode) Test(org.junit.Test)
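
Every example on this page keys on the element itself via IdentityKeyExtractor. The test function's source is not shown here; a minimal sketch of such an identity key selector, assuming the standard KeySelector contract of the DataSet API, looks like this:

import org.apache.flink.api.java.functions.KeySelector;

// Minimal sketch (assumed shape of the test function referenced above):
// the element itself serves as the key, so the optimizer must treat the
// whole value as the join/grouping key.
public class IdentityKeyExtractor<T> implements KeySelector<T, T> {

    private static final long serialVersionUID = 1L;

    @Override
    public T getKey(T value) {
        return value;
    }
}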

Example 2 with IdentityKeyExtractor

use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.

the class IterationsCompilerTest method testResetPartialSolution.

@Test
public void testResetPartialSolution() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> width = env.generateSequence(1, 10);
        DataSet<Long> update = env.generateSequence(1, 10);
        DataSet<Long> lastGradient = env.generateSequence(1, 10);
        DataSet<Long> init = width.union(update).union(lastGradient);
        IterativeDataSet<Long> iteration = init.iterate(10);
        width = iteration.filter(new IdFilter<Long>());
        update = iteration.filter(new IdFilter<Long>());
        lastGradient = iteration.filter(new IdFilter<Long>());
        DataSet<Long> gradient = width.map(new IdentityMapper<Long>());
        DataSet<Long> term = gradient.join(lastGradient)
                .where(new IdentityKeyExtractor<Long>())
                .equalTo(new IdentityKeyExtractor<Long>())
                .with(new JoinFunction<Long, Long, Long>() {

                    @Override
                    public Long join(Long first, Long second) {
                        return null;
                    }
                });
        update = update.map(new RichMapFunction<Long, Long>() {

            @Override
            public Long map(Long value) {
                return null;
            }
        }).withBroadcastSet(term, "some-name");
        DataSet<Long> result = iteration.closeWith(width.union(update).union(lastGradient));
        result.output(new DiscardingOutputFormat<Long>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IdentityKeyExtractor(org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Test(org.junit.Test)
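
Example 2 also branches the partial solution through an IdFilter, whose source is not listed on this page. A plausible minimal version, assuming it is a simple pass-through filter whose only purpose is to branch the plan, would be:

import org.apache.flink.api.common.functions.FilterFunction;

// Hypothetical pass-through filter (assumed shape of the IdFilter test
// function): it accepts every element, so each of the three branches of
// the iteration sees the complete partial solution.
public class IdFilter<T> implements FilterFunction<T> {

    private static final long serialVersionUID = 1L;

    @Override
    public boolean filter(T value) {
        return true; // keep everything; the filter exists only to create a branch
    }
}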

Example 3 with IdentityKeyExtractor

use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.

the class NestedIterationsTest method testBulkIterationInClosure.

@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);
        IterativeDataSet<Long> firstIteration = data1.iterate(100);
        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));
        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
        DataSet<Long> joined = mainIteration.join(firstResult)
                .where(new IdentityKeyExtractor<Long>())
                .equalTo(new IdentityKeyExtractor<Long>())
                .with(new DummyFlatJoinFunction<Long>());
        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());
        Plan p = env.createProgramPlan();
        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);
        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IdentityKeyExtractor(org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Test(org.junit.Test)
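
Examples 3 and 4 join with a DummyFlatJoinFunction whose output is irrelevant to the plan-translation checks. Assuming it simply forwards one side of each match, a minimal sketch looks like this:

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.util.Collector;

// Assumed shape of the DummyFlatJoinFunction used above: forward the
// left element of each match; the actual values never matter to these
// optimizer tests.
public class DummyFlatJoinFunction<T> implements FlatJoinFunction<T, T, T> {

    private static final long serialVersionUID = 1L;

    @Override
    public void join(T first, T second, Collector<T> out) {
        out.collect(first);
    }
}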

Example 4 with IdentityKeyExtractor

use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.

the class BranchingPlansCompilerTest method testIterationWithStaticInput.

/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |      \         /-------< >---sink
 * src-map     |     join------/         |
 *   \         |      /                  |
 *    \        +-----/-------------------+
 *     \            /
 *      \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);
        DataSet<Long> source = env.generateSequence(1, 1000000);
        DataSet<Long> mapped = source.map(new IdentityMapper<Long>());
        DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());
        IterativeDataSet<Long> iteration = mapped.iterate(10);
        iteration.closeWith(
                        iteration.join(reduced)
                                .where(new IdentityKeyExtractor<Long>())
                                .equalTo(new IdentityKeyExtractor<Long>())
                                .with(new DummyFlatJoinFunction<Long>()))
                .output(new DiscardingOutputFormat<Long>());
        compileNoStats(env.createProgramPlan());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IdentityKeyExtractor(org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) Test(org.junit.Test)
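
Example 4 additionally reduces the static input with a SelectOneReducer, also not shown here. Assuming it collapses each group to one arbitrary element, a minimal sketch would be:

import org.apache.flink.api.common.functions.ReduceFunction;

// Assumed shape of the SelectOneReducer test function: keep one element
// per group; which one is kept is irrelevant to the plan being tested.
public class SelectOneReducer<T> implements ReduceFunction<T> {

    private static final long serialVersionUID = 1L;

    @Override
    public T reduce(T value1, T value2) {
        return value1; // arbitrarily keep the first of the two
    }
}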

Example 5 with IdentityKeyExtractor

use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.

the class BranchingPlansCompilerTest method testBranchingBroadcastVariable.

@Test
public void testBranchingBroadcastVariable() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);
    DataSet<String> input1 = env.readTextFile(IN_FILE).name("source1");
    DataSet<String> input2 = env.readTextFile(IN_FILE).name("source2");
    DataSet<String> input3 = env.readTextFile(IN_FILE).name("source3");
    DataSet<String> result1 = input1.map(new IdentityMapper<String>()).reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(input3, "bc");
    DataSet<String> result2 = input2.map(new IdentityMapper<String>()).reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(input3, "bc");
    result1.join(result2)
            .where(new IdentityKeyExtractor<String>())
            .equalTo(new IdentityKeyExtractor<String>())
            .with(new RichJoinFunction<String, String, String>() {

                @Override
                public String join(String first, String second) {
                    return null;
                }
            })
            .withBroadcastSet(input3, "bc1")
            .withBroadcastSet(input1, "bc2")
            .withBroadcastSet(result1, "bc3")
            .output(new DiscardingOutputFormat<String>());
    Plan plan = env.createProgramPlan();
    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Top1GroupReducer(org.apache.flink.optimizer.testfunctions.Top1GroupReducer) IdentityKeyExtractor(org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
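
The compile test in Example 5 never reads its broadcast sets; it only verifies that the branched plan compiles. For context, a rich function accesses a broadcast set at runtime through its runtime context. A sketch for illustration only, reusing the "bc1" name from the example above:

import java.util.List;

import org.apache.flink.api.common.functions.RichJoinFunction;
import org.apache.flink.configuration.Configuration;

// Illustrative only: shows how the anonymous RichJoinFunction above could
// read one of its broadcast sets if the test exercised the data path.
public class BroadcastReadingJoin extends RichJoinFunction<String, String, String> {

    private static final long serialVersionUID = 1L;

    private List<String> bc1;

    @Override
    public void open(Configuration parameters) {
        // Broadcast sets are materialized per task and fetched by name.
        bc1 = getRuntimeContext().getBroadcastVariable("bc1");
    }

    @Override
    public String join(String first, String second) {
        return bc1.isEmpty() ? first : bc1.get(0);
    }
}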

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 5
IdentityKeyExtractor (org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) 5
Test (org.junit.Test) 5
Plan (org.apache.flink.api.common.Plan) 3
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan) 3
JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator) 2
FilterFunction (org.apache.flink.api.common.functions.FilterFunction) 1
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 1
DataSinkNode (org.apache.flink.optimizer.dag.DataSinkNode) 1
SingleInputNode (org.apache.flink.optimizer.dag.SingleInputNode) 1
TwoInputNode (org.apache.flink.optimizer.dag.TwoInputNode) 1
IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper) 1
Top1GroupReducer (org.apache.flink.optimizer.testfunctions.Top1GroupReducer) 1