Search in sources:

Example 1 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class PipelineBreakingTest method testSimpleForwardPlan.

/**
 * Verifies that a plain forward pipeline
 *
 * <pre>
 *     (source) -> (map) -> (filter) -> (groupBy / reduce)
 * </pre>
 *
 * is compiled without any pipeline breakers on its connections.
 */
@Test
public void testSimpleForwardPlan() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // the file is never read - the path only has to be syntactically valid
        DataSet<String> source = env.readTextFile("/never/accessed");

        DataSet<Integer> mapped = source.map(new MapFunction<String, Integer>() {

            @Override
            public Integer map(String value) {
                return 0;
            }
        });

        DataSet<Integer> filtered = mapped.filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        });

        filtered.groupBy(new IdentityKeyExtractor<Integer>())
                .reduceGroup(new Top1GroupReducer<Integer>())
                .output(new DiscardingOutputFormat<Integer>());

        // walk the optimizer DAG backwards from the sink to the map
        DataSinkNode sink = convertPlan(env.createProgramPlan()).get(0);
        SingleInputNode reduce = (SingleInputNode) sink.getPredecessorNode();
        SingleInputNode keyExtractor = (SingleInputNode) reduce.getPredecessorNode();
        SingleInputNode filter = (SingleInputNode) keyExtractor.getPredecessorNode();
        SingleInputNode map = (SingleInputNode) filter.getPredecessorNode();

        // none of the connections may be marked as pipeline breaking
        assertFalse(sink.getInputConnection().isBreakingPipeline());
        assertFalse(reduce.getIncomingConnection().isBreakingPipeline());
        assertFalse(keyExtractor.getIncomingConnection().isBreakingPipeline());
        assertFalse(filter.getIncomingConnection().isBreakingPipeline());
        assertFalse(map.getIncomingConnection().isBreakingPipeline());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Top1GroupReducer(org.apache.flink.optimizer.testfunctions.Top1GroupReducer) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) Test(org.junit.Test)

Example 2 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class PipelineBreakingTest method testBranchingPlanNotReJoined.

/**
 * Checks that a plan which branches without re-joining the branches
 *
 * <pre>
 *                      /---> (filter) -> (sink)
 *                     /
 *                    /
 * (source) -> (map) -----------------\
 *                    \               (join) -> (sink)
 *                     \   (source) --/
 *                      \
 *                       \
 *                        \-> (sink)
 * </pre>
 *
 * is compiled without any pipeline breakers being placed.
 */
@Test
public void testBranchingPlanNotReJoined() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Integer> data = env.readTextFile("/never/accessed")
                .map(new MapFunction<String, Integer>() {

                    @Override
                    public Integer map(String value) {
                        return 0;
                    }
                });

        // branch 1: filter, then sink
        data.filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).output(new DiscardingOutputFormat<Integer>());

        // branch 2: join against a second source, then sink
        data.join(env.fromElements(1, 2, 3, 4))
                .where(new IdentityKeyExtractor<Integer>())
                .equalTo(new IdentityKeyExtractor<Integer>())
                .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        // branch 3: sink attached directly to the map
        data.output(new DiscardingOutputFormat<Integer>());

        List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());

        // gather the optimizer DAG nodes (same order as the sinks were defined above)
        DataSinkNode filterSink = sinks.get(0);
        DataSinkNode joinSink = sinks.get(1);
        DataSinkNode directSink = sinks.get(2);

        SingleInputNode filterNode = (SingleInputNode) filterSink.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();

        TwoInputNode joinNode = (TwoInputNode) joinSink.getPredecessorNode();
        SingleInputNode joinSecondInput = (SingleInputNode) joinNode.getSecondPredecessorNode();

        // no connection may be marked as pipeline breaking
        assertFalse(filterSink.getInputConnection().isBreakingPipeline());
        assertFalse(joinSink.getInputConnection().isBreakingPipeline());
        assertFalse(directSink.getInputConnection().isBreakingPipeline());
        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getSecondIncomingConnection().isBreakingPipeline());
        assertFalse(joinSecondInput.getIncomingConnection().isBreakingPipeline());

        // some other sanity checks on the plan construction (cannot hurt)
        assertEquals(mapNode, ((SingleInputNode) joinNode.getFirstPredecessorNode()).getPredecessorNode());
        assertEquals(mapNode, directSink.getPredecessorNode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) IdentityKeyExtractor(org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TwoInputNode(org.apache.flink.optimizer.dag.TwoInputNode) Test(org.junit.Test)

Example 3 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class PipelineBreakingTest method convertPlan.

/**
 * Converts the given program plan into an optimizer DAG and runs the
 * branch-tracking logic on it.
 *
 * <p>The plan is translated via a {@code GraphCreatingVisitor}; if the plan has
 * multiple sinks they are joined under a single artificial root with
 * {@code SinkJoiner} nodes, and the id/estimate and branch traversals are
 * applied so that pipeline-breaker information is populated on the DAG.
 *
 * @param p the program plan to convert
 * @return the data sink nodes of the created optimizer DAG
 * @throws IllegalArgumentException if the plan contains no data sinks
 */
private static List<DataSinkNode> convertPlan(Plan p) {
    // 17 is an arbitrary parallelism for the test DAG - presumably any positive
    // value works for these tests; TODO confirm
    GraphCreatingVisitor dagCreator = new GraphCreatingVisitor(17, p.getExecutionConfig().getExecutionMode());
    // create the DAG
    p.accept(dagCreator);
    List<DataSinkNode> sinks = dagCreator.getSinks();
    if (sinks.isEmpty()) {
        // fail fast with a clear message instead of hitting a
        // NoSuchElementException from iter.next() below
        throw new IllegalArgumentException("The plan contains no data sinks.");
    }
    // build a single root and run the branch tracking logic
    OptimizerNode rootNode;
    if (sinks.size() == 1) {
        rootNode = sinks.get(0);
    } else {
        // fold all sinks into one artificial root, pairwise
        Iterator<DataSinkNode> iter = sinks.iterator();
        rootNode = iter.next();
        while (iter.hasNext()) {
            rootNode = new SinkJoiner(rootNode, iter.next());
        }
    }
    rootNode.accept(new IdAndEstimatesVisitor(null));
    rootNode.accept(new BranchesVisitor());
    return sinks;
}
Also used : OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) BranchesVisitor(org.apache.flink.optimizer.traversals.BranchesVisitor) IdAndEstimatesVisitor(org.apache.flink.optimizer.traversals.IdAndEstimatesVisitor) SinkJoiner(org.apache.flink.optimizer.dag.SinkJoiner) GraphCreatingVisitor(org.apache.flink.optimizer.traversals.GraphCreatingVisitor)

Example 4 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class PreviewPlanDumpTest method dump.

/**
 * Pre-optimizes the given plan, renders it to its JSON preview
 * representation, and verifies that the produced output is well-formed JSON.
 *
 * @param p the plan to dump and validate
 */
private void dump(Plan p) {
    try {
        List<DataSinkNode> sinks = Optimizer.createPreOptimizedPlan(p);
        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        String json = dumper.getPactPlanAsJSON(sinks);
        // parse the whole document; any syntax error surfaces as a JsonParseException
        JsonParser parser = new JsonFactory().createJsonParser(json);
        try {
            while (parser.nextToken() != null) {
                // consume all tokens - we only care that parsing succeeds
            }
        } finally {
            // release the parser's resources even if parsing fails
            // (the original leaked the parser)
            parser.close();
        }
    } catch (JsonParseException e) {
        e.printStackTrace();
        Assert.fail("JSON Generator produced malformatted output: " + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An error occurred in the test: " + e.getMessage());
    }
}
Also used : PlanJSONDumpGenerator(org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) JsonFactory(org.codehaus.jackson.JsonFactory) JsonParseException(org.codehaus.jackson.JsonParseException) JsonParseException(org.codehaus.jackson.JsonParseException) JsonParser(org.codehaus.jackson.JsonParser)

Example 5 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class PackagedProgram method getPreviewPlan.

/**
 * Returns the analyzed plan without any optimizations, rendered as JSON.
 *
 * @return the analyzed plan without any optimizations.
 * @throws ProgramInvocationException Thrown if an error occurred in the
 *  user-provided pact assembler. This may indicate
 *         missing parameters for generation.
 */
public String getPreviewPlan() throws ProgramInvocationException {
    // NOTE(review): the context class loader is replaced and never restored -
    // this leaks the user-code class loader into the calling thread; confirm
    // whether callers depend on it before changing.
    Thread.currentThread().setContextClassLoader(this.getUserCodeClassLoader());
    List<DataSinkNode> previewPlan;
    if (isUsingProgramEntryPoint()) {
        previewPlan = Optimizer.createPreOptimizedPlan(getPlan());
    } else if (isUsingInteractiveMode()) {
        // temporary hack to support the web client
        PreviewPlanEnvironment env = new PreviewPlanEnvironment();
        env.setAsContext();
        try {
            invokeInteractiveModeForExecution();
        } catch (ProgramInvocationException e) {
            throw e;
        } catch (Throwable t) {
            // the invocation gets aborted with the preview plan
            if (env.previewPlan != null) {
                previewPlan = env.previewPlan;
            } else if (env.preview != null) {
                return env.preview;
            } else {
                throw new ProgramInvocationException("The program caused an error: ", t);
            }
        } finally {
            env.unsetAsContext();
        }
        // normal-completion path: the plan must have been captured by the environment
        if (env.previewPlan != null) {
            previewPlan = env.previewPlan;
        } else {
            throw new ProgramInvocationException("The program plan could not be fetched. The program silently swallowed the control flow exceptions.");
        }
    } else {
        // was a bare 'new RuntimeException()' - now carries a diagnostic message
        throw new RuntimeException("The program is neither a program-entry-point program nor an interactive-mode program.");
    }
    // render the pre-optimized plan to its JSON representation
    PlanJSONDumpGenerator jsonGen = new PlanJSONDumpGenerator();
    StringWriter string = new StringWriter(1024);
    try (PrintWriter pw = new PrintWriter(string)) {
        jsonGen.dumpPactPlanAsJSON(previewPlan, pw);
    }
    return string.toString();
}
Also used : PlanJSONDumpGenerator(org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator) StringWriter(java.io.StringWriter) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) PrintWriter(java.io.PrintWriter)

Aggregations

DataSinkNode (org.apache.flink.optimizer.dag.DataSinkNode)8 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)3 OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode)3 FilterFunction (org.apache.flink.api.common.functions.FilterFunction)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 BinaryUnionNode (org.apache.flink.optimizer.dag.BinaryUnionNode)2 BulkIterationNode (org.apache.flink.optimizer.dag.BulkIterationNode)2 DataSourceNode (org.apache.flink.optimizer.dag.DataSourceNode)2 SingleInputNode (org.apache.flink.optimizer.dag.SingleInputNode)2 WorksetIterationNode (org.apache.flink.optimizer.dag.WorksetIterationNode)2 Test (org.junit.Test)2 PrintWriter (java.io.PrintWriter)1 StringWriter (java.io.StringWriter)1 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)1 CompilerHints (org.apache.flink.api.common.operators.CompilerHints)1 GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase)1 GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase)1 Union (org.apache.flink.api.common.operators.Union)1 BulkIterationBase (org.apache.flink.api.common.operators.base.BulkIterationBase)1 CoGroupOperatorBase (org.apache.flink.api.common.operators.base.CoGroupOperatorBase)1