Example 1 with Operator

use of org.apache.flink.api.common.operators.Operator in project flink by apache.

the class NamesTest method testJoinWith.

@Test
public void testJoinWith() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    List<Tuple1<String>> strLi = new ArrayList<Tuple1<String>>();
    strLi.add(new Tuple1<String>("a"));
    strLi.add(new Tuple1<String>("b"));
    DataSet<Tuple1<String>> strs = env.fromCollection(strLi);
    DataSet<Tuple1<String>> strs1 = env.fromCollection(strLi);
    strs.join(strs1).where(0).equalTo(0).with(new FlatJoinFunction<Tuple1<String>, Tuple1<String>, String>() {

        @Override
        public void join(Tuple1<String> first, Tuple1<String> second, Collector<String> out) throws Exception {
            // intentionally empty: this test only verifies the operator's generated name
        }
    }).output(new DiscardingOutputFormat<String>());
    Plan plan = env.createProgramPlan();
    plan.accept(new Visitor<Operator<?>>() {

        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof InnerJoinOperatorBase) {
                Assert.assertEquals("Join at testJoinWith(NamesTest.java:93)", visitable.getName());
            }
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {
        }
    });
}
Also used: Operator (org.apache.flink.api.common.operators.Operator), PlanFilterOperator (org.apache.flink.api.java.operators.translation.PlanFilterOperator), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), ArrayList (java.util.ArrayList), FlatJoinFunction (org.apache.flink.api.common.functions.FlatJoinFunction), Plan (org.apache.flink.api.common.Plan), Tuple1 (org.apache.flink.api.java.tuple.Tuple1), Collector (org.apache.flink.util.Collector), InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase), Test (org.junit.Test)
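
The Plan/Visitor pattern this test exercises also works in ordinary driver code. The following minimal sketch (the class name OperatorNamePrinter is invented for illustration; the Flink API calls are the same ones used above) walks a generated Plan and prints every operator's auto-generated name:

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.util.Visitor;

// hypothetical driver class, not part of the Flink sources
public class OperatorNamePrinter {

    public static void main(String[] args) {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> data = env.generateSequence(1, 100);
        data.map(new MapFunction<Long, Long>() {

            @Override
            public Long map(Long value) {
                return value + 1;
            }
        }).output(new DiscardingOutputFormat<Long>());
        Plan plan = env.createProgramPlan();
        // the visitor starts at the sinks and walks towards the sources
        plan.accept(new Visitor<Operator<?>>() {

            @Override
            public boolean preVisit(Operator<?> visitable) {
                System.out.println(visitable.getName());
                // returning true descends into this operator's inputs
                return true;
            }

            @Override
            public void postVisit(Operator<?> visitable) {
            }
        });
    }
}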

Example 2 with Operator

use of org.apache.flink.api.common.operators.Operator in project flink by apache.

the class BranchingPlansCompilerTest method testBranchingWithMultipleDataSinks2.

/**
	 * 
	 * <pre>
	 *                (SRC A)  
	 *                   |
	 *                (MAP A)
	 *             /         \   
	 *          (MAP B)      (MAP C)
	 *           /           /     \
	 *        (SINK A)    (SINK B)  (SINK C)
	 * </pre>
	 */
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> source = env.generateSequence(1, 10000);
        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedB = mappedA.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = mappedA.map(new IdentityMapper<Long>());
        mappedB.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        Plan plan = env.createProgramPlan();
        Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());
        OptimizedPlan oPlan = compileNoStats(plan);
        // ---------- check the optimizer plan ----------
        // number of sinks
        assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());
        // remove matching sinks to check relation
        for (SinkPlanNode sink : oPlan.getDataSinks()) {
            assertTrue(sinks.remove(sink.getProgramOperator()));
        }
        assertTrue(sinks.isEmpty());
        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Operator (org.apache.flink.api.common.operators.Operator), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), HashSet (java.util.HashSet), Test (org.junit.Test)
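
The key call here is plan.getDataSinks(): every output(...) on the API side yields one sink operator, no matter how the branches share upstream operators, which is why the test expects three sinks for a single shared source. A minimal sketch of the same bookkeeping outside the optimizer test harness (the class name SinkCountSketch is invented):

import java.util.HashSet;
import java.util.Set;
import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

// hypothetical illustration, not part of the Flink sources
public class SinkCountSketch {

    public static void main(String[] args) {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> source = env.generateSequence(1, 100);
        // three sinks attached to the same shared source
        source.output(new DiscardingOutputFormat<Long>());
        source.output(new DiscardingOutputFormat<Long>());
        source.output(new DiscardingOutputFormat<Long>());
        Plan plan = env.createProgramPlan();
        // one sink operator per output() call, even though the source is shared
        Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());
        // prints 3
        System.out.println(sinks.size());
    }
}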

Example 3 with Operator

use of org.apache.flink.api.common.operators.Operator in project flink by apache.

the class OperatorTranslation method translate.

private <T> Operator<T> translate(DataSet<T> dataSet) {
    while (dataSet instanceof NoOpOperator) {
        dataSet = ((NoOpOperator<T>) dataSet).getInput();
    }
    // check if we have already translated that data set (operation or source)
    Operator<?> previous = this.translated.get(dataSet);
    if (previous != null) {
        // union operators are not reused: the optimizer later merges the
        // resulting chains of binary unions into a single n-ary union
        if (!(dataSet instanceof UnionOperator)) {
            // all other operators are reused when encountered again
            @SuppressWarnings("unchecked") Operator<T> typedPrevious = (Operator<T>) previous;
            return typedPrevious;
        }
    }
    Operator<T> dataFlowOp;
    if (dataSet instanceof DataSource) {
        DataSource<T> dataSource = (DataSource<T>) dataSet;
        dataFlowOp = dataSource.translateToDataFlow();
        dataFlowOp.setResources(dataSource.getMinResources(), dataSource.getPreferredResources());
    } else if (dataSet instanceof SingleInputOperator) {
        SingleInputOperator<?, ?, ?> singleInputOperator = (SingleInputOperator<?, ?, ?>) dataSet;
        dataFlowOp = translateSingleInputOperator(singleInputOperator);
        dataFlowOp.setResources(singleInputOperator.getMinResources(), singleInputOperator.getPreferredResources());
    } else if (dataSet instanceof TwoInputOperator) {
        TwoInputOperator<?, ?, ?, ?> twoInputOperator = (TwoInputOperator<?, ?, ?, ?>) dataSet;
        dataFlowOp = translateTwoInputOperator(twoInputOperator);
        dataFlowOp.setResources(twoInputOperator.getMinResources(), twoInputOperator.getPreferredResources());
    } else if (dataSet instanceof BulkIterationResultSet) {
        BulkIterationResultSet<?> bulkIterationResultSet = (BulkIterationResultSet<?>) dataSet;
        dataFlowOp = translateBulkIteration(bulkIterationResultSet);
        dataFlowOp.setResources(bulkIterationResultSet.getIterationHead().getMinResources(), bulkIterationResultSet.getIterationHead().getPreferredResources());
    } else if (dataSet instanceof DeltaIterationResultSet) {
        DeltaIterationResultSet<?, ?> deltaIterationResultSet = (DeltaIterationResultSet<?, ?>) dataSet;
        dataFlowOp = translateDeltaIteration(deltaIterationResultSet);
        dataFlowOp.setResources(deltaIterationResultSet.getIterationHead().getMinResources(), deltaIterationResultSet.getIterationHead().getPreferredResources());
    } else if (dataSet instanceof DeltaIteration.SolutionSetPlaceHolder || dataSet instanceof DeltaIteration.WorksetPlaceHolder) {
        throw new InvalidProgramException("A data set that is part of a delta iteration was used as a sink or action." + " Did you forget to close the iteration?");
    } else {
        throw new RuntimeException("Error while creating the data flow plan for the program: Unknown operator or data set type: " + dataSet);
    }
    this.translated.put(dataSet, dataFlowOp);
    // take care of broadcast variables
    translateBcVariables(dataSet, dataFlowOp);
    return dataFlowOp;
}
Also used: AbstractUdfOperator (org.apache.flink.api.common.operators.AbstractUdfOperator), Operator (org.apache.flink.api.common.operators.Operator), InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)
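
At its core, translate() is a translate-once cache: each DataSet maps to exactly one common-API Operator, so a data set consumed by several branches is translated a single time; only unions are deliberately re-translated so the optimizer can later merge chains of binary unions. The pattern in isolation, as a stripped-down hypothetical sketch (MemoizedTranslator and TranslateFn are invented names, not Flink classes):

import java.util.IdentityHashMap;
import java.util.Map;

// hypothetical illustration of the caching pattern in OperatorTranslation.translate()
public class MemoizedTranslator<S, T> {

    public interface TranslateFn<S, T> {

        T apply(S source);
    }

    private final Map<S, T> translated = new IdentityHashMap<S, T>();

    private final TranslateFn<S, T> fn;

    public MemoizedTranslator(TranslateFn<S, T> fn) {
        this.fn = fn;
    }

    public T translate(S source) {
        T previous = translated.get(source);
        if (previous != null) {
            // reuse: this input is shared by several downstream branches
            return previous;
        }
        T result = fn.apply(source);
        translated.put(source, result);
        return result;
    }
}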

Example 4 with Operator

use of org.apache.flink.api.common.operators.Operator in project flink by apache.

the class GraphCreatingVisitor method postVisit.

@Override
public void postVisit(Operator<?> c) {
    OptimizerNode n = this.con2node.get(c);
    // first connect to the predecessors
    n.setInput(this.con2node, this.defaultDataExchangeMode);
    n.setBroadcastInputs(this.con2node, this.defaultDataExchangeMode);
    // if the node represents a bulk iteration, we recursively translate the data flow now
    if (n instanceof BulkIterationNode) {
        final BulkIterationNode iterNode = (BulkIterationNode) n;
        final BulkIterationBase<?> iter = iterNode.getIterationContract();
        // pass a copy of the non-iterative part into the iteration translation,
        // in case the iteration references its closure
        HashMap<Operator<?>, OptimizerNode> closure = new HashMap<Operator<?>, OptimizerNode>(con2node);
        // first, recursively build the data flow for the step function
        final GraphCreatingVisitor recursiveCreator = new GraphCreatingVisitor(this, true, iterNode.getParallelism(), defaultDataExchangeMode, closure);
        BulkPartialSolutionNode partialSolution;
        iter.getNextPartialSolution().accept(recursiveCreator);
        partialSolution = (BulkPartialSolutionNode) recursiveCreator.con2node.get(iter.getPartialSolution());
        OptimizerNode rootOfStepFunction = recursiveCreator.con2node.get(iter.getNextPartialSolution());
        if (partialSolution == null) {
            throw new CompilerException("Error: The step functions result does not depend on the partial solution.");
        }
        OptimizerNode terminationCriterion = null;
        if (iter.getTerminationCriterion() != null) {
            terminationCriterion = recursiveCreator.con2node.get(iter.getTerminationCriterion());
            // no intermediate node yet, traverse from the termination criterion to build the missing parts
            if (terminationCriterion == null) {
                iter.getTerminationCriterion().accept(recursiveCreator);
                terminationCriterion = recursiveCreator.con2node.get(iter.getTerminationCriterion());
            }
        }
        iterNode.setPartialSolution(partialSolution);
        iterNode.setNextPartialSolution(rootOfStepFunction, terminationCriterion);
        // go over the contained data flow and mark the dynamic path nodes
        StaticDynamicPathIdentifier identifier = new StaticDynamicPathIdentifier(iterNode.getCostWeight());
        iterNode.acceptForStepFunction(identifier);
    } else if (n instanceof WorksetIterationNode) {
        final WorksetIterationNode iterNode = (WorksetIterationNode) n;
        final DeltaIterationBase<?, ?> iter = iterNode.getIterationContract();
        // we need to ensure that both the next-workset and the solution-set-delta depend on the workset.
        // One check comes for free during the translation; we do the other check here as a precondition
        {
            StepFunctionValidator wsf = new StepFunctionValidator();
            iter.getNextWorkset().accept(wsf);
            if (!wsf.hasFoundWorkset()) {
                throw new CompilerException("In the given program, the next workset does not depend on the workset. " + "This is a prerequisite in delta iterations.");
            }
        }
        // calculate the closure of the anonymous function
        HashMap<Operator<?>, OptimizerNode> closure = new HashMap<Operator<?>, OptimizerNode>(con2node);
        // first, recursively build the data flow for the step function
        final GraphCreatingVisitor recursiveCreator = new GraphCreatingVisitor(this, true, iterNode.getParallelism(), defaultDataExchangeMode, closure);
        // descend from the solution set delta and check that it depends on both the workset
        // and the solution set. If it depends on both, this descent creates both nodes
        iter.getSolutionSetDelta().accept(recursiveCreator);
        final WorksetNode worksetNode = (WorksetNode) recursiveCreator.con2node.get(iter.getWorkset());
        if (worksetNode == null) {
            throw new CompilerException("In the given program, the solution set delta does not depend on the workset." + "This is a prerequisite in delta iterations.");
        }
        iter.getNextWorkset().accept(recursiveCreator);
        SolutionSetNode solutionSetNode = (SolutionSetNode) recursiveCreator.con2node.get(iter.getSolutionSet());
        if (solutionSetNode == null || solutionSetNode.getOutgoingConnections() == null || solutionSetNode.getOutgoingConnections().isEmpty()) {
            solutionSetNode = new SolutionSetNode((DeltaIterationBase.SolutionSetPlaceHolder<?>) iter.getSolutionSet(), iterNode);
        } else {
            for (DagConnection conn : solutionSetNode.getOutgoingConnections()) {
                OptimizerNode successor = conn.getTarget();
                if (successor.getClass() == JoinNode.class) {
                    // find out which input of the join the solution set is connected to
                    JoinNode mn = (JoinNode) successor;
                    if (mn.getFirstPredecessorNode() == solutionSetNode) {
                        mn.makeJoinWithSolutionSet(0);
                    } else if (mn.getSecondPredecessorNode() == solutionSetNode) {
                        mn.makeJoinWithSolutionSet(1);
                    } else {
                        throw new CompilerException();
                    }
                } else if (successor.getClass() == CoGroupNode.class) {
                    CoGroupNode cg = (CoGroupNode) successor;
                    if (cg.getFirstPredecessorNode() == solutionSetNode) {
                        cg.makeCoGroupWithSolutionSet(0);
                    } else if (cg.getSecondPredecessorNode() == solutionSetNode) {
                        cg.makeCoGroupWithSolutionSet(1);
                    } else {
                        throw new CompilerException();
                    }
                } else {
                    throw new InvalidProgramException("Error: The only operations allowed on the solution set are Join and CoGroup.");
                }
            }
        }
        final OptimizerNode nextWorksetNode = recursiveCreator.con2node.get(iter.getNextWorkset());
        final OptimizerNode solutionSetDeltaNode = recursiveCreator.con2node.get(iter.getSolutionSetDelta());
        // set the step function nodes to the iteration node
        iterNode.setPartialSolution(solutionSetNode, worksetNode);
        iterNode.setNextPartialSolution(solutionSetDeltaNode, nextWorksetNode, defaultDataExchangeMode);
        // go over the contained data flow and mark the dynamic path nodes
        StaticDynamicPathIdentifier pathIdentifier = new StaticDynamicPathIdentifier(iterNode.getCostWeight());
        iterNode.acceptForStepFunction(pathIdentifier);
    }
}
Also used: Operator (org.apache.flink.api.common.operators.Operator), HashMap (java.util.HashMap), WorksetNode (org.apache.flink.optimizer.dag.WorksetNode), JoinNode (org.apache.flink.optimizer.dag.JoinNode), OuterJoinNode (org.apache.flink.optimizer.dag.OuterJoinNode), CoGroupNode (org.apache.flink.optimizer.dag.CoGroupNode), BulkIterationNode (org.apache.flink.optimizer.dag.BulkIterationNode), SolutionSetNode (org.apache.flink.optimizer.dag.SolutionSetNode), OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode), WorksetIterationNode (org.apache.flink.optimizer.dag.WorksetIterationNode), InvalidProgramException (org.apache.flink.api.common.InvalidProgramException), BulkPartialSolutionNode (org.apache.flink.optimizer.dag.BulkPartialSolutionNode), CompilerException (org.apache.flink.optimizer.CompilerException), DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase), DagConnection (org.apache.flink.optimizer.dag.DagConnection)
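
The two CompilerException branches encode the contract of delta iterations: the next workset must depend on the workset, and the solution set may only be consumed by a Join or a CoGroup. A program that satisfies both rules might look like the following sketch, written against the regular DataSet delta-iteration API (the class name DeltaIterationSketch and the concrete join logic are invented for illustration):

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical illustration, not part of the Flink sources
public class DeltaIterationSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> initial = env.fromElements(new Tuple2<Long, Long>(1L, 1L), new Tuple2<Long, Long>(2L, 2L));
        // field 0 is the solution set key
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = initial.iterateDelta(initial, 10, 0);
        // the solution set is consumed by a join on the key field, as the check above requires
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset().join(iteration.getSolutionSet()).where(0).equalTo(0).with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> join(Tuple2<Long, Long> workset, Tuple2<Long, Long> solution) {
                return new Tuple2<Long, Long>(workset.f0, workset.f1 + solution.f1);
            }
        });
        // the delta feeds both the solution set update and the next workset,
        // so the next workset depends on the workset and StepFunctionValidator passes
        iteration.closeWith(delta, delta).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        env.execute();
    }
}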

Example 5 with Operator

use of org.apache.flink.api.common.operators.Operator in project flink by apache.

the class JoinTranslationTest method createPlanAndGetJoinNode.

private DualInputPlanNode createPlanAndGetJoinNode(JoinHint hint) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> i1 = env.generateSequence(1, 1000);
    DataSet<Long> i2 = env.generateSequence(1, 1000);
    i1.join(i2, hint).where(new IdentityKeySelector<Long>()).equalTo(new IdentityKeySelector<Long>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    Plan plan = env.createProgramPlan();
    // set statistics to the sources
    plan.accept(new Visitor<Operator<?>>() {

        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof GenericDataSourceBase) {
                GenericDataSourceBase<?, ?> source = (GenericDataSourceBase<?, ?>) visitable;
                setSourceStatistics(source, 10000000, 1000);
            }
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {
        }
    });
    OptimizedPlan op = compileWithStats(plan);
    return (DualInputPlanNode) ((SinkPlanNode) op.getDataSinks().iterator().next()).getInput().getSource();
}
Also used: Operator (org.apache.flink.api.common.operators.Operator), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)
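
The JoinHint handed to join(i2, hint) is what steers the optimizer towards one concrete DualInputPlanNode strategy, which is why the test compiles the plan once per hint. Supplying a hint from user code is a one-argument change, shown in this minimal sketch (JoinHintSketch and the Id key selector are invented; Id stands in for the test's IdentityKeySelector helper):

import org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical illustration, not part of the Flink sources
public class JoinHintSketch {

    public static final class Id implements KeySelector<Long, Long> {

        @Override
        public Long getKey(Long value) {
            return value;
        }
    }

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> small = env.generateSequence(1, 100);
        DataSet<Long> large = env.generateSequence(1, 1000000);
        // broadcast the small first input to all parallel instances and build a hash table from it
        small.join(large, JoinHint.BROADCAST_HASH_FIRST).where(new Id()).equalTo(new Id()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        env.execute();
    }
}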

Aggregations

Operator (org.apache.flink.api.common.operators.Operator) 5
Plan (org.apache.flink.api.common.Plan) 3
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 3
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException) 2
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan) 2
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode) 2
Test (org.junit.Test) 2
ArrayList (java.util.ArrayList) 1
HashMap (java.util.HashMap) 1
HashSet (java.util.HashSet) 1
FlatJoinFunction (org.apache.flink.api.common.functions.FlatJoinFunction) 1
AbstractUdfOperator (org.apache.flink.api.common.operators.AbstractUdfOperator) 1
GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase) 1
DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase) 1
InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) 1
PlanFilterOperator (org.apache.flink.api.java.operators.translation.PlanFilterOperator) 1
Tuple1 (org.apache.flink.api.java.tuple.Tuple1) 1
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 1
CompilerException (org.apache.flink.optimizer.CompilerException) 1
BulkIterationNode (org.apache.flink.optimizer.dag.BulkIterationNode) 1