Search in sources :

Example 1 with FlatMapOperatorBase

Use of org.apache.flink.api.common.operators.base.FlatMapOperatorBase in the Apache Flink project.

In class UnionPropertyPropagationTest, method testUnion2.

/**
 * Builds a plan that unions {@code NUM_INPUTS} flat-mapped copies of a single text source,
 * groups the union on field 0 and sums field 1, and then inspects the optimized plan.
 *
 * <p>The checks performed on the optimized plan are:
 * <ul>
 *   <li>the input of the group-reduce node is an {@code NAryUnionPlanNode} reached through a
 *       {@code FORWARD} channel;</li>
 *   <li>the union node has exactly {@code NUM_INPUTS} inputs, each produced by a flat-map
 *       operator and shipped with {@code PARTITION_HASH}.</li>
 * </ul>
 *
 * <p>The plan is also translated to a job graph, purely to verify that no error is thrown.
 */
@Test
public void testUnion2() {
    final int NUM_INPUTS = 4;
    // Construct the plan: multiple flat maps over the same source, all unioned,
    // with the unioned data set grouped and aggregated afterwards.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> source = env.readTextFile(IN_FILE);
    DataSet<Tuple2<String, Integer>> lastUnion = source.flatMap(new DummyFlatMap());
    for (int i = 1; i < NUM_INPUTS; i++) {
        lastUnion = lastUnion.union(source.flatMap(new DummyFlatMap()));
    }
    DataSet<Tuple2<String, Integer>> result = lastUnion.groupBy(0).aggregate(Aggregations.SUM, 1);
    result.writeAsText(OUT_FILE);
    // Create and optimize the program plan.
    Plan plan = env.createProgramPlan("Test union on new java-api");
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    // Compile plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            // Test on the union output connection:
            // the union must sit directly under the group operator and be connected by a forward channel.
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
                final Channel inConn = ((SingleInputPlanNode) visitable).getInput();
                // assertEquals reports the actual strategy on failure, unlike assertTrue(a == b).
                Assert.assertEquals("Union should just forward the Partitioning", ShipStrategyType.FORWARD, inConn.getShipStrategy());
                Assert.assertTrue("Union Node should be under Group operator", inConn.getSource() instanceof NAryUnionPlanNode);
            }
            // Test on the union input connections:
            // there must be NUM_INPUTS of them, each a flat-map operator with its own hash partitioning.
            if (visitable instanceof NAryUnionPlanNode) {
                int numberInputs = 0;
                for (Channel inConn : visitable.getInputs()) {
                    PlanNode inNode = inConn.getSource();
                    Assert.assertTrue("Input of Union should be FlatMapOperators", inNode.getProgramOperator() instanceof FlatMapOperatorBase);
                    Assert.assertEquals("Shipment strategy under union should partition the data", ShipStrategyType.PARTITION_HASH, inConn.getShipStrategy());
                    numberInputs++;
                }
                Assert.assertEquals("NAryUnion should have " + NUM_INPUTS + " inputs", NUM_INPUTS, numberInputs);
                // The union's inputs were checked above; returning false ends the visit of this node here.
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // Nothing to verify after a node has been visited.
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Iterator(java.util.Iterator) Test(org.junit.Test)

Example 2 with FlatMapOperatorBase

Use of org.apache.flink.api.common.operators.base.FlatMapOperatorBase in the Apache Flink project.

In class GraphCreatingVisitor, method preVisit.

/**
 * Creates the optimizer node that corresponds to the given operator the first time the
 * operator is visited, and records the operator-to-node mapping in {@code con2node}.
 *
 * <p>Data sinks are additionally collected in {@code sinks}. Iteration placeholders
 * (partial solution, workset, solution set) are bound to the node of their enclosing
 * iteration, which is looked up in the parent visitor's {@code con2node} map, and they
 * take over that node's parallelism.
 *
 * <p>If the new node has no parallelism yet (a value below 1), one is assigned: unions are
 * left at -1, operators without an own setting get the default parallelism, and operators
 * with an own setting keep it unless {@code forceParallelism} is set and the value differs
 * from the default, in which case the default is used and a warning is logged.
 *
 * @param c the operator being visited
 * @return {@code false} if a node already exists for the operator, {@code true} otherwise
 * @throws InvalidProgramException if an iteration placeholder is visited while this
 *         visitor has no parent
 * @throws IllegalArgumentException if the operator is of an unknown type
 */
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
    // An operator that already has a node was handled on an earlier visit.
    if (this.con2node.containsKey(c)) {
        return false;
    }

    // First visit: pick the node type that matches the operator type.
    final OptimizerNode node;
    if (c instanceof GenericDataSinkBase) {
        final DataSinkNode sinkNode = new DataSinkNode((GenericDataSinkBase<?>) c);
        this.sinks.add(sinkNode);
        node = sinkNode;
    } else if (c instanceof GenericDataSourceBase) {
        node = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
    } else if (c instanceof MapOperatorBase) {
        node = new MapNode((MapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof MapPartitionOperatorBase) {
        node = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FlatMapOperatorBase) {
        node = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FilterOperatorBase) {
        node = new FilterNode((FilterOperatorBase<?, ?>) c);
    } else if (c instanceof ReduceOperatorBase) {
        node = new ReduceNode((ReduceOperatorBase<?, ?>) c);
    } else if (c instanceof GroupCombineOperatorBase) {
        node = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
    } else if (c instanceof GroupReduceOperatorBase) {
        node = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
    } else if (c instanceof InnerJoinOperatorBase) {
        node = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof OuterJoinOperatorBase) {
        node = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupOperatorBase) {
        node = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupRawOperatorBase) {
        node = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CrossOperatorBase) {
        node = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof BulkIterationBase) {
        node = new BulkIterationNode((BulkIterationBase<?>) c);
    } else if (c instanceof DeltaIterationBase) {
        node = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
    } else if (c instanceof Union) {
        node = new BinaryUnionNode((Union<?>) c);
    } else if (c instanceof PartitionOperatorBase) {
        node = new PartitionNode((PartitionOperatorBase<?>) c);
    } else if (c instanceof SortPartitionOperatorBase) {
        node = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
    } else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
        // Placeholders need a parent visitor, because the enclosing iteration's node lives there.
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final BulkIterationBase.PartialSolutionPlaceHolder<?> placeHolder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
        final BulkIterationNode iterationNode =
                (BulkIterationNode) this.parent.con2node.get(placeHolder.getContainingBulkIteration());
        // The partial solution runs with the parallelism of the iteration that contains it.
        final BulkPartialSolutionNode partialSolution = new BulkPartialSolutionNode(placeHolder, iterationNode);
        partialSolution.setParallelism(iterationNode.getParallelism());
        node = partialSolution;
    } else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.WorksetPlaceHolder<?> placeHolder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
        final WorksetIterationNode iterationNode =
                (WorksetIterationNode) this.parent.con2node.get(placeHolder.getContainingWorksetIteration());
        // The workset runs with the parallelism of the iteration that contains it.
        final WorksetNode workset = new WorksetNode(placeHolder, iterationNode);
        workset.setParallelism(iterationNode.getParallelism());
        node = workset;
    } else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.SolutionSetPlaceHolder<?> placeHolder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
        final WorksetIterationNode iterationNode =
                (WorksetIterationNode) this.parent.con2node.get(placeHolder.getContainingWorksetIteration());
        // The solution set runs with the parallelism of the iteration that contains it.
        final SolutionSetNode solutionSet = new SolutionSetNode(placeHolder, iterationNode);
        solutionSet.setParallelism(iterationNode.getParallelism());
        node = solutionSet;
    } else {
        throw new IllegalArgumentException("Unknown operator type: " + c);
    }

    this.con2node.put(c, node);

    // Nodes that already carry a parallelism (the iteration placeholders above) are left alone.
    if (node.getParallelism() >= 1) {
        return true;
    }

    final int declaredParallelism = c.getParallelism();
    final int effectiveParallelism;
    if (node instanceof BinaryUnionNode) {
        // Keep the parallelism of a union undefined for now.
        // It will be determined based on the parallelism of its successor.
        effectiveParallelism = -1;
    } else if (declaredParallelism <= 0) {
        // The operator has no setting of its own, so it falls back to the default.
        effectiveParallelism = this.defaultParallelism;
    } else if (this.forceParallelism && declaredParallelism != this.defaultParallelism) {
        // The operator's own setting is overridden by the enforced default.
        Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
        effectiveParallelism = this.defaultParallelism;
    } else {
        effectiveParallelism = declaredParallelism;
    }
    node.setParallelism(effectiveParallelism);
    return true;
}
Also used : FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) FilterNode(org.apache.flink.optimizer.dag.FilterNode) CrossOperatorBase(org.apache.flink.api.common.operators.base.CrossOperatorBase) CoGroupOperatorBase(org.apache.flink.api.common.operators.base.CoGroupOperatorBase) BulkPartialSolutionNode(org.apache.flink.optimizer.dag.BulkPartialSolutionNode) DeltaIterationBase(org.apache.flink.api.common.operators.base.DeltaIterationBase) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) JoinNode(org.apache.flink.optimizer.dag.JoinNode) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) OuterJoinOperatorBase(org.apache.flink.api.common.operators.base.OuterJoinOperatorBase) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) BulkIterationBase(org.apache.flink.api.common.operators.base.BulkIterationBase) CoGroupRawOperatorBase(org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) WorksetNode(org.apache.flink.optimizer.dag.WorksetNode) CoGroupNode(org.apache.flink.optimizer.dag.CoGroupNode) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) MapNode(org.apache.flink.optimizer.dag.MapNode) GroupCombineNode(org.apache.flink.optimizer.dag.GroupCombineNode) 
Union(org.apache.flink.api.common.operators.Union) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) SolutionSetNode(org.apache.flink.optimizer.dag.SolutionSetNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) DataSourceNode(org.apache.flink.optimizer.dag.DataSourceNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) PartitionOperatorBase(org.apache.flink.api.common.operators.base.PartitionOperatorBase) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) CoGroupRawNode(org.apache.flink.optimizer.dag.CoGroupRawNode) BinaryUnionNode(org.apache.flink.optimizer.dag.BinaryUnionNode) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) PartitionNode(org.apache.flink.optimizer.dag.PartitionNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) FilterOperatorBase(org.apache.flink.api.common.operators.base.FilterOperatorBase) GroupCombineOperatorBase(org.apache.flink.api.common.operators.base.GroupCombineOperatorBase) BulkIterationNode(org.apache.flink.optimizer.dag.BulkIterationNode) ReduceNode(org.apache.flink.optimizer.dag.ReduceNode) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSourceBase(org.apache.flink.api.common.operators.GenericDataSourceBase) CrossNode(org.apache.flink.optimizer.dag.CrossNode)

Aggregations

FlatMapOperatorBase (org.apache.flink.api.common.operators.base.FlatMapOperatorBase)2 GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase)2 Iterator (java.util.Iterator)1 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)1 Plan (org.apache.flink.api.common.Plan)1 GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase)1 GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase)1 Union (org.apache.flink.api.common.operators.Union)1 BulkIterationBase (org.apache.flink.api.common.operators.base.BulkIterationBase)1 CoGroupOperatorBase (org.apache.flink.api.common.operators.base.CoGroupOperatorBase)1 CoGroupRawOperatorBase (org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase)1 CrossOperatorBase (org.apache.flink.api.common.operators.base.CrossOperatorBase)1 DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase)1 FilterOperatorBase (org.apache.flink.api.common.operators.base.FilterOperatorBase)1 GroupCombineOperatorBase (org.apache.flink.api.common.operators.base.GroupCombineOperatorBase)1 InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase)1 MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase)1 MapPartitionOperatorBase (org.apache.flink.api.common.operators.base.MapPartitionOperatorBase)1 OuterJoinOperatorBase (org.apache.flink.api.common.operators.base.OuterJoinOperatorBase)1 PartitionOperatorBase (org.apache.flink.api.common.operators.base.PartitionOperatorBase)1