Search in sources :

Example 1 with NAryUnionPlanNode

use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

the class UnionReplacementTest method testConsecutiveUnionsWithRebalance.

/**
 * Checks that a plan with consecutive UNIONs followed by REBALANCE is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> Rebalance -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the Rebalance (ShipStrategyType.PARTITION_FORCED_REBALANCE) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3): the sink is fed by a no-op
 * partitioner, which is fed via a forwarding channel by a single n-ary union whose inputs
 * are the data sources, each shipped with a forced rebalance.
 */
@Test
public void testConsecutiveUnionsWithRebalance() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.rebalance().output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be force rebalanced.", PartitioningProperty.FORCED_REBALANCED, sink.getInput().getGlobalProperties().getPartitioning());
    // the node feeding the sink is the partitioner; it must not do any work of its own
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    // assertEquals (instead of assertTrue on ==) reports the actual strategy when the check fails
    assertEquals("Partitioner should use the unary no-op driver.", DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be force rebalanced.", PartitioningProperty.FORCED_REBALANCED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    // the cast doubles as a check that the partitioner is fed directly by the n-ary union
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be force rebalanced
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be force rebalanced", PartitioningProperty.FORCED_REBALANCED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be rebalancing", ShipStrategyType.PARTITION_FORCED_REBALANCE, c.getShipStrategy());
        assertTrue("Union input should be data source", c.getSource() instanceof SourcePlanNode);
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Channel(org.apache.flink.optimizer.plan.Channel) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 2 with NAryUnionPlanNode

use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

the class UnionReplacementTest method testUnionWithTwoOutputs.

/**
 * Test for FLINK-2662.
 *
 * Checks that a plan with a union with two outputs is correctly translated.
 * The program can be illustrated as follows:
 *
 * Src1 ----------------\
 *                       >-> Union123 -> GroupBy(0) -> Sum -> Output
 * Src2 -\              /
 *        >-> Union23--<
 * Src3 -/              \
 *                       >-> Union234 -> GroupBy(1) -> Sum -> Output
 * Src4 ----------------/
 *
 * The fix for FLINK-2662 translates the union with two outputs (Union-23) into two separate
 * unions (Union-23_1 and Union-23_2) with one output each. Due to this change, the interesting
 * partitioning properties for GroupBy(0) and GroupBy(1) are pushed through Union-23_1 and
 * Union-23_2 and do not interfere with each other (which would be the case if Union-23 were
 * a single operator with two outputs).
 */
@Test
public void testUnionWithTwoOutputs() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));
    // union23 is consumed twice, which is the "union with two outputs" under test
    DataSet<Tuple2<Long, Long>> union23 = src2.union(src3);
    DataSet<Tuple2<Long, Long>> union123 = src1.union(union23);
    DataSet<Tuple2<Long, Long>> union234 = src4.union(union23);
    union123.groupBy(0).sum(1).name("1").output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    union234.groupBy(1).sum(0).name("2").output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode groupRed1 = resolver.getNode("1");
    SingleInputPlanNode groupRed2 = resolver.getNode("2");
    // check partitioning is correct
    assertTrue("Reduce input should be partitioned on 0.", groupRed1.getInput().getGlobalProperties().getPartitioningFields().isExactMatch(new FieldList(0)));
    assertTrue("Reduce input should be partitioned on 1.", groupRed2.getInput().getGlobalProperties().getPartitioningFields().isExactMatch(new FieldList(1)));
    // check group reduce inputs are n-ary unions with three inputs
    assertTrue("Reduce input should be n-ary union with three inputs.", groupRed1.getInput().getSource() instanceof NAryUnionPlanNode && ((NAryUnionPlanNode) groupRed1.getInput().getSource()).getListOfInputs().size() == 3);
    assertTrue("Reduce input should be n-ary union with three inputs.", groupRed2.getInput().getSource() instanceof NAryUnionPlanNode && ((NAryUnionPlanNode) groupRed2.getInput().getSource()).getListOfInputs().size() == 3);
    // the instanceof checks above passed, so the casts below are safe
    NAryUnionPlanNode union123Node = (NAryUnionPlanNode) groupRed1.getInput().getSource();
    NAryUnionPlanNode union234Node = (NAryUnionPlanNode) groupRed2.getInput().getSource();
    // check channel from union to group reduce is forwarding
    assertTrue("Channel between union and group reduce should be forwarding", groupRed1.getInput().getShipStrategy().equals(ShipStrategyType.FORWARD));
    assertTrue("Channel between union and group reduce should be forwarding", groupRed2.getInput().getShipStrategy().equals(ShipStrategyType.FORWARD));
    // check that all inputs of unions are hash partitioned
    List<Channel> union123In = union123Node.getListOfInputs();
    for (Channel i : union123In) {
        assertTrue("Union input channel should hash partition on 0", i.getShipStrategy().equals(ShipStrategyType.PARTITION_HASH) && i.getShipStrategyKeys().isExactMatch(new FieldList(0)));
    }
    List<Channel> union234In = union234Node.getListOfInputs();
    for (Channel i : union234In) {
        // message fixed: this union feeds GroupBy(1), so the expected key is field 1, not 0
        assertTrue("Union input channel should hash partition on 1", i.getShipStrategy().equals(ShipStrategyType.PARTITION_HASH) && i.getShipStrategyKeys().isExactMatch(new FieldList(1)));
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Channel(org.apache.flink.optimizer.plan.Channel) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) Test(org.junit.Test)

Example 3 with NAryUnionPlanNode

use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

the class JobGraphGenerator method translateChannel.

/**
 * Connects the producer(s) behind one logical input channel to the target job vertex.
 *
 * <p>If the channel's source is an {@link NAryUnionPlanNode}, the channel is expanded to the
 * union's input channels and each of them is connected to the target individually. If the
 * source is a bulk partial solution or workset node for which no vertex is registered
 * (a "merged" iteration head), the iteration's own input channel(s) are connected instead and
 * the input index is recorded in the target configuration as the iteration head input.
 * Channels whose source is a {@link SolutionSetPlanNode} are not connected at all.
 *
 * @param input the channel to translate
 * @param inputIndex index of the logical input at the target; passed on to the connection and
 *        configuration calls
 * @param targetVertex the job vertex that receives the data
 * @param targetVertexConfig the task configuration of the target vertex
 * @param isBroadcast whether the channel is a broadcast input; selects the broadcast variants
 *        of the ship strategy and of the iterative-gate configuration
 * @return 0 if the source is a solution set node (nothing was connected), 1 otherwise
 * @throws CompilerException if the expanded channels use different serializers, if a source
 *         node cannot be resolved to a vertex, chained task or iteration, or if only some of
 *         the expanded channels are on the dynamic path
 * @throws IllegalStateException if a chained source task has no containing vertex yet
 * @throws Exception declared by the signature; NOTE(review): presumably propagated from the
 *         helper calls, confirm against their declarations
 */
private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex, TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {
    final PlanNode inputPlanNode = input.getSource();
    // the physical channels that have to be connected for this one logical input
    final Iterator<Channel> allInChannels;
    if (inputPlanNode instanceof NAryUnionPlanNode) {
        allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
        // The union's inputs are connected directly to the target below, so properties of the
        // channel leaving the union are copied onto every union input here: a BATCH exchange
        // mode is adopted, and a broadcast input turns every union input into a BROADCAST.
        // Per the original note, adopting the exchange mode avoids deadlocks when closing a
        // branching flow at runtime.
        for (Channel in : inputPlanNode.getInputs()) {
            if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
                in.setDataExchangeMode(DataExchangeMode.BATCH);
            }
            if (isBroadcast) {
                in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
            }
        }
    } else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head: no vertex is registered for the partial solution node, so
            // the iteration's input is wired to the target instead
            final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
            final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
            // check if the iteration's input is a union
            if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
            }
            // also, set the index of the gate with the partial solution
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof WorksetPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head: no vertex is registered for the workset node, so the
            // iteration's second input is wired to the target instead
            final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
            final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
            // check if the iteration's input is a union
            if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
            }
            // also, set the index of the gate with the workset
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof SolutionSetPlanNode) {
        // Connections from the solution set node are skipped: per the original note it is
        // accessed some other way rather than through a vertex connection. Report zero
        // connected inputs.
        return 0;
    } else {
        // regular case: the channel itself is the only one to connect
        allInChannels = Collections.singletonList(input).iterator();
    }
    // check that the type serializer is consistent
    TypeSerializerFactory<?> typeSerFact = null;
    // accounting for channels on the dynamic path
    int numChannelsTotal = 0;
    int numChannelsDynamicPath = 0;
    int numDynamicSenderTasksTotal = 0;
    // expand the channel to all the union channels, in case there is a union operator at its source
    while (allInChannels.hasNext()) {
        final Channel inConn = allInChannels.next();
        // sanity check the common serializer: the first channel sets it, all others must match
        if (typeSerFact == null) {
            typeSerFact = inConn.getSerializer();
        } else if (!typeSerFact.equals(inConn.getSerializer())) {
            throw new CompilerException("Conflicting types in union operator.");
        }
        final PlanNode sourceNode = inConn.getSource();
        JobVertex sourceVertex = this.vertices.get(sourceNode);
        TaskConfig sourceVertexConfig;
        if (sourceVertex == null) {
            // this predecessor is chained to another task or an iteration
            final TaskInChain chainedTask;
            final IterationDescriptor iteration;
            if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
                // push chained task
                if (chainedTask.getContainingVertex() == null) {
                    throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
                }
                sourceVertex = chainedTask.getContainingVertex();
                sourceVertexConfig = chainedTask.getTaskConfig();
            } else if ((iteration = this.iterations.get(sourceNode)) != null) {
                // predecessor is an iteration
                sourceVertex = iteration.getHeadTask();
                sourceVertexConfig = iteration.getHeadFinalResultConfig();
            } else {
                throw new CompilerException("Bug: Could not resolve source node for a channel.");
            }
        } else {
            // predecessor is its own vertex
            sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
        }
        DistributionPattern pattern = connectJobVertices(inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
        // accounting on channels and senders
        numChannelsTotal++;
        if (inConn.isOnDynamicPath()) {
            numChannelsDynamicPath++;
            numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
        }
    }
    // Either all or none of the channels of this logical input must be on the dynamic path.
    // A mix means there is a union between nodes on the static and nodes on the dynamic path.
    if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
        throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
    }
    // mark the input gate as iterative, with the summed per-receiver sender count of all
    // dynamic-path channels
    if (numDynamicSenderTasksTotal > 0) {
        if (isBroadcast) {
            targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        } else {
            targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        }
    }
    // the local strategy is added only once. in non-union case that is the actual edge,
    // in the union case, it is the edge between union and the target node
    addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
    return 1;
}
Also used : SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 4 with NAryUnionPlanNode

use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

the class JobGraphGenerator method createBulkIterationHead.

/**
 * Sets up the head task of a bulk iteration for the given partial solution node.
 *
 * <p>The head either gets a dedicated vertex or is merged into the vertex of the partial
 * solution's single successor. In both cases the head vertex and its configuration are
 * registered with the iteration's descriptor.
 *
 * @param pspn the partial solution node of the bulk iteration
 * @return the newly created head vertex, or {@code null} if the head was merged into the
 *         successor's existing vertex
 * @throws CompilerException if the successor to merge with has no vertex yet, or if no
 *         iteration descriptor is registered for the iteration
 */
private JobVertex createBulkIterationHead(BulkPartialSolutionPlanNode pspn) {
    // the bulk iteration this partial solution belongs to
    final BulkIterationPlanNode iteration = pspn.getContainingIterationNode();

    // The head is merged into a node of the step function only if all of these hold:
    // 1) the partial solution has exactly one outgoing channel, which forwards and has
    //    neither a local strategy nor a temp mode
    // 2) the successor has the same parallelism as the partial solution
    // 3) the successor is not a union
    // 4) the successor is not itself the last node of the step function
    // 5) the edge carrying the initial partial solution has no local strategy, as that
    //    would translate to a local strategy executed only in the first iteration
    boolean mergeWithSuccessor = false;
    if (mergeIterationAuxTasks && pspn.getOutgoingChannels().size() == 1) {
        final Channel onlyOut = pspn.getOutgoingChannels().get(0);
        final PlanNode next = onlyOut.getTarget();
        if (onlyOut.getShipStrategy() == ShipStrategyType.FORWARD
                && onlyOut.getLocalStrategy() == LocalStrategy.NONE
                && onlyOut.getTempMode() == TempMode.NONE) {
            if (next.getParallelism() == pspn.getParallelism()
                    && !(next instanceof NAryUnionPlanNode)
                    && next != iteration.getRootOfStepFunction()) {
                mergeWithSuccessor = iteration.getInput().getLocalStrategy() == LocalStrategy.NONE;
            }
        }
    }

    final JobVertex headVertex;
    final TaskConfig headConfig;
    if (!mergeWithSuccessor) {
        // dedicated head vertex with a no-op driver; the remaining setup happens in the
        // post visit, once the input (the initial partial solution) is connected
        headVertex = new JobVertex("PartialSolution (" + iteration.getNodeName() + ")");
        headVertex.setResources(iteration.getMinResources(), iteration.getPreferredResources());
        headVertex.setInvokableClass(IterationHeadTask.class);
        headConfig = new TaskConfig(headVertex.getConfiguration());
        headConfig.setDriver(NoOpDriver.class);
    } else {
        // adopt the successor's vertex and turn it into the iteration head
        headVertex = this.vertices.get(pspn.getOutgoingChannels().get(0).getTarget());
        if (headVertex == null) {
            throw new CompilerException("Bug: Trying to merge solution set with its successor, but successor has not been created.");
        }
        headVertex.setInvokableClass(IterationHeadTask.class);
        headConfig = new TaskConfig(headVertex.getConfiguration());
    }

    // register the head with the descriptor of the iteration
    final IterationDescriptor descriptor = this.iterations.get(iteration);
    if (descriptor == null) {
        throw new CompilerException("Bug: Iteration descriptor was not created at when translating the iteration node.");
    }
    descriptor.setHeadTask(headVertex, headConfig);

    // only a freshly created vertex is handed back; a merged head has no vertex of its own
    return mergeWithSuccessor ? null : headVertex;
}
Also used : NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) CompilerException(org.apache.flink.optimizer.CompilerException) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 5 with NAryUnionPlanNode

use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

the class JobGraphGenerator method preVisit.

/**
 * Implements the pre-visit step of a depth-first traversal of the plan. Depending on the
 * node type, it creates the job vertex for the node, registers an iteration descriptor,
 * or skips the node for now.
 *
 * <p>For a created vertex, the parallelism, max parallelism and slot sharing group are set,
 * and the vertex is stored in the node-to-vertex map. If the traversal is currently inside an
 * iteration step function, the vertex is also tagged with that iteration's id.
 *
 * @param node
 *        The node that is currently processed.
 * @return False if the node has been seen before (nothing to do, do not descend again),
 *         true if the visitor should descend to the node's children.
 * @throws CompilerException if translating the node fails for any reason (the cause is
 *         wrapped), or if a vertex inside an iteration has a higher parallelism than the
 *         iteration itself
 * @see org.apache.flink.util.Visitor#preVisit(org.apache.flink.util.Visitable)
 */
@Override
public boolean preVisit(PlanNode node) {
    // check if we have visited this node before. in non-tree graphs, this happens
    if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
        // return false to prevent further descend
        return false;
    }
    // the vertex to be created for the current node
    final JobVertex vertex;
    try {
        // NOTE(review): the order of these instanceof checks may matter if the iteration node
        // types are subclasses of the single/dual input node types. The hierarchy is not
        // visible here, so confirm before reordering.
        if (node instanceof SinkPlanNode) {
            vertex = createDataSinkVertex((SinkPlanNode) node);
        } else if (node instanceof SourcePlanNode) {
            vertex = createDataSourceVertex((SourcePlanNode) node);
        } else if (node instanceof BulkIterationPlanNode) {
            BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
            // for the bulk iteration, we skip creating anything for now. we create the graph
            // for the step function in the post visit.
            // check that the root of the step function has the same parallelism as the iteration.
            // because the tail must have the same parallelism as the head, we can only merge the last
            // operator with the tail, if they have the same parallelism. not merging is currently not
            // implemented
            PlanNode root = iterationNode.getRootOfStepFunction();
            if (root.getParallelism() != node.getParallelism()) {
                throw new CompilerException("Error: The final operator of the step " + "function has a different parallelism than the iteration operator itself.");
            }
            // register the iteration under a fresh id; no vertex is created at this point
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof WorksetIterationPlanNode) {
            WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
            // we have the same constraints as for the bulk iteration; here both the next
            // workset and the solution set delta must match the iteration's parallelism
            PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
            PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
            if (nextWorkSet.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
            }
            if (solutionSetDelta.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
            }
            // register the iteration under a fresh id; no vertex is created at this point
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof SingleInputPlanNode) {
            vertex = createSingleInputVertex((SingleInputPlanNode) node);
        } else if (node instanceof DualInputPlanNode) {
            vertex = createDualInputVertex((DualInputPlanNode) node);
        } else if (node instanceof NAryUnionPlanNode) {
            // skip the union for now
            vertex = null;
        } else if (node instanceof BulkPartialSolutionPlanNode) {
            // create a head node (or not, if it is merged into its successor)
            vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
        } else if (node instanceof SolutionSetPlanNode) {
            // we adjust the joins / cogroups that go into the solution set here
            for (Channel c : node.getOutgoingChannels()) {
                DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
                // NOTE(review): assumes the target's vertex is already registered; a missing
                // entry would surface as a NullPointerException wrapped by the catch below
                JobVertex accessingVertex = this.vertices.get(target);
                TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
                // which of the target's two inputs is fed by the solution set (-1: neither)
                int inputNum = c == target.getInput1() ? 0 : c == target.getInput2() ? 1 : -1;
                // sanity checks
                if (inputNum == -1) {
                    throw new CompilerException();
                }
                // adjust the driver: swap in the solution-set variant for the matching input side
                if (conf.getDriver().equals(JoinDriver.class)) {
                    conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
                } else if (conf.getDriver().equals(CoGroupDriver.class)) {
                    conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
                } else {
                    throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
                }
            }
            // make sure we do not visit this node again. for that, we add a 'already seen' entry into one of the sets
            this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
            vertex = null;
        } else if (node instanceof WorksetPlanNode) {
            // create the iteration head here
            vertex = createWorksetIterationHead((WorksetPlanNode) node);
        } else {
            throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
        }
    } catch (Exception e) {
        // every failure, including the CompilerExceptions thrown above, is rethrown with the
        // offending node in the message and the original exception as cause
        throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
    }
    // check if a vertex was created, or if it was chained or skipped
    if (vertex != null) {
        // set parallelism; the max parallelism is set to the same value
        int pd = node.getParallelism();
        vertex.setParallelism(pd);
        vertex.setMaxParallelism(pd);
        vertex.setSlotSharingGroup(sharingGroup);
        // check whether this vertex is part of an iteration step function
        if (this.currentIteration != null) {
            // check that the task does not have a higher parallelism than the iteration as such
            PlanNode iterationNode = (PlanNode) this.currentIteration;
            if (iterationNode.getParallelism() < pd) {
                throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, parallelism than the iteration operator.");
            }
            // store the id of the iterations the step functions participate in
            IterationDescriptor descr = this.iterations.get(this.currentIteration);
            new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
        }
        // store in the map
        this.vertices.put(node, vertex);
    }
    // returning true causes deeper descend
    return true;
}
Also used : SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) JoinWithSolutionSetFirstDriver(org.apache.flink.runtime.operators.JoinWithSolutionSetFirstDriver) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) JoinWithSolutionSetSecondDriver(org.apache.flink.runtime.operators.JoinWithSolutionSetSecondDriver) IOException(java.io.IOException) CompilerException(org.apache.flink.optimizer.CompilerException) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) 
CompilerException(org.apache.flink.optimizer.CompilerException) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) CoGroupDriver(org.apache.flink.runtime.operators.CoGroupDriver)

Aggregations

NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)19 Channel (org.apache.flink.optimizer.plan.Channel)17 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)17 BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)11 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)10 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)10 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)10 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)10 Test (org.junit.Test)10 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)9 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)9 BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode)8 SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode)8 WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode)8 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)7 CompilerException (org.apache.flink.optimizer.CompilerException)7 PlanNode (org.apache.flink.optimizer.plan.PlanNode)7 IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode)6 NamedChannel (org.apache.flink.optimizer.plan.NamedChannel)6 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)6