Example 1 with SolutionSetPlanNode

Use of org.apache.flink.optimizer.plan.SolutionSetPlanNode in project flink by apache.

Class JobGraphGenerator, method translateChannel.

private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex, TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {
    final PlanNode inputPlanNode = input.getSource();
    final Iterator<Channel> allInChannels;
    if (inputPlanNode instanceof NAryUnionPlanNode) {
        allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
        // if the union's outgoing exchange is BATCH, propagate that mode to all union
        // inputs as well, to avoid deadlocks when closing a branching flow at runtime
        for (Channel in : inputPlanNode.getInputs()) {
            if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
                in.setDataExchangeMode(DataExchangeMode.BATCH);
            }
            if (isBroadcast) {
                in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
            }
        }
        // The outgoing connection of an NAryUnion must be a forward connection.
        if (input.getShipStrategy() != ShipStrategyType.FORWARD && !isBroadcast) {
            throw new CompilerException("Optimized plan contains Union with non-forward outgoing ship strategy.");
        }
    } else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head
            final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
            final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
            // check if the iteration's input is a union
            if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
            }
            // also, set the index of the gate with the partial solution
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof WorksetPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head
            final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
            final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
            // check if the iteration's input is a union
            if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
            }
            // also, set the index of the gate with the workset
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof SolutionSetPlanNode) {
        // the solution set is accessed via a local index in the iteration head,
        // rather than a vertex connection, so no job graph edge is needed here
        return 0;
    } else {
        allInChannels = Collections.singletonList(input).iterator();
    }
    // check that the type serializer is consistent
    TypeSerializerFactory<?> typeSerFact = null;
    // accounting for channels on the dynamic path
    int numChannelsTotal = 0;
    int numChannelsDynamicPath = 0;
    int numDynamicSenderTasksTotal = 0;
    // expand the channel to all union channels, in case a union operator is at the source
    while (allInChannels.hasNext()) {
        final Channel inConn = allInChannels.next();
        // sanity check the common serializer
        if (typeSerFact == null) {
            typeSerFact = inConn.getSerializer();
        } else if (!typeSerFact.equals(inConn.getSerializer())) {
            throw new CompilerException("Conflicting types in union operator.");
        }
        final PlanNode sourceNode = inConn.getSource();
        JobVertex sourceVertex = this.vertices.get(sourceNode);
        TaskConfig sourceVertexConfig;
        if (sourceVertex == null) {
            // this predecessor is chained to another task or an iteration
            final TaskInChain chainedTask;
            final IterationDescriptor iteration;
            if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
                // push chained task
                if (chainedTask.getContainingVertex() == null) {
                    throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
                }
                sourceVertex = chainedTask.getContainingVertex();
                sourceVertexConfig = chainedTask.getTaskConfig();
            } else if ((iteration = this.iterations.get(sourceNode)) != null) {
                // predecessor is an iteration
                sourceVertex = iteration.getHeadTask();
                sourceVertexConfig = iteration.getHeadFinalResultConfig();
            } else {
                throw new CompilerException("Bug: Could not resolve source node for a channel.");
            }
        } else {
            // predecessor is its own vertex
            sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
        }
        DistributionPattern pattern = connectJobVertices(inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
        // accounting on channels and senders
        numChannelsTotal++;
        if (inConn.isOnDynamicPath()) {
            numChannelsDynamicPath++;
            numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
        }
    }
    // sanity check: for iterations, a logical input may not mix channels on the dynamic
    // path with channels on the static path, which would happen if the plan contained
    // a union between nodes on the static and nodes on the dynamic path
    if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
        throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
    }
    if (numDynamicSenderTasksTotal > 0) {
        if (isBroadcast) {
            targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        } else {
            targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        }
    }
    // the local strategy is added only once: in the non-union case to the actual edge,
    // in the union case to the edge between the union and the target node
    addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
    return 1;
}
Also used: SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode), BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode), WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode), Channel (org.apache.flink.optimizer.plan.Channel), NamedChannel (org.apache.flink.optimizer.plan.NamedChannel), DistributionPattern (org.apache.flink.runtime.jobgraph.DistributionPattern), TaskConfig (org.apache.flink.runtime.operators.util.TaskConfig), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode), BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode), WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), PlanNode (org.apache.flink.optimizer.plan.PlanNode), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), CompilerException (org.apache.flink.optimizer.CompilerException)
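
The key pattern in translateChannel is that a union is transparent at translation time: no vertex is created for an NAryUnionPlanNode, and the single logical input is instead expanded into one physical channel per union member. Below is a minimal, hedged sketch of that expansion using hypothetical stand-in classes (Node, UnionNode) rather than the real Flink types:

import java.util.Collections;
import java.util.Iterator;
import java.util.List;

public class UnionExpansionSketch {

    // simplified stand-ins for PlanNode / NAryUnionPlanNode (not the Flink classes)
    static class Node {
        final List<Node> inputs;
        Node(List<Node> inputs) { this.inputs = inputs; }
    }

    static class UnionNode extends Node {
        UnionNode(List<Node> inputs) { super(inputs); }
    }

    // expand one logical input into all physical channels, as translateChannel does
    static Iterator<Node> expand(Node source) {
        if (source instanceof UnionNode) {
            return source.inputs.iterator(); // connect each union member directly
        }
        return Collections.singletonList(source).iterator(); // single channel
    }

    public static void main(String[] args) {
        Node a = new Node(Collections.emptyList());
        Node b = new Node(Collections.emptyList());
        Node union = new UnionNode(List.of(a, b));
        int channels = 0;
        for (Iterator<Node> it = expand(union); it.hasNext(); it.next()) {
            channels++;
        }
        System.out.println("channels to connect: " + channels); // prints 2
    }
}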

Example 2 with SolutionSetPlanNode

Use of org.apache.flink.optimizer.plan.SolutionSetPlanNode in project flink by apache.

Class PlanFinalizer, method preVisit.

@Override
public boolean preVisit(PlanNode visitable) {
    // if we come here again, prevent a further descent
    if (!this.allNodes.add(visitable)) {
        return false;
    }
    if (visitable instanceof SinkPlanNode) {
        this.sinks.add((SinkPlanNode) visitable);
    } else if (visitable instanceof SourcePlanNode) {
        this.sources.add((SourcePlanNode) visitable);
    } else if (visitable instanceof BinaryUnionPlanNode) {
        BinaryUnionPlanNode unionNode = (BinaryUnionPlanNode) visitable;
        if (unionNode.unionsStaticAndDynamicPath()) {
            unionNode.setDriverStrategy(DriverStrategy.UNION_WITH_CACHED);
        }
    } else if (visitable instanceof BulkPartialSolutionPlanNode) {
        // tell the partial solution about the iteration node that contains it
        final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) visitable;
        final IterationPlanNode iteration = this.stackOfIterationNodes.peekLast();
        // sanity check!
        if (!(iteration instanceof BulkIterationPlanNode)) {
            throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a partial solutions with its containing iteration.");
        }
        pspn.setContainingIterationNode((BulkIterationPlanNode) iteration);
    } else if (visitable instanceof WorksetPlanNode) {
        // tell the workset node about the iteration node that contains it
        final WorksetPlanNode wspn = (WorksetPlanNode) visitable;
        final IterationPlanNode iteration = this.stackOfIterationNodes.peekLast();
        // sanity check!
        if (!(iteration instanceof WorksetIterationPlanNode)) {
            throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a partial solutions with its containing iteration.");
        }
        wspn.setContainingIterationNode((WorksetIterationPlanNode) iteration);
    } else if (visitable instanceof SolutionSetPlanNode) {
        // tell the solution set node about the iteration node that contains it
        final SolutionSetPlanNode sspn = (SolutionSetPlanNode) visitable;
        final IterationPlanNode iteration = this.stackOfIterationNodes.peekLast();
        // sanity check!
        if (!(iteration instanceof WorksetIterationPlanNode)) {
            throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a partial solutions with its containing iteration.");
        }
        sspn.setContainingIterationNode((WorksetIterationPlanNode) iteration);
    }
    // double-connect the inputs: so far only the parents knew their children, since
    // one child candidate could have been referenced by multiple parents
    for (Channel conn : visitable.getInputs()) {
        conn.setTarget(visitable);
        conn.getSource().addOutgoingChannel(conn);
    }
    for (Channel c : visitable.getBroadcastInputs()) {
        c.setTarget(visitable);
        c.getSource().addOutgoingChannel(c);
    }
    // count the memory consumption
    this.memoryConsumerWeights += visitable.getMemoryConsumerWeight();
    for (Channel c : visitable.getInputs()) {
        if (c.getLocalStrategy().dams()) {
            this.memoryConsumerWeights++;
        }
        if (c.getTempMode() != TempMode.NONE) {
            this.memoryConsumerWeights++;
        }
    }
    for (Channel c : visitable.getBroadcastInputs()) {
        if (c.getLocalStrategy().dams()) {
            this.memoryConsumerWeights++;
        }
        if (c.getTempMode() != TempMode.NONE) {
            this.memoryConsumerWeights++;
        }
    }
    // pass the visitor to the iteration's step function
    if (visitable instanceof IterationPlanNode) {
        // push the iteration node onto the stack
        final IterationPlanNode iterNode = (IterationPlanNode) visitable;
        this.stackOfIterationNodes.addLast(iterNode);
        // recurse
        ((IterationPlanNode) visitable).acceptForStepFunction(this);
        // pop the iteration node from the stack
        this.stackOfIterationNodes.removeLast();
    }
    return true;
}
Also used: SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode), BinaryUnionPlanNode (org.apache.flink.optimizer.plan.BinaryUnionPlanNode), BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode), WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode), Channel (org.apache.flink.optimizer.plan.Channel), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), CompilerException (org.apache.flink.optimizer.CompilerException), WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode), BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode), IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode)
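
preVisit combines two idioms: a "seen" set that stops repeated descent in non-tree plans, and a deque of iteration nodes so that nested nodes can find their innermost enclosing iteration via peekLast(). The following is a small sketch of the same skeleton, with hypothetical simplified types rather than the Flink classes:

import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.Set;

public class IterationStackSketch {

    // hypothetical simplified node types (not the Flink classes)
    interface Node { Iterable<Node> children(); }
    interface IterationNode extends Node { Node stepFunction(); }

    private final Set<Node> seen = new HashSet<>();
    private final Deque<IterationNode> stack = new ArrayDeque<>();

    void visit(Node node) {
        if (!seen.add(node)) {
            return; // already visited through another parent
        }
        // peekLast() yields the innermost enclosing iteration (or null at top level),
        // which is what associates partial solutions with their iteration above
        IterationNode enclosing = stack.peekLast();
        System.out.println(node + " enclosed by " + enclosing);

        if (node instanceof IterationNode) {
            IterationNode iter = (IterationNode) node;
            stack.addLast(iter);   // push before descending into the step function
            visit(iter.stepFunction());
            stack.removeLast();    // pop once the step function is done
        }
        for (Node child : node.children()) {
            visit(child);
        }
    }

    public static void main(String[] args) {
        Node leaf = new Node() {
            public Iterable<Node> children() { return Collections.emptyList(); }
            public String toString() { return "mapper"; }
        };
        IterationNode iteration = new IterationNode() {
            public Iterable<Node> children() { return Collections.emptyList(); }
            public Node stepFunction() { return leaf; }
            public String toString() { return "bulk-iteration"; }
        };
        new IterationStackSketch().visit(iteration);
    }
}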

Example 3 with SolutionSetPlanNode

Use of org.apache.flink.optimizer.plan.SolutionSetPlanNode in project flink by apache.

Class JobGraphGenerator, method preVisit.

/**
 * This method implements the pre-visiting during a depth-first traversal. It creates the job
 * vertex and sets the local strategy.
 *
 * @param node The node that is currently processed.
 * @return True, if the visitor should descend to the node's children, false if not.
 * @see org.apache.flink.util.Visitor#preVisit(org.apache.flink.util.Visitable)
 */
@Override
public boolean preVisit(PlanNode node) {
    // check if we have visited this node before. in non-tree graphs, this happens when a
    // node is reached through several paths
    if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
        // return false to prevent further descent
        return false;
    }
    // the vertex to be created for the current node
    final JobVertex vertex;
    try {
        if (node instanceof SinkPlanNode) {
            vertex = createDataSinkVertex((SinkPlanNode) node);
        } else if (node instanceof SourcePlanNode) {
            vertex = createDataSourceVertex((SourcePlanNode) node);
        } else if (node instanceof BulkIterationPlanNode) {
            BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
            // for the bulk iteration, we skip creating anything for now. we create the graph
            // for the step function in the post visit.
            // check that the root of the step function has the same parallelism as the
            // iteration: because the tail must have the same parallelism as the head, we can
            // only merge the last operator with the tail if they have the same parallelism.
            // not merging is currently not implemented.
            PlanNode root = iterationNode.getRootOfStepFunction();
            if (root.getParallelism() != node.getParallelism()) {
                throw new CompilerException("Error: The final operator of the step " + "function has a different parallelism than the iteration operator itself.");
            }
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof WorksetIterationPlanNode) {
            WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
            // we have the same constraints as for the bulk iteration
            PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
            PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
            if (nextWorkSet.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
            }
            if (solutionSetDelta.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
            }
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof SingleInputPlanNode) {
            vertex = createSingleInputVertex((SingleInputPlanNode) node);
        } else if (node instanceof DualInputPlanNode) {
            vertex = createDualInputVertex((DualInputPlanNode) node);
        } else if (node instanceof NAryUnionPlanNode) {
            // skip the union for now
            vertex = null;
        } else if (node instanceof BulkPartialSolutionPlanNode) {
            // create a head node (or not, if it is merged into its successor)
            vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
        } else if (node instanceof SolutionSetPlanNode) {
            // we adjust the joins / cogroups that go into the solution set here
            for (Channel c : node.getOutgoingChannels()) {
                DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
                JobVertex accessingVertex = this.vertices.get(target);
                TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
                int inputNum = c == target.getInput1() ? 0 : c == target.getInput2() ? 1 : -1;
                // sanity checks
                if (inputNum == -1) {
                    throw new CompilerException();
                }
                // adjust the driver
                if (conf.getDriver().equals(JoinDriver.class)) {
                    conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
                } else if (conf.getDriver().equals(CoGroupDriver.class)) {
                    conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
                } else {
                    throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
                }
            }
            // make sure we do not visit this node again: for that, we add an
            // 'already seen' entry into one of the sets
            this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
            vertex = null;
        } else if (node instanceof WorksetPlanNode) {
            // create the iteration head here
            vertex = createWorksetIterationHead((WorksetPlanNode) node);
        } else {
            throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
        }
    } catch (Exception e) {
        throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
    }
    // check if a vertex was created, or if it was chained or skipped
    if (vertex != null) {
        // set parallelism
        int pd = node.getParallelism();
        vertex.setParallelism(pd);
        vertex.setMaxParallelism(pd);
        vertex.setSlotSharingGroup(sharingGroup);
        // check whether this vertex is part of an iteration step function
        if (this.currentIteration != null) {
            // check that the task has the same parallelism as the iteration as such
            PlanNode iterationNode = (PlanNode) this.currentIteration;
            if (iterationNode.getParallelism() < pd) {
                throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, parallelism than the iteration operator.");
            }
            // store the id of the iterations the step functions participate in
            IterationDescriptor descr = this.iterations.get(this.currentIteration);
            new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
        }
        // store in the map
        this.vertices.put(node, vertex);
    }
    // returning true causes deeper descent
    return true;
}
Also used: SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode), JoinWithSolutionSetFirstDriver (org.apache.flink.runtime.operators.JoinWithSolutionSetFirstDriver), WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode), BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode), Channel (org.apache.flink.optimizer.plan.Channel), NamedChannel (org.apache.flink.optimizer.plan.NamedChannel), TaskConfig (org.apache.flink.runtime.operators.util.TaskConfig), JoinWithSolutionSetSecondDriver (org.apache.flink.runtime.operators.JoinWithSolutionSetSecondDriver), IOException (java.io.IOException), CompilerException (org.apache.flink.optimizer.CompilerException), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode), BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode), WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), PlanNode (org.apache.flink.optimizer.plan.PlanNode), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), CoGroupDriver (org.apache.flink.runtime.operators.CoGroupDriver)
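
The SolutionSetPlanNode branch shows why Example 1 returns 0 for solution-set inputs: instead of wiring a network channel, the generator rewrites the consuming vertex's driver to a variant that probes the solution-set index directly. A compressed, hedged sketch of that driver dispatch, with stand-in marker classes instead of Flink's real runtime drivers:

public class DriverSwapSketch {

    // stand-in marker classes for the runtime drivers (not the Flink classes)
    static class JoinDriver {}
    static class CoGroupDriver {}
    static class JoinWithSolutionSetFirstDriver {}
    static class JoinWithSolutionSetSecondDriver {}
    static class CoGroupWithSolutionSetFirstDriver {}
    static class CoGroupWithSolutionSetSecondDriver {}

    // mirrors the rewrite in preVisit: pick the solution-set-aware driver based on
    // which input (0 or 1) the solution set feeds
    static Class<?> specialize(Class<?> driver, int solutionSetInput) {
        if (driver == JoinDriver.class) {
            return solutionSetInput == 0
                    ? JoinWithSolutionSetFirstDriver.class
                    : JoinWithSolutionSetSecondDriver.class;
        } else if (driver == CoGroupDriver.class) {
            return solutionSetInput == 0
                    ? CoGroupWithSolutionSetFirstDriver.class
                    : CoGroupWithSolutionSetSecondDriver.class;
        }
        throw new IllegalArgumentException("only Join/CoGroup may consume the solution set");
    }

    public static void main(String[] args) {
        System.out.println(specialize(JoinDriver.class, 0).getSimpleName());
        System.out.println(specialize(CoGroupDriver.class, 1).getSimpleName());
    }
}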

Example 4 with SolutionSetPlanNode

Use of org.apache.flink.optimizer.plan.SolutionSetPlanNode in project flink by apache.

Class JavaApiPostPass, method traverse.

protected void traverse(PlanNode node) {
    if (!alreadyDone.add(node)) {
        // already worked on that one
        return;
    }
    // distinguish the node types
    if (node instanceof SinkPlanNode) {
        // descend to the input channel
        SinkPlanNode sn = (SinkPlanNode) node;
        Channel inchannel = sn.getInput();
        traverseChannel(inchannel);
    } else if (node instanceof SourcePlanNode) {
        TypeInformation<?> typeInfo = getTypeInfoFromSource((SourcePlanNode) node);
        ((SourcePlanNode) node).setSerializer(createSerializer(typeInfo));
    } else if (node instanceof BulkIterationPlanNode) {
        BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
        if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
        }
        // traverse the termination criterion as well, which is needed in case of an
        // intermediate termination criterion
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverseChannel(addMapper.getInput());
        }
        BulkIterationBase<?> operator = (BulkIterationBase<?>) iterationNode.getProgramOperator();
        // set the serializer
        iterationNode.setSerializerForIterationChannel(createSerializer(operator.getOperatorInfo().getOutputType()));
        // done, we can now propagate our info down
        traverseChannel(iterationNode.getInput());
        traverse(iterationNode.getRootOfStepFunction());
    } else if (node instanceof WorksetIterationPlanNode) {
        WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
        if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
        }
        if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
        }
        DeltaIterationBase<?, ?> operator = (DeltaIterationBase<?, ?>) iterationNode.getProgramOperator();
        // set the serializers and comparators for the workset iteration
        iterationNode.setSolutionSetSerializer(createSerializer(operator.getOperatorInfo().getFirstInputType()));
        iterationNode.setWorksetSerializer(createSerializer(operator.getOperatorInfo().getSecondInputType()));
        iterationNode.setSolutionSetComparator(createComparator(operator.getOperatorInfo().getFirstInputType(), iterationNode.getSolutionSetKeyFields(), getSortOrders(iterationNode.getSolutionSetKeyFields(), null)));
        // traverse the inputs
        traverseChannel(iterationNode.getInput1());
        traverseChannel(iterationNode.getInput2());
        // traverse the step function
        traverse(iterationNode.getSolutionSetDeltaPlanNode());
        traverse(iterationNode.getNextWorkSetPlanNode());
    } else if (node instanceof SingleInputPlanNode) {
        SingleInputPlanNode sn = (SingleInputPlanNode) node;
        if (!(sn.getOptimizerNode().getOperator() instanceof SingleInputOperator)) {
            // Special case for delta iterations
            if (sn.getOptimizerNode().getOperator() instanceof NoOpUnaryUdfOp) {
                traverseChannel(sn.getInput());
                return;
            } else {
                throw new RuntimeException("Wrong operator type found in post pass.");
            }
        }
        SingleInputOperator<?, ?, ?> singleInputOperator = (SingleInputOperator<?, ?, ?>) sn.getOptimizerNode().getOperator();
        // parameterize the node's driver strategy
        for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
            sn.setComparator(createComparator(singleInputOperator.getOperatorInfo().getInputType(), sn.getKeys(i), getSortOrders(sn.getKeys(i), sn.getSortOrders(i))), i);
        }
        // done, we can now propagate our info down
        traverseChannel(sn.getInput());
        // don't forget the broadcast inputs
        for (Channel c : sn.getBroadcastInputs()) {
            traverseChannel(c);
        }
    } else if (node instanceof DualInputPlanNode) {
        DualInputPlanNode dn = (DualInputPlanNode) node;
        if (!(dn.getOptimizerNode().getOperator() instanceof DualInputOperator)) {
            throw new RuntimeException("Wrong operator type found in post pass.");
        }
        DualInputOperator<?, ?, ?, ?> dualInputOperator = (DualInputOperator<?, ?, ?, ?>) dn.getOptimizerNode().getOperator();
        // parameterize the node's driver strategy
        if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
            dn.setComparator1(createComparator(dualInputOperator.getOperatorInfo().getFirstInputType(), dn.getKeysForInput1(), getSortOrders(dn.getKeysForInput1(), dn.getSortOrders())));
            dn.setComparator2(createComparator(dualInputOperator.getOperatorInfo().getSecondInputType(), dn.getKeysForInput2(), getSortOrders(dn.getKeysForInput2(), dn.getSortOrders())));
            dn.setPairComparator(createPairComparator(dualInputOperator.getOperatorInfo().getFirstInputType(), dualInputOperator.getOperatorInfo().getSecondInputType()));
        }
        traverseChannel(dn.getInput1());
        traverseChannel(dn.getInput2());
        // don't forget the broadcast inputs
        for (Channel c : dn.getBroadcastInputs()) {
            traverseChannel(c);
        }
    } else if (node instanceof BulkPartialSolutionPlanNode
            || node instanceof SolutionSetPlanNode
            || node instanceof WorksetPlanNode) {
        // the sources of the iterative step functions -- nothing to do here
    } else if (node instanceof NAryUnionPlanNode) {
        // Traverse to all child channels
        for (Channel channel : node.getInputs()) {
            traverseChannel(channel);
        }
    } else {
        throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
    }
}
Also used: SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode), WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode), BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode), Channel (org.apache.flink.optimizer.plan.Channel), DualInputOperator (org.apache.flink.api.common.operators.DualInputOperator), SingleInputOperator (org.apache.flink.api.common.operators.SingleInputOperator), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), NoOpUnaryUdfOp (org.apache.flink.optimizer.util.NoOpUnaryUdfOp), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), CompilerException (org.apache.flink.optimizer.CompilerException), WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode), CompilerPostPassException (org.apache.flink.optimizer.CompilerPostPassException), BulkIterationBase (org.apache.flink.api.common.operators.base.BulkIterationBase), DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase), BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)
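
traverse is a memoized depth-first walk over the plan DAG: the alreadyDone set guarantees each node is processed once even when several consumers share a producer. Here is the same skeleton reduced to plain Java, with a hypothetical Node class standing in for the Flink plan node:

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class PostPassTraversalSketch {

    // hypothetical simplified plan node (not the Flink type)
    static final class Node {
        final String name;
        final List<Node> inputs;
        Node(String name, List<Node> inputs) { this.name = name; this.inputs = inputs; }
    }

    private final Set<Node> alreadyDone = new HashSet<>();

    void traverse(Node node) {
        if (!alreadyDone.add(node)) {
            return; // reached through another branch already
        }
        System.out.println("processing " + node.name);
        for (Node input : node.inputs) {
            traverse(input); // descend toward the sources
        }
    }

    public static void main(String[] args) {
        Node source = new Node("source", List.of());
        Node map = new Node("map", List.of(source));
        Node join = new Node("join", List.of(map, map)); // diamond shape
        new PostPassTraversalSketch().traverse(join);    // 'map' printed once
    }
}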

Example 5 with SolutionSetPlanNode

Use of org.apache.flink.optimizer.plan.SolutionSetPlanNode in project flink by apache.

Class WorksetIterationNode, method instantiate.

@SuppressWarnings("unchecked")
@Override
protected void instantiate(OperatorDescriptorDual operator, Channel solutionSetIn, Channel worksetIn, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReqSolutionSet, RequestedGlobalProperties globPropsReqWorkset, RequestedLocalProperties locPropsReqSolutionSet, RequestedLocalProperties locPropsReqWorkset) {
    // check for pipeline breaking using hash join with build on the solution set side
    placePipelineBreakersIfNecessary(DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn);
    // NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
    // Whenever we instantiate the iteration, we enumerate new candidates for the step
    // function. That way, we make sure we have an appropriate plan for each candidate for
    // the initial partial solution and a fitting candidate for the step function (often,
    // work is pushed out of the step function). Among the candidates of the step function,
    // we keep only those that meet the requested properties of the current candidate
    // initial partial solution. That makes sure these properties exist at the beginning of
    // every iteration.
    // 1) Because we enumerate multiple times, we may need to clean the cached plans
    // before starting another enumeration
    this.nextWorkset.accept(PlanCacheCleaner.INSTANCE);
    this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE);
    // 2) Give the partial solution the properties of the current candidate for the
    // initial partial solution. Currently, this concerns only the workset.
    this.worksetNode.setCandidateProperties(worksetIn.getGlobalProperties(), worksetIn.getLocalProperties(), worksetIn);
    this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties(), solutionSetIn);
    final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode();
    final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode();
    // 3) Get the alternative plans
    List<PlanNode> solutionSetDeltaCandidates = this.solutionSetDelta.getAlternativePlans(estimator);
    List<PlanNode> worksetCandidates = this.nextWorkset.getAlternativePlans(estimator);
    // 4) Throw away all that are not compatible with the properties currently requested to the
    // initial partial solution
    // Make sure that the workset candidates fulfill the input requirements
    {
        List<PlanNode> newCandidates = new ArrayList<PlanNode>();
        for (Iterator<PlanNode> planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) {
            PlanNode candidate = planDeleter.next();
            GlobalProperties atEndGlobal = candidate.getGlobalProperties();
            LocalProperties atEndLocal = candidate.getLocalProperties();
            FeedbackPropertiesMeetRequirementsReport report = candidate.checkPartialSolutionPropertiesMet(wspn, atEndGlobal, atEndLocal);
            if (report == FeedbackPropertiesMeetRequirementsReport.NO_PARTIAL_SOLUTION) {
            // this candidate depends on the workset only through a broadcast
            // variable, so there is nothing to check
            } else if (report == FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                // attach a no-op node through which we create the properties of the original
                // input
                Channel toNoOp = new Channel(candidate);
                globPropsReqWorkset.parameterizeChannel(toNoOp, false, nextWorksetRootConnection.getDataExchangeMode(), false);
                locPropsReqWorkset.parameterizeChannel(toNoOp);
                NoOpUnaryUdfOp noOpUnaryUdfOp = new NoOpUnaryUdfOp<>();
                noOpUnaryUdfOp.setInput(candidate.getProgramOperator());
                UnaryOperatorNode rebuildWorksetPropertiesNode = new UnaryOperatorNode("Rebuild Workset Properties", noOpUnaryUdfOp, true);
                rebuildWorksetPropertiesNode.setParallelism(candidate.getParallelism());
                SingleInputPlanNode rebuildWorksetPropertiesPlanNode = new SingleInputPlanNode(rebuildWorksetPropertiesNode, "Rebuild Workset Properties", toNoOp, DriverStrategy.UNARY_NO_OP);
                rebuildWorksetPropertiesPlanNode.initProperties(toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
                estimator.costOperator(rebuildWorksetPropertiesPlanNode);
                GlobalProperties atEndGlobalModified = rebuildWorksetPropertiesPlanNode.getGlobalProperties();
                LocalProperties atEndLocalModified = rebuildWorksetPropertiesPlanNode.getLocalProperties();
                if (!(atEndGlobalModified.equals(atEndGlobal) && atEndLocalModified.equals(atEndLocal))) {
                    FeedbackPropertiesMeetRequirementsReport report2 = candidate.checkPartialSolutionPropertiesMet(wspn, atEndGlobalModified, atEndLocalModified);
                    if (report2 != FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                        newCandidates.add(rebuildWorksetPropertiesPlanNode);
                    }
                }
                // remove the original candidate; the rebuilt one is appended after the loop
                planDeleter.remove();
            }
        }
        worksetCandidates.addAll(newCandidates);
    }
    if (worksetCandidates.isEmpty()) {
        return;
    }
    // sanity check the solution set delta
    for (PlanNode solutionSetDeltaCandidate : solutionSetDeltaCandidates) {
        SingleInputPlanNode candidate = (SingleInputPlanNode) solutionSetDeltaCandidate;
        GlobalProperties gp = candidate.getGlobalProperties();
        if (gp.getPartitioning() != PartitioningProperty.HASH_PARTITIONED || gp.getPartitioningFields() == null || !gp.getPartitioningFields().equals(this.solutionSetKeyFields)) {
            throw new CompilerException("Bug: The solution set delta is not partitioned.");
        }
    }
    // 5) Create a candidate for the Iteration Node for every remaining plan of the step
    // function.
    final GlobalProperties gp = new GlobalProperties();
    gp.setHashPartitioned(this.solutionSetKeyFields);
    gp.addUniqueFieldCombination(this.solutionSetKeyFields);
    LocalProperties lp = LocalProperties.EMPTY.addUniqueFields(this.solutionSetKeyFields);
    // take all combinations of solution set delta and workset plans
    for (PlanNode worksetCandidate : worksetCandidates) {
        for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) {
            // check whether they have the same operator at their latest branching point
            if (this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) {
                SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate;
                boolean immediateDeltaUpdate;
                // can update on the fly
                if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD && this.solutionDeltaImmediatelyAfterSolutionJoin) {
                    // sanity check the node and connection
                    if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) {
                        throw new CompilerException("Invalid Solution set delta node.");
                    }
                    solutionSetCandidate = siSolutionDeltaCandidate.getInput().getSource();
                    immediateDeltaUpdate = true;
                } else {
                    // the delta cannot be applied on the fly, so keep this node and
                    // mark its input for materialization (pipeline breaker)
                    siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER);
                    immediateDeltaUpdate = false;
                }
                WorksetIterationPlanNode wsNode = new WorksetIterationPlanNode(this, this.getOperator().getName(), solutionSetIn, worksetIn, sspn, wspn, worksetCandidate, solutionSetCandidate);
                wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate);
                wsNode.initProperties(gp, lp);
                target.add(wsNode);
            }
        }
    }
}
Also used: SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode), FeedbackPropertiesMeetRequirementsReport (org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport), WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode), Channel (org.apache.flink.optimizer.plan.Channel), NamedChannel (org.apache.flink.optimizer.plan.NamedChannel), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), PlanNode (org.apache.flink.optimizer.plan.PlanNode), RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties), GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties), NoOpUnaryUdfOp (org.apache.flink.optimizer.util.NoOpUnaryUdfOp), Iterator (java.util.Iterator), CompilerException (org.apache.flink.optimizer.CompilerException), ArrayList (java.util.ArrayList), FieldList (org.apache.flink.api.common.operators.util.FieldList), List (java.util.List), RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties), LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)
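
Step 4) uses a pattern worth calling out: rejected workset candidates are removed through the iterator while their property-rebuilding replacements are collected in a side list and appended only after the loop, because adding to the list during iteration would throw a ConcurrentModificationException. A minimal sketch of the same filter-and-replace idiom over plain integers:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class FilterAndReplaceSketch {

    // walk the list with an explicit iterator so entries can be removed in place;
    // replacements go into a side list and are appended after the walk
    static void rewrite(List<Integer> candidates) {
        List<Integer> replacements = new ArrayList<>();
        for (Iterator<Integer> it = candidates.iterator(); it.hasNext(); ) {
            int c = it.next();
            if (c % 2 != 0) {            // 'does not meet the requested properties'
                replacements.add(c + 1); // 'wrap it in a property-rebuilding no-op'
                it.remove();             // drop the original candidate
            }
        }
        candidates.addAll(replacements);
    }

    public static void main(String[] args) {
        List<Integer> candidates = new ArrayList<>(List.of(1, 2, 3, 4));
        rewrite(candidates);
        System.out.println(candidates); // [2, 4, 2, 4]
    }
}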

Aggregations

CompilerException (org.apache.flink.optimizer.CompilerException): 7 usages
Channel (org.apache.flink.optimizer.plan.Channel): 7 usages
SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode): 7 usages
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 7 usages
WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode): 7 usages
BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode): 6 usages
BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode): 6 usages
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 6 usages
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 6 usages
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 6 usages
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 6 usages
NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 5 usages
IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode): 4 usages
NamedChannel (org.apache.flink.optimizer.plan.NamedChannel): 4 usages
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 4 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 3 usages
TaskConfig (org.apache.flink.runtime.operators.util.TaskConfig): 3 usages
IOException (java.io.IOException): 2 usages
CompilerPostPassException (org.apache.flink.optimizer.CompilerPostPassException): 2 usages
NoOpUnaryUdfOp (org.apache.flink.optimizer.util.NoOpUnaryUdfOp): 2 usages