Search in sources:

Example 26 with Channel

use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

the class CoGroupSolutionSetFirstTest, method testCoGroupSolutionSet.

@Test
public void testCoGroupSolutionSet() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple1<Integer>> raw = env.readCsvFile(IN_FILE).types(Integer.class);
    DeltaIteration<Tuple1<Integer>, Tuple1<Integer>> iteration = raw.iterateDelta(raw, 1000, 0);
    DataSet<Tuple1<Integer>> test = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> delta = iteration.getSolutionSet().coGroup(test).where(0).equalTo(0).with(new SimpleCGroup());
    DataSet<Tuple1<Integer>> feedback = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> result = iteration.closeWith(delta, feedback);
    result.output(new DiscardingOutputFormat<Tuple1<Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = null;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException e) {
        Assert.fail(e.getMessage());
    }
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof WorksetIterationPlanNode) {
                PlanNode deltaNode = ((WorksetIterationPlanNode) visitable).getSolutionSetDeltaPlanNode();
                //get the CoGroup that produces the solution set delta
                DualInputPlanNode dpn = (DualInputPlanNode) deltaNode.getInputs().iterator().next().getSource();
                Channel in1 = dpn.getInput1();
                Channel in2 = dpn.getInput2();
                Assert.assertTrue(in1.getLocalProperties().getOrdering() == null);
                Assert.assertTrue(in2.getLocalProperties().getOrdering() != null);
                Assert.assertTrue(in2.getLocalProperties().getOrdering().getInvolvedIndexes().contains(0));
                Assert.assertTrue(in1.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue(in2.getShipStrategy() == ShipStrategyType.PARTITION_HASH);
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Test(org.junit.Test)
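The snippet references IN_FILE, SimpleMap and SimpleCGroup, which are defined elsewhere in CoGroupSolutionSetFirstTest and are not shown here. Below is a minimal sketch of what such helpers could look like, assuming an identity map and a no-op co-group; the actual implementations in the Flink sources may differ. Required imports: MapFunction and CoGroupFunction from org.apache.flink.api.common.functions, Tuple1, and org.apache.flink.util.Collector.

// Hypothetical stand-ins, declared inside the test class, for the helpers used above.
private static final String IN_FILE = "file:///some/dummy/input.csv";

public static class SimpleMap implements MapFunction<Tuple1<Integer>, Tuple1<Integer>> {

    @Override
    public Tuple1<Integer> map(Tuple1<Integer> value) {
        // identity mapping is enough, since only the optimizer plan is inspected
        return value;
    }
}

public static class SimpleCGroup implements CoGroupFunction<Tuple1<Integer>, Tuple1<Integer>, Tuple1<Integer>> {

    @Override
    public void coGroup(Iterable<Tuple1<Integer>> first, Iterable<Tuple1<Integer>> second, Collector<Tuple1<Integer>> out) {
        // no records need to be emitted for a plan-translation test
    }
}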

Example 27 with Channel

use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

the class AllReduceProperties, method instantiate.

@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    if (in.getShipStrategy() == ShipStrategyType.FORWARD) {
        // locally connected, directly instantiate
        return new SingleInputPlanNode(node, "Reduce (" + node.getOperator().getName() + ")", in, DriverStrategy.ALL_REDUCE);
    } else {
        // non-forward case: plug in a combiner
        Channel toCombiner = new Channel(in.getSource());
        toCombiner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        // create an input node for combine with same parallelism as input node
        ReduceNode combinerNode = ((ReduceNode) node).getCombinerUtilityNode();
        combinerNode.setParallelism(in.getSource().getParallelism());
        SingleInputPlanNode combiner = new SingleInputPlanNode(combinerNode, "Combine (" + node.getOperator().getName() + ")", toCombiner, DriverStrategy.ALL_REDUCE);
        combiner.setCosts(new Costs(0, 0));
        combiner.initProperties(toCombiner.getGlobalProperties(), toCombiner.getLocalProperties());
        Channel toReducer = new Channel(combiner);
        toReducer.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(), in.getShipStrategySortOrder(), in.getDataExchangeMode());
        toReducer.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(), in.getLocalStrategySortOrder());
        return new SingleInputPlanNode(node, "Reduce (" + node.getOperator().getName() + ")", toReducer, DriverStrategy.ALL_REDUCE);
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ReduceNode(org.apache.flink.optimizer.dag.ReduceNode) Costs(org.apache.flink.optimizer.costs.Costs) Channel(org.apache.flink.optimizer.plan.Channel)
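The effect of the non-forward branch shows up in the compiled plan: a combine node is inserted whose own input is the locally connected toCombiner channel (FORWARD), while the channel into the final reducer keeps the original ship strategy. The following is a hedged, test-style sketch of how this could be verified; the driver program and assertions are illustrative and reuse the Optimizer/DefaultCostEstimator setup from Example 28, not an exact copy of an existing Flink test.

import static org.junit.Assert.assertEquals;

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plan.SingleInputPlanNode;
import org.apache.flink.optimizer.plan.SinkPlanNode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.junit.Test;

public class AllReduceCombinerSketch {

    @Test
    public void testCombinerIsPluggedIn() {
        // a parallel source feeding a non-grouped (all-)reduce: the reduce runs with parallelism 1,
        // so its input is not FORWARD-connected and the else branch above plugs in a combiner
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        env.generateSequence(1, 1000)
                .reduce(new ReduceFunction<Long>() {
                    @Override
                    public Long reduce(Long a, Long b) {
                        return a + b;
                    }
                })
                .output(new DiscardingOutputFormat<Long>());

        OptimizedPlan plan = new Optimizer(null, new DefaultCostEstimator(), new Configuration())
                .compile(env.createProgramPlan());

        SinkPlanNode sink = plan.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        // the channel into the combiner is the locally connected "toCombiner" channel
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
        // the combiner keeps the parallelism of the original input
        assertEquals(8, combiner.getParallelism());
    }
}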

Example 28 with Channel

use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

the class TestUtils, method verifyParallelism.

/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);
    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());
    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);
        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);
        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Also used : PlanNode(org.apache.flink.optimizer.plan.PlanNode) Configuration(org.apache.flink.configuration.Configuration) Optimizer(org.apache.flink.optimizer.Optimizer) Channel(org.apache.flink.optimizer.plan.Channel) ArrayList(java.util.ArrayList) DefaultCostEstimator(org.apache.flink.optimizer.costs.DefaultCostEstimator) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan)
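A hedged usage sketch for this helper: the driver program below is illustrative (not taken from the Flink sources) and sets an explicit parallelism on every operator, because verifyParallelism raises the environment's default parallelism to twice the expected value and then asserts that no plan node exceeds the expected value.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class VerifyParallelismSketch {

    public static void main(String[] args) throws Exception {
        // hypothetical expected parallelism for this example
        final int parallelism = 4;

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        env.generateSequence(0, 100).setParallelism(parallelism)
                .map(new MapFunction<Long, Long>() {
                    @Override
                    public Long map(Long value) {
                        return 2 * value;
                    }
                }).setParallelism(parallelism)
                .output(new DiscardingOutputFormat<Long>()).setParallelism(parallelism);

        // compiles the program and asserts that no operator exceeds 'parallelism';
        // TestUtils is the class shown above (its import is omitted here, since its
        // package is not visible in this example)
        TestUtils.verifyParallelism(env, parallelism);
    }
}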

Example 29 with Channel

use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

the class CostEstimator, method costOperator.

// ------------------------------------------------------------------------
/**
 * This method computes the cost of an operator. The cost is composed of cost for input shipping,
 * locally processing an input, and running the operator.
 *
 * It requires at least that all inputs are set and have a proper ship strategy set,
 * which is not equal to <tt>NONE</tt>.
 *
 * @param n The node to compute the costs for.
 */
public void costOperator(PlanNode n) {
    // initialize costs objects with no costs
    final Costs totalCosts = new Costs();
    final long availableMemory = n.getGuaranteedAvailableMemory();
    // add the shipping strategy costs
    for (Channel channel : n.getInputs()) {
        final Costs costs = new Costs();
        switch(channel.getShipStrategy()) {
            case NONE:
                throw new CompilerException("Cannot determine costs: Shipping strategy has not been set for an input.");
            case FORWARD:
                //				costs.addHeuristicNetworkCost(channel.getMaxDepth());
                break;
            case PARTITION_RANDOM:
                addRandomPartitioningCost(channel, costs);
                break;
            case PARTITION_HASH:
            case PARTITION_CUSTOM:
                addHashPartitioningCost(channel, costs);
                break;
            case PARTITION_RANGE:
                addRangePartitionCost(channel, costs);
                break;
            case BROADCAST:
                addBroadcastCost(channel, channel.getReplicationFactor(), costs);
                break;
            case PARTITION_FORCED_REBALANCE:
                addRandomPartitioningCost(channel, costs);
                break;
            default:
                throw new CompilerException("Unknown shipping strategy for input: " + channel.getShipStrategy());
        }
        switch(channel.getLocalStrategy()) {
            case NONE:
                break;
            case SORT:
            case COMBININGSORT:
                addLocalSortCost(channel, costs);
                break;
            default:
                throw new CompilerException("Unsupported local strategy for input: " + channel.getLocalStrategy());
        }
        if (channel.getTempMode() != null && channel.getTempMode() != TempMode.NONE) {
            addArtificialDamCost(channel, 0, costs);
        }
        // adjust with the cost weight factor
        if (channel.isOnDynamicPath()) {
            costs.multiplyWith(channel.getCostWeight());
        }
        totalCosts.addCosts(costs);
    }
    Channel firstInput = null;
    Channel secondInput = null;
    Costs driverCosts = new Costs();
    int costWeight = 1;
    // adjust with the cost weight factor
    if (n.isOnDynamicPath()) {
        costWeight = n.getCostWeight();
    }
    // get the inputs, if we have some
    {
        Iterator<Channel> channels = n.getInputs().iterator();
        if (channels.hasNext()) {
            firstInput = channels.next();
        }
        if (channels.hasNext()) {
            secondInput = channels.next();
        }
    }
    // determine the local costs
    switch(n.getDriverStrategy()) {
        case NONE:
        case UNARY_NO_OP:
        case BINARY_NO_OP:
        case MAP:
        case MAP_PARTITION:
        case FLAT_MAP:
        case ALL_GROUP_REDUCE:
        case ALL_REDUCE:
        case CO_GROUP:
        case CO_GROUP_RAW:
        case SORTED_GROUP_REDUCE:
        case SORTED_REDUCE:
        case SORTED_GROUP_COMBINE:
        // partial grouping is always local and main memory resident. we should add a relative cpu cost at some point
        case ALL_GROUP_COMBINE:
        case UNION:
            break;
        case INNER_MERGE:
        case FULL_OUTER_MERGE:
        case LEFT_OUTER_MERGE:
        case RIGHT_OUTER_MERGE:
            addLocalMergeCost(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_FIRST:
        case RIGHT_HYBRIDHASH_BUILD_FIRST:
        case LEFT_HYBRIDHASH_BUILD_FIRST:
        case FULL_OUTER_HYBRIDHASH_BUILD_FIRST:
            addHybridHashCosts(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_SECOND:
        case LEFT_HYBRIDHASH_BUILD_SECOND:
        case RIGHT_HYBRIDHASH_BUILD_SECOND:
        case FULL_OUTER_HYBRIDHASH_BUILD_SECOND:
            addHybridHashCosts(secondInput, firstInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_FIRST_CACHED:
            addCachedHybridHashCosts(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_SECOND_CACHED:
            addCachedHybridHashCosts(secondInput, firstInput, driverCosts, costWeight);
            break;
        case NESTEDLOOP_BLOCKED_OUTER_FIRST:
            addBlockNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_BLOCKED_OUTER_SECOND:
            addBlockNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_STREAMED_OUTER_FIRST:
            addStreamedNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_STREAMED_OUTER_SECOND:
            addStreamedNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts, costWeight);
            break;
        default:
            throw new CompilerException("Unknown local strategy: " + n.getDriverStrategy().name());
    }
    totalCosts.addCosts(driverCosts);
    n.setCosts(totalCosts);
}
Also used : Channel(org.apache.flink.optimizer.plan.Channel) Iterator(java.util.Iterator) CompilerException(org.apache.flink.optimizer.CompilerException)
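As the javadoc notes, costOperator presumes that every input Channel already carries a ship strategy other than NONE, so the optimizer calls it only on fully parameterized candidate nodes (see the loop over outputPlans in Example 30 below). The following is a minimal hedged sketch of that calling pattern; the OptimizerNode argument is a hypothetical, already set-up DAG node, not a specific class from the sources.

import java.util.List;

import org.apache.flink.optimizer.costs.CostEstimator;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.dag.OptimizerNode;
import org.apache.flink.optimizer.plan.PlanNode;

public final class CostingSketch {

    /** Costs every alternative plan of one DAG node; 'node' is assumed to be fully set up. */
    public static void costAllCandidates(OptimizerNode node) {
        CostEstimator estimator = new DefaultCostEstimator();

        // enumerate the candidate plans for this node (each candidate already has its
        // input Channels and ship/local strategies chosen)
        List<PlanNode> candidates = node.getAlternativePlans(estimator);

        for (PlanNode candidate : candidates) {
            // computes shipping + local-strategy + driver costs and stores them on the
            // candidate via setCosts(...); throws CompilerException if any input channel's
            // ship strategy is still NONE
            estimator.costOperator(candidate);
        }
    }
}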

Example 30 with Channel

use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

the class BinaryUnionNode, method getAlternativePlans.

@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // check that union has only a single successor
    if (this.getOutgoingConnections().size() > 1) {
        throw new CompilerException("BinaryUnionNode has more than one successor.");
    }
    boolean childrenSkippedDueToReplicatedInput = false;
    // check if we have a cached version
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }
    // step down to all producer nodes and calculate alternative plans
    final List<? extends PlanNode> subPlans1 = getFirstPredecessorNode().getAlternativePlans(estimator);
    final List<? extends PlanNode> subPlans2 = getSecondPredecessorNode().getAlternativePlans(estimator);
    List<DagConnection> broadcastConnections = getBroadcastConnections();
    if (broadcastConnections != null && broadcastConnections.size() > 0) {
        throw new CompilerException("Found BroadcastVariables on a Union operation");
    }
    final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
    final List<Set<? extends NamedChannel>> broadcastPlanChannels = Collections.emptyList();
    final BinaryUnionOpDescriptor operator = new BinaryUnionOpDescriptor();
    final RequestedLocalProperties noLocalProps = new RequestedLocalProperties();
    final ExecutionMode input1Mode = this.input1.getDataExchangeMode();
    final ExecutionMode input2Mode = this.input2.getDataExchangeMode();
    final int parallelism = getParallelism();
    final int inParallelism1 = getFirstPredecessorNode().getParallelism();
    final int inParallelism2 = getSecondPredecessorNode().getParallelism();
    final boolean dopChange1 = parallelism != inParallelism1;
    final boolean dopChange2 = parallelism != inParallelism2;
    final boolean input1breakPipeline = this.input1.isBreakingPipeline();
    final boolean input2breakPipeline = this.input2.isBreakingPipeline();
    // create all candidates
    for (PlanNode child1 : subPlans1) {
        if (child1.getGlobalProperties().isFullyReplicated()) {
            // fully replicated input is always locally forwarded if parallelism is not changed
            if (dopChange1) {
                // can not continue with this child
                childrenSkippedDueToReplicatedInput = true;
                continue;
            } else {
                this.input1.setShipStrategy(ShipStrategyType.FORWARD);
            }
        }
        for (PlanNode child2 : subPlans2) {
            if (child2.getGlobalProperties().isFullyReplicated()) {
                // fully replicated input is always locally forwarded if parallelism is not changed
                if (dopChange2) {
                    // can not continue with this child
                    childrenSkippedDueToReplicatedInput = true;
                    continue;
                } else {
                    this.input2.setShipStrategy(ShipStrategyType.FORWARD);
                }
            }
            // check that the children go together, i.e. that they build upon the same
            // candidate at the joined branch plan.
            if (!areBranchCompatible(child1, child2)) {
                continue;
            }
            for (RequestedGlobalProperties igps : this.channelProps) {
                // create a candidate channel for the first input. mark it cached, if the connection says so
                Channel c1 = new Channel(child1, this.input1.getMaterializationMode());
                if (this.input1.getShipStrategy() == null) {
                    // free to choose the ship strategy
                    igps.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                    // if the parallelism changed, cancel out the global properties, unless the
                    // ship strategy preserves/establishes them even under changing parallelisms
                    if (dopChange1 && !c1.getShipStrategy().isNetworkStrategy()) {
                        c1.getGlobalProperties().reset();
                    }
                } else {
                    // ship strategy fixed by compiler hint
                    ShipStrategyType shipStrategy = this.input1.getShipStrategy();
                    DataExchangeMode exMode = DataExchangeMode.select(input1Mode, shipStrategy, input1breakPipeline);
                    if (this.keys1 != null) {
                        c1.setShipStrategy(this.input1.getShipStrategy(), this.keys1.toFieldList(), exMode);
                    } else {
                        c1.setShipStrategy(this.input1.getShipStrategy(), exMode);
                    }
                    if (dopChange1) {
                        c1.adjustGlobalPropertiesForFullParallelismChange();
                    }
                }
                // create a candidate channel for the second input. mark it cached, if the connection says so
                Channel c2 = new Channel(child2, this.input2.getMaterializationMode());
                if (this.input2.getShipStrategy() == null) {
                    // free to choose the ship strategy
                    igps.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                    // if the parallelism changed, cancel out the global properties, unless the
                    // ship strategy preserves/establishes them even under changing parallelisms
                    if (dopChange2 && !c2.getShipStrategy().isNetworkStrategy()) {
                        c2.getGlobalProperties().reset();
                    }
                } else {
                    // ship strategy fixed by compiler hint
                    ShipStrategyType shipStrategy = this.input2.getShipStrategy();
                    DataExchangeMode exMode = DataExchangeMode.select(input2Mode, shipStrategy, input2breakPipeline);
                    if (this.keys2 != null) {
                        c2.setShipStrategy(this.input2.getShipStrategy(), this.keys2.toFieldList(), exMode);
                    } else {
                        c2.setShipStrategy(this.input2.getShipStrategy(), exMode);
                    }
                    if (dopChange2) {
                        c2.adjustGlobalPropertiesForFullParallelismChange();
                    }
                }
                // get the global properties and clear unique fields (not preserved anyways during the union)
                GlobalProperties p1 = c1.getGlobalProperties();
                GlobalProperties p2 = c2.getGlobalProperties();
                p1.clearUniqueFieldCombinations();
                p2.clearUniqueFieldCombinations();
                // if the requested properties are non-trivial and the two channels ended up with
                // different global properties, re-parameterize one channel so that both sides match.
                // This can happen when both channels fulfill the requirement in incompatible ways,
                // e.g. one is hash partitioned and the other is range partitioned on that field.
                if (!igps.isTrivial() && !(p1.equals(p2))) {
                    if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() != ShipStrategyType.FORWARD) {
                        // adjust c2 to c1
                        c2 = c2.clone();
                        p1.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                    } else if (c2.getShipStrategy() == ShipStrategyType.FORWARD && c1.getShipStrategy() != ShipStrategyType.FORWARD) {
                        // adjust c1 to c2
                        c1 = c1.clone();
                        p2.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                    } else if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() == ShipStrategyType.FORWARD) {
                        boolean adjustC1 = c1.getEstimatedOutputSize() <= 0 || c2.getEstimatedOutputSize() <= 0 || c1.getEstimatedOutputSize() <= c2.getEstimatedOutputSize();
                        if (adjustC1) {
                            c2 = c2.clone();
                            p1.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                        } else {
                            c1 = c1.clone();
                            p2.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                        }
                    } else {
                        // this should not happen: if neither channel is FORWARD, both were
                        // parameterized to the same requested properties, so differing global
                        // properties indicate a bug in the plan enumeration
                        throw new CompilerException("Bug in Plan Enumeration for Union Node.");
                    }
                }
                instantiate(operator, c1, c2, broadcastPlanChannels, outputPlans, estimator, igps, igps, noLocalProps, noLocalProps);
            }
        }
    }
    if (outputPlans.isEmpty()) {
        if (childrenSkippedDueToReplicatedInput) {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
        } else {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
        }
    }
    // cost and prune the plans
    for (PlanNode node : outputPlans) {
        estimator.costOperator(node);
    }
    prunePlanAlternatives(outputPlans);
    outputPlans.trimToSize();
    this.cachedPlans = outputPlans;
    return outputPlans;
}
Also used : RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) Set(java.util.Set) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) ArrayList(java.util.ArrayList) ExecutionMode(org.apache.flink.api.common.ExecutionMode) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) RequestedLocalProperties(org.apache.flink.optimizer.dataproperties.RequestedLocalProperties) PlanNode(org.apache.flink.optimizer.plan.PlanNode) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) DataExchangeMode(org.apache.flink.runtime.io.network.DataExchangeMode) CompilerException(org.apache.flink.optimizer.CompilerException) BinaryUnionOpDescriptor(org.apache.flink.optimizer.operators.BinaryUnionOpDescriptor)
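This code path is exercised whenever a union feeds an operator with a key requirement: the requested global properties (igps) then force both input channels of the union to be parameterized compatibly, and if the enumerated candidates disagree, one channel is re-parameterized to match the other. Below is a hedged driver sketch (an illustrative program, not from the Flink sources) that triggers this enumeration; which concrete channel gets adjusted depends on the candidates the optimizer enumerates.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

public class UnionEnumerationSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(1L, 1L));
        DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(2L, 2L));

        // the grouping on field 0 downstream of the union requests a partitioning on that field;
        // getAlternativePlans(...) above must pick ship strategies for both union inputs (c1, c2)
        // so that the unioned result fulfills it
        src1.union(src2)
                .groupBy(0)
                .sum(1)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        OptimizedPlan plan = new Optimizer(null, new DefaultCostEstimator(), new Configuration())
                .compile(env.createProgramPlan());

        // how the union's input channels were partitioned can now be inspected by walking the
        // optimized plan with a Visitor, as shown in Example 26
    }
}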

Aggregations

Channel (org.apache.flink.optimizer.plan.Channel): 60
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 41
Test (org.junit.Test): 30
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 26
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 24
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 24
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 23
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 23
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 20
NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 19
Plan (org.apache.flink.api.common.Plan): 18
BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode): 18
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 16
NamedChannel (org.apache.flink.optimizer.plan.NamedChannel): 16
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 15
CompilerException (org.apache.flink.optimizer.CompilerException): 14
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 13
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 13
RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties): 13
FieldList (org.apache.flink.api.common.operators.util.FieldList): 12