Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
Class CoGroupSolutionSetFirstTest, method testCoGroupSolutionSet:
@Test
public void testCoGroupSolutionSet() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple1<Integer>> raw = env.readCsvFile(IN_FILE).types(Integer.class);
    DeltaIteration<Tuple1<Integer>, Tuple1<Integer>> iteration = raw.iterateDelta(raw, 1000, 0);
    DataSet<Tuple1<Integer>> test = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> delta = iteration.getSolutionSet().coGroup(test).where(0).equalTo(0).with(new SimpleCGroup());
    DataSet<Tuple1<Integer>> feedback = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> result = iteration.closeWith(delta, feedback);
    result.output(new DiscardingOutputFormat<Tuple1<Integer>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = null;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException e) {
        Assert.fail(e.getMessage());
    }

    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof WorksetIterationPlanNode) {
                PlanNode deltaNode = ((WorksetIterationPlanNode) visitable).getSolutionSetDeltaPlanNode();
                // get the CoGroup
                DualInputPlanNode dpn = (DualInputPlanNode) deltaNode.getInputs().iterator().next().getSource();
                Channel in1 = dpn.getInput1();
                Channel in2 = dpn.getInput2();
                Assert.assertTrue(in1.getLocalProperties().getOrdering() == null);
                Assert.assertTrue(in2.getLocalProperties().getOrdering() != null);
                Assert.assertTrue(in2.getLocalProperties().getOrdering().getInvolvedIndexes().contains(0));
                Assert.assertTrue(in1.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue(in2.getShipStrategy() == ShipStrategyType.PARTITION_HASH);
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
}
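The test references two user functions, SimpleMap and SimpleCGroup, that are not shown in this excerpt. Minimal sketches consistent with the call sites might look like the following; the type signatures follow from the snippet, while the bodies (identity map, empty co-group) are assumptions that suffice for a test that only inspects the compiled plan (assumed imports: org.apache.flink.api.common.functions.MapFunction, org.apache.flink.api.common.functions.CoGroupFunction, org.apache.flink.util.Collector):

public static class SimpleMap implements MapFunction<Tuple1<Integer>, Tuple1<Integer>> {

    @Override
    public Tuple1<Integer> map(Tuple1<Integer> value) throws Exception {
        // an identity mapping is enough: the test only inspects the optimized plan
        return value;
    }
}

public static class SimpleCGroup implements CoGroupFunction<Tuple1<Integer>, Tuple1<Integer>, Tuple1<Integer>> {

    @Override
    public void coGroup(Iterable<Tuple1<Integer>> first, Iterable<Tuple1<Integer>> second, Collector<Tuple1<Integer>> out) throws Exception {
        // no records need to be emitted; the plan shape is what is under test
    }
}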
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
Class AllReduceProperties, method instantiate:
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    if (in.getShipStrategy() == ShipStrategyType.FORWARD) {
        // locally connected, directly instantiate
        return new SingleInputPlanNode(node, "Reduce (" + node.getOperator().getName() + ")", in, DriverStrategy.ALL_REDUCE);
    } else {
        // non-forward case: plug in a combiner
        Channel toCombiner = new Channel(in.getSource());
        toCombiner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);

        // create an input node for the combiner with the same parallelism as the input node
        ReduceNode combinerNode = ((ReduceNode) node).getCombinerUtilityNode();
        combinerNode.setParallelism(in.getSource().getParallelism());

        SingleInputPlanNode combiner = new SingleInputPlanNode(combinerNode, "Combine (" + node.getOperator().getName() + ")", toCombiner, DriverStrategy.ALL_REDUCE);
        combiner.setCosts(new Costs(0, 0));
        combiner.initProperties(toCombiner.getGlobalProperties(), toCombiner.getLocalProperties());

        Channel toReducer = new Channel(combiner);
        toReducer.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(), in.getShipStrategySortOrder(), in.getDataExchangeMode());
        toReducer.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(), in.getLocalStrategySortOrder());
        return new SingleInputPlanNode(node, "Reduce (" + node.getOperator().getName() + ")", toReducer, DriverStrategy.ALL_REDUCE);
    }
}
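In user-facing terms, this is the code path for a non-grouped reduce() whose input arrives over a non-FORWARD channel: the optimizer pre-aggregates with a zero-cost combiner on the sender side before shipping. A minimal sketch of a program that can exercise it; the pipeline is hypothetical, and whether the combiner branch is actually taken depends on the ship strategy the optimizer picks for the reduce input:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);

DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4)
    .map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) {
            return value;  // runs with the default parallelism of 4
        }
    });

// an all-reduce must gather every partition, so its input channel
// is typically not FORWARD when the input is parallel
numbers.reduce(new ReduceFunction<Integer>() {
    @Override
    public Integer reduce(Integer a, Integer b) {
        return a + b;  // sum all elements into a single value
    }
}).output(new DiscardingOutputFormat<Integer>());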
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
Class TestUtils, method verifyParallelism:
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);
    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have a parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
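A hedged usage sketch: build a small pipeline, set an explicit parallelism on each operator, then call verifyParallelism. The helper doubles the environment's default parallelism before compiling, so any operator that silently falls back to the default would exceed expectedParallelism and trip the assertion. The pipeline below is hypothetical; note that the sink also needs an explicit parallelism, since it would otherwise pick up the inflated default:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.fromElements(1, 2, 3)
    .map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) {
            return value + 1;
        }
    })
    .setParallelism(4)
    .output(new DiscardingOutputFormat<Integer>())
    .setParallelism(4);

// compiles the plan with a default parallelism of 8 and asserts
// that no operator in the plan exceeds a parallelism of 4
TestUtils.verifyParallelism(env, 4);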
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
Class CostEstimator, method costOperator:
// ------------------------------------------------------------------------
/**
 * This method computes the cost of an operator. The cost is composed of the cost for
 * shipping each input, processing each input locally, and running the operator itself.
 *
 * It requires that all inputs are set and have a proper ship strategy,
 * i.e. one that is not equal to <tt>NONE</tt>.
 *
 * @param n The node to compute the costs for.
 */
public void costOperator(PlanNode n) {
    // initialize cost objects with no costs
    final Costs totalCosts = new Costs();
    final long availableMemory = n.getGuaranteedAvailableMemory();

    // add the shipping strategy costs
    for (Channel channel : n.getInputs()) {
        final Costs costs = new Costs();
        switch (channel.getShipStrategy()) {
            case NONE:
                throw new CompilerException("Cannot determine costs: Shipping strategy has not been set for an input.");
            case FORWARD:
                // costs.addHeuristicNetworkCost(channel.getMaxDepth());
                break;
            case PARTITION_RANDOM:
                addRandomPartitioningCost(channel, costs);
                break;
            case PARTITION_HASH:
            case PARTITION_CUSTOM:
                addHashPartitioningCost(channel, costs);
                break;
            case PARTITION_RANGE:
                addRangePartitionCost(channel, costs);
                break;
            case BROADCAST:
                addBroadcastCost(channel, channel.getReplicationFactor(), costs);
                break;
            case PARTITION_FORCED_REBALANCE:
                addRandomPartitioningCost(channel, costs);
                break;
            default:
                throw new CompilerException("Unknown shipping strategy for input: " + channel.getShipStrategy());
        }
        switch (channel.getLocalStrategy()) {
            case NONE:
                break;
            case SORT:
            case COMBININGSORT:
                addLocalSortCost(channel, costs);
                break;
            default:
                throw new CompilerException("Unsupported local strategy for input: " + channel.getLocalStrategy());
        }
        if (channel.getTempMode() != null && channel.getTempMode() != TempMode.NONE) {
            addArtificialDamCost(channel, 0, costs);
        }
        // adjust with the cost weight factor
        if (channel.isOnDynamicPath()) {
            costs.multiplyWith(channel.getCostWeight());
        }
        totalCosts.addCosts(costs);
    }

    Channel firstInput = null;
    Channel secondInput = null;
    Costs driverCosts = new Costs();
    int costWeight = 1;

    // adjust with the cost weight factor
    if (n.isOnDynamicPath()) {
        costWeight = n.getCostWeight();
    }

    // get the inputs, if we have some
    {
        Iterator<Channel> channels = n.getInputs().iterator();
        if (channels.hasNext()) {
            firstInput = channels.next();
        }
        if (channels.hasNext()) {
            secondInput = channels.next();
        }
    }

    // determine the local costs
    switch (n.getDriverStrategy()) {
        case NONE:
        case UNARY_NO_OP:
        case BINARY_NO_OP:
        case MAP:
        case MAP_PARTITION:
        case FLAT_MAP:
        case ALL_GROUP_REDUCE:
        case ALL_REDUCE:
        case CO_GROUP:
        case CO_GROUP_RAW:
        case SORTED_GROUP_REDUCE:
        case SORTED_REDUCE:
        case SORTED_GROUP_COMBINE:
        // partial grouping is always local and main memory resident. we should add a relative cpu cost at some point
        case ALL_GROUP_COMBINE:
        case UNION:
            break;
        case INNER_MERGE:
        case FULL_OUTER_MERGE:
        case LEFT_OUTER_MERGE:
        case RIGHT_OUTER_MERGE:
            addLocalMergeCost(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_FIRST:
        case RIGHT_HYBRIDHASH_BUILD_FIRST:
        case LEFT_HYBRIDHASH_BUILD_FIRST:
        case FULL_OUTER_HYBRIDHASH_BUILD_FIRST:
            addHybridHashCosts(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_SECOND:
        case LEFT_HYBRIDHASH_BUILD_SECOND:
        case RIGHT_HYBRIDHASH_BUILD_SECOND:
        case FULL_OUTER_HYBRIDHASH_BUILD_SECOND:
            addHybridHashCosts(secondInput, firstInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_FIRST_CACHED:
            addCachedHybridHashCosts(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_SECOND_CACHED:
            addCachedHybridHashCosts(secondInput, firstInput, driverCosts, costWeight);
            break;
        case NESTEDLOOP_BLOCKED_OUTER_FIRST:
            addBlockNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_BLOCKED_OUTER_SECOND:
            addBlockNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_STREAMED_OUTER_FIRST:
            addStreamedNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_STREAMED_OUTER_SECOND:
            addStreamedNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts, costWeight);
            break;
        default:
            throw new CompilerException("Unknown local strategy: " + n.getDriverStrategy().name());
    }

    totalCosts.addCosts(driverCosts);
    n.setCosts(totalCosts);
}
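The accumulation pattern is: one Costs object per input channel, scaled by the channel's cost weight when it lies on a dynamic (iterative) path, plus one Costs object for the driver, all folded into a single total that is attached to the node. A condensed sketch of that pattern, using only the Costs methods that appear in the snippets on this page; the weight of 5 is an arbitrary illustration:

Costs totalCosts = new Costs();

Costs channelCosts = new Costs(0, 0);  // zero network and disk cost, as for the combiner shown earlier
channelCosts.multiplyWith(5);          // channels on a dynamic path are scaled by their cost weight
totalCosts.addCosts(channelCosts);

Costs driverCosts = new Costs();       // would be filled by one of the add...Costs(...) helpers above
totalCosts.addCosts(driverCosts);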
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
Class BinaryUnionNode, method getAlternativePlans:
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // check that the union has only a single successor
    if (this.getOutgoingConnections().size() > 1) {
        throw new CompilerException("BinaryUnionNode has more than one successor.");
    }

    boolean childrenSkippedDueToReplicatedInput = false;

    // check if we have a cached version
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }

    // step down to all producer nodes and calculate alternative plans
    final List<? extends PlanNode> subPlans1 = getFirstPredecessorNode().getAlternativePlans(estimator);
    final List<? extends PlanNode> subPlans2 = getSecondPredecessorNode().getAlternativePlans(estimator);

    List<DagConnection> broadcastConnections = getBroadcastConnections();
    if (broadcastConnections != null && broadcastConnections.size() > 0) {
        throw new CompilerException("Found BroadcastVariables on a Union operation");
    }

    final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
    final List<Set<? extends NamedChannel>> broadcastPlanChannels = Collections.emptyList();
    final BinaryUnionOpDescriptor operator = new BinaryUnionOpDescriptor();
    final RequestedLocalProperties noLocalProps = new RequestedLocalProperties();

    final ExecutionMode input1Mode = this.input1.getDataExchangeMode();
    final ExecutionMode input2Mode = this.input2.getDataExchangeMode();

    final int parallelism = getParallelism();
    final int inParallelism1 = getFirstPredecessorNode().getParallelism();
    final int inParallelism2 = getSecondPredecessorNode().getParallelism();

    final boolean dopChange1 = parallelism != inParallelism1;
    final boolean dopChange2 = parallelism != inParallelism2;

    final boolean input1breakPipeline = this.input1.isBreakingPipeline();
    final boolean input2breakPipeline = this.input2.isBreakingPipeline();

    // create all candidates
    for (PlanNode child1 : subPlans1) {
        if (child1.getGlobalProperties().isFullyReplicated()) {
            // a fully replicated input is always locally forwarded if the parallelism is not changed
            if (dopChange1) {
                // cannot continue with this child
                childrenSkippedDueToReplicatedInput = true;
                continue;
            } else {
                this.input1.setShipStrategy(ShipStrategyType.FORWARD);
            }
        }
        for (PlanNode child2 : subPlans2) {
            if (child2.getGlobalProperties().isFullyReplicated()) {
                // a fully replicated input is always locally forwarded if the parallelism is not changed
                if (dopChange2) {
                    // cannot continue with this child
                    childrenSkippedDueToReplicatedInput = true;
                    continue;
                } else {
                    this.input2.setShipStrategy(ShipStrategyType.FORWARD);
                }
            }

            // only pair candidates whose branch plans are compatible
            if (!areBranchCompatible(child1, child2)) {
                continue;
            }

            for (RequestedGlobalProperties igps : this.channelProps) {
                // create a candidate channel for the first input. mark it cached, if the connection says so
                Channel c1 = new Channel(child1, this.input1.getMaterializationMode());
                if (this.input1.getShipStrategy() == null) {
                    // free to choose the ship strategy
                    igps.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);

                    // if the parallelism changed, cancel out the global properties, unless the
                    // ship strategy preserves/establishes them even under changing parallelisms
                    if (dopChange1 && !c1.getShipStrategy().isNetworkStrategy()) {
                        c1.getGlobalProperties().reset();
                    }
                } else {
                    // the ship strategy is fixed by a compiler hint
                    ShipStrategyType shipStrategy = this.input1.getShipStrategy();
                    DataExchangeMode exMode = DataExchangeMode.select(input1Mode, shipStrategy, input1breakPipeline);
                    if (this.keys1 != null) {
                        c1.setShipStrategy(this.input1.getShipStrategy(), this.keys1.toFieldList(), exMode);
                    } else {
                        c1.setShipStrategy(this.input1.getShipStrategy(), exMode);
                    }
                    if (dopChange1) {
                        c1.adjustGlobalPropertiesForFullParallelismChange();
                    }
                }

                // create a candidate channel for the second input. mark it cached, if the connection says so
                Channel c2 = new Channel(child2, this.input2.getMaterializationMode());
                if (this.input2.getShipStrategy() == null) {
                    // free to choose the ship strategy
                    igps.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);

                    // if the parallelism changed, cancel out the global properties, unless the
                    // ship strategy preserves/establishes them even under changing parallelisms
                    if (dopChange2 && !c2.getShipStrategy().isNetworkStrategy()) {
                        c2.getGlobalProperties().reset();
                    }
                } else {
                    // the ship strategy is fixed by a compiler hint
                    ShipStrategyType shipStrategy = this.input2.getShipStrategy();
                    DataExchangeMode exMode = DataExchangeMode.select(input2Mode, shipStrategy, input2breakPipeline);
                    if (this.keys2 != null) {
                        c2.setShipStrategy(this.input2.getShipStrategy(), this.keys2.toFieldList(), exMode);
                    } else {
                        c2.setShipStrategy(this.input2.getShipStrategy(), exMode);
                    }
                    if (dopChange2) {
                        c2.adjustGlobalPropertiesForFullParallelismChange();
                    }
                }

                // get the global properties and clear the unique fields (they are not preserved across a union anyway)
                GlobalProperties p1 = c1.getGlobalProperties();
                GlobalProperties p2 = c2.getGlobalProperties();
                p1.clearUniqueFieldCombinations();
                p2.clearUniqueFieldCombinations();

                // adjust the partitioning if the requirement is non-trivial and the two sides fulfill it with
                // different, incompatible partitionings, e.g. one side range-partitioned and the other side
                // hash-partitioned on the same field
                if (!igps.isTrivial() && !(p1.equals(p2))) {
                    if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() != ShipStrategyType.FORWARD) {
                        // adjust c2 to c1
                        c2 = c2.clone();
                        p1.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                    } else if (c2.getShipStrategy() == ShipStrategyType.FORWARD && c1.getShipStrategy() != ShipStrategyType.FORWARD) {
                        // adjust c1 to c2
                        c1 = c1.clone();
                        p2.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                    } else if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() == ShipStrategyType.FORWARD) {
                        boolean adjustC1 = c1.getEstimatedOutputSize() <= 0 || c2.getEstimatedOutputSize() <= 0 || c1.getEstimatedOutputSize() <= c2.getEstimatedOutputSize();
                        if (adjustC1) {
                            c2 = c2.clone();
                            p1.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                        } else {
                            c1 = c1.clone();
                            p2.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                        }
                    } else {
                        // this case is excluded by the check that the required strategies must match
                        throw new CompilerException("Bug in Plan Enumeration for Union Node.");
                    }
                }

                instantiate(operator, c1, c2, broadcastPlanChannels, outputPlans, estimator, igps, igps, noLocalProps, noLocalProps);
            }
        }
    }

    if (outputPlans.isEmpty()) {
        if (childrenSkippedDueToReplicatedInput) {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
        } else {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
        }
    }

    // cost and prune the plans
    for (PlanNode node : outputPlans) {
        estimator.costOperator(node);
    }
    prunePlanAlternatives(outputPlans);
    outputPlans.trimToSize();

    this.cachedPlans = outputPlans;
    return outputPlans;
}
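The dopChange handling above is what lets a union combine inputs with different parallelisms: for each input whose parallelism differs from the union's, the candidate channel is re-parameterized so both sides end up with compatible global properties. A hedged user-level sketch of a plan that exercises this; the operators and parallelism values are hypothetical:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Integer> a = env.fromElements(1, 2, 3)
    .map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) {
            return value;
        }
    })
    .setParallelism(2);

DataSet<Integer> b = env.fromElements(4, 5, 6)
    .map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) {
            return value;
        }
    })
    .setParallelism(4);

// the union node sees inParallelism1 = 2 and inParallelism2 = 4, so at least
// one input channel has dopChange = true during plan enumeration
a.union(b)
    .output(new DiscardingOutputFormat<Integer>())
    .setParallelism(4);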