use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.
the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInputBehindMapPartition.
/**
 * Compiles a join whose first input is a replicated data source that first passes through a
 * map-partition operator, and asserts that both join inputs use the FORWARD ship strategy.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {
    final ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment();
    environment.setParallelism(DEFAULT_PARALLELISM);

    // first input: a CSV input format wrapped in a replicating input format
    final TupleTypeInfo<Tuple1<String>> csvTypeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    final TupleCsvInputFormat<Tuple1<String>> csvFormat =
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), csvTypeInfo);
    final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(csvFormat);
    final DataSet<Tuple1<String>> replicatedSource =
            environment.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));

    // second input: an ordinary CSV source
    final DataSet<Tuple1<String>> regularSource = environment.readCsvFile("/some/otherpath").types(String.class);

    // map-partition on the replicated side, then join on all fields and write the result
    final DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink =
            replicatedSource
                    .mapPartition(new IdPMap())
                    .join(regularSource)
                    .where("*")
                    .equalTo("*")
                    .writeAsText("/some/newpath");

    // hand the program plan to the compiler
    final Plan programPlan = environment.createProgramPlan();
    final OptimizedPlan optimizedPlan = compileNoStats(programPlan);

    // the sink's predecessor is the join; inspect the ship strategy of both of its inputs
    final SinkPlanNode sinkPlanNode = optimizedPlan.getDataSinks().iterator().next();
    final DualInputPlanNode join = (DualInputPlanNode) sinkPlanNode.getPredecessor();
    final ShipStrategyType firstInputStrategy = join.getInput1().getShipStrategy();
    final ShipStrategyType secondInputStrategy = join.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, firstInputStrategy);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, secondInputStrategy);
}
use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.
the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInput.
/**
 * Compiles a join whose first input is a replicated data source and asserts that both join
 * inputs use the FORWARD ship strategy.
 */
@Test
public void checkJoinWithReplicatedSourceInput() {
    final ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment();
    environment.setParallelism(DEFAULT_PARALLELISM);

    // first input: a CSV input format wrapped in a replicating input format
    final TupleTypeInfo<Tuple1<String>> csvTypeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    final TupleCsvInputFormat<Tuple1<String>> csvFormat =
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), csvTypeInfo);
    final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(csvFormat);
    final DataSet<Tuple1<String>> replicatedSource =
            environment.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));

    // second input: an ordinary CSV source
    final DataSet<Tuple1<String>> regularSource = environment.readCsvFile("/some/otherpath").types(String.class);

    // join both sources on all fields and write the result
    final DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink =
            replicatedSource
                    .join(regularSource)
                    .where("*")
                    .equalTo("*")
                    .writeAsText("/some/newpath");

    // hand the program plan to the compiler
    final Plan programPlan = environment.createProgramPlan();
    final OptimizedPlan optimizedPlan = compileNoStats(programPlan);

    // the sink's predecessor is the join; inspect the ship strategy of both of its inputs
    final SinkPlanNode sinkPlanNode = optimizedPlan.getDataSinks().iterator().next();
    final DualInputPlanNode join = (DualInputPlanNode) sinkPlanNode.getPredecessor();
    final ShipStrategyType firstInputStrategy = join.getInput1().getShipStrategy();
    final ShipStrategyType secondInputStrategy = join.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, firstInputStrategy);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, secondInputStrategy);
}
use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.
the class TwoInputNode method getAlternativePlans.
/**
* Enumerates the candidate plans for this two-input node.
*
* <p>If a result was already computed it is returned from {@code cachedPlans}. Otherwise the
* method fetches the alternative plans of both predecessors and of the source of every
* broadcast connection, builds a channel for each predecessor candidate (per requested global
* property, or once if the ship strategy of the input is already fixed), and for each pair of
* channels hands the first matching and compatible {@code GlobalPropertiesPair} to
* {@code addLocalCandidates}. The collected plans are costed with the given estimator,
* pruned, cached and returned.
*
* @param estimator cost estimator; passed on to the predecessors and to
*     {@code addLocalCandidates}, and used to cost every collected plan
* @return the pruned list of candidate plans (also stored in {@code cachedPlans})
* @throws CompilerException if no candidate plan could be created; the message names
*     replicated input as the likely reason when a child was skipped for that reason, and
*     too restrictive plan hints otherwise
*/
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
// check if we have a cached version
if (this.cachedPlans != null) {
return this.cachedPlans;
}
// remembers whether a candidate was dropped because of a replicated input; only used to
// pick the error message if no plan is found at all
boolean childrenSkippedDueToReplicatedInput = false;
// step down to all producer nodes and calculate alternative plans
final List<? extends PlanNode> subPlans1 = getFirstPredecessorNode().getAlternativePlans(estimator);
final List<? extends PlanNode> subPlans2 = getSecondPredecessorNode().getAlternativePlans(estimator);
// the requested global properties recorded as interesting on each of the two inputs
final Set<RequestedGlobalProperties> intGlobal1 = this.input1.getInterestingProperties().getGlobalProperties();
final Set<RequestedGlobalProperties> intGlobal2 = this.input2.getInterestingProperties().getGlobalProperties();
// calculate alternative sub-plans for broadcast inputs
final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
List<DagConnection> broadcastConnections = getBroadcastConnections();
List<String> broadcastConnectionNames = getBroadcastConnectionNames();
// connections and names are read by the same index
for (int i = 0; i < broadcastConnections.size(); i++) {
DagConnection broadcastConnection = broadcastConnections.get(i);
String broadcastConnectionName = broadcastConnectionNames.get(i);
List<PlanNode> broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);
// wrap the plan candidates in named channels
HashSet<NamedChannel> broadcastChannels = new HashSet<NamedChannel>(broadcastPlanCandidates.size());
for (PlanNode plan : broadcastPlanCandidates) {
final NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
DataExchangeMode exMode = DataExchangeMode.select(broadcastConnection.getDataExchangeMode(), ShipStrategyType.BROADCAST, broadcastConnection.isBreakingPipeline());
c.setShipStrategy(ShipStrategyType.BROADCAST, exMode);
broadcastChannels.add(c);
}
broadcastPlanChannels.add(broadcastChannels);
}
// collect the global and local property pairs of all operator descriptors of this node;
// the sets remove duplicates before the pairs are copied into arrays
final GlobalPropertiesPair[] allGlobalPairs;
final LocalPropertiesPair[] allLocalPairs;
{
Set<GlobalPropertiesPair> pairsGlob = new HashSet<GlobalPropertiesPair>();
Set<LocalPropertiesPair> pairsLoc = new HashSet<LocalPropertiesPair>();
for (OperatorDescriptorDual ods : getProperties()) {
pairsGlob.addAll(ods.getPossibleGlobalProperties());
pairsLoc.addAll(ods.getPossibleLocalProperties());
}
allGlobalPairs = pairsGlob.toArray(new GlobalPropertiesPair[pairsGlob.size()]);
allLocalPairs = pairsLoc.toArray(new LocalPropertiesPair[pairsLoc.size()]);
}
final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
final ExecutionMode input1Mode = this.input1.getDataExchangeMode();
final ExecutionMode input2Mode = this.input2.getDataExchangeMode();
// an input "changes parallelism" when its predecessor's parallelism differs from this node's
final int parallelism = getParallelism();
final int inParallelism1 = getFirstPredecessorNode().getParallelism();
final int inParallelism2 = getSecondPredecessorNode().getParallelism();
final boolean dopChange1 = parallelism != inParallelism1;
final boolean dopChange2 = parallelism != inParallelism2;
final boolean input1breaksPipeline = this.input1.isBreakingPipeline();
final boolean input2breaksPipeline = this.input2.isBreakingPipeline();
// create all candidates
for (PlanNode child1 : subPlans1) {
if (child1.getGlobalProperties().isFullyReplicated()) {
// fully replicated input is always locally forwarded if parallelism is not changed
if (dopChange1) {
// can not continue with this child
childrenSkippedDueToReplicatedInput = true;
continue;
} else {
// NOTE(review): this sets the strategy on the connection itself, so (assuming a plain
// setter) FORWARD stays in effect for all later child1 candidates too - confirm intended
this.input1.setShipStrategy(ShipStrategyType.FORWARD);
}
}
for (PlanNode child2 : subPlans2) {
if (child2.getGlobalProperties().isFullyReplicated()) {
// fully replicated input is always locally forwarded if parallelism is not changed
if (dopChange2) {
// can not continue with this child
childrenSkippedDueToReplicatedInput = true;
continue;
} else {
// NOTE(review): same connection-level mutation as for input 1 above - confirm intended
this.input2.setShipStrategy(ShipStrategyType.FORWARD);
}
}
// skip pairs of children that are not branch compatible
// (presumably: that do not build upon the same candidate at a joined branch plan)
if (!areBranchCompatible(child1, child2)) {
continue;
}
for (RequestedGlobalProperties igps1 : intGlobal1) {
// create a candidate channel for the first input. mark it cached, if the
// connection says so
final Channel c1 = new Channel(child1, this.input1.getMaterializationMode());
if (this.input1.getShipStrategy() == null) {
// free to choose the ship strategy
igps1.parameterizeChannel(c1, dopChange1, input1Mode, input1breaksPipeline);
// if the parallelism changes and the chosen strategy is not a network strategy,
// reset the channel's global properties
if (dopChange1 && !c1.getShipStrategy().isNetworkStrategy()) {
c1.getGlobalProperties().reset();
}
} else {
// ship strategy fixed by compiler hint (or forced to FORWARD above for a replicated input)
ShipStrategyType shipType = this.input1.getShipStrategy();
DataExchangeMode exMode = DataExchangeMode.select(input1Mode, shipType, input1breaksPipeline);
// pass the key fields along with the strategy when keys are defined for this input
if (this.keys1 != null) {
c1.setShipStrategy(shipType, this.keys1.toFieldList(), exMode);
} else {
c1.setShipStrategy(shipType, exMode);
}
if (dopChange1) {
c1.adjustGlobalPropertiesForFullParallelismChange();
}
}
for (RequestedGlobalProperties igps2 : intGlobal2) {
// create a candidate channel for the second input. mark it cached, if the
// connection says so
final Channel c2 = new Channel(child2, this.input2.getMaterializationMode());
if (this.input2.getShipStrategy() == null) {
// free to choose the ship strategy
igps2.parameterizeChannel(c2, dopChange2, input2Mode, input2breaksPipeline);
// if the parallelism changes and the chosen strategy is not a network strategy,
// reset the channel's global properties
if (dopChange2 && !c2.getShipStrategy().isNetworkStrategy()) {
c2.getGlobalProperties().reset();
}
} else {
// ship strategy fixed by compiler hint (or forced to FORWARD above for a replicated input)
ShipStrategyType shipType = this.input2.getShipStrategy();
DataExchangeMode exMode = DataExchangeMode.select(input2Mode, shipType, input2breaksPipeline);
// pass the key fields along with the strategy when keys are defined for this input
if (this.keys2 != null) {
c2.setShipStrategy(shipType, this.keys2.toFieldList(), exMode);
} else {
c2.setShipStrategy(shipType, exMode);
}
if (dopChange2) {
c2.adjustGlobalPropertiesForFullParallelismChange();
}
}
// take the first global property pair that both channels meet and that some operator
// descriptor reports as compatible; 'break outer' stops after that first match
outer: for (GlobalPropertiesPair gpp : allGlobalPairs) {
if (gpp.getProperties1().isMetBy(c1.getGlobalProperties()) && gpp.getProperties2().isMetBy(c2.getGlobalProperties())) {
for (OperatorDescriptorDual desc : getProperties()) {
if (desc.areCompatible(gpp.getProperties1(), gpp.getProperties2(), c1.getGlobalProperties(), c2.getGlobalProperties())) {
// c1 is shared by all iterations over intGlobal2, so it is cloned before the required
// properties are set; c2 was created in this iteration and is used directly
Channel c1Clone = c1.clone();
c1Clone.setRequiredGlobalProps(gpp.getProperties1());
c2.setRequiredGlobalProps(gpp.getProperties2());
// we form a valid combination, so create the local
// candidates
// for this
addLocalCandidates(c1Clone, c2, broadcastPlanChannels, igps1, igps2, outputPlans, allLocalPairs, estimator);
break outer;
}
}
}
}
// if the ship strategy of input 2 is fixed, the channel was not built from igps2,
// so we can stop after the first
if (this.input2.getShipStrategy() != null) {
break;
}
}
// if the ship strategy of input 1 is fixed, the channel was not built from igps1,
// so we can stop after the first
if (this.input1.getShipStrategy() != null) {
break;
}
}
}
}
// no candidate at all: fail, naming the most likely reason
if (outputPlans.isEmpty()) {
if (childrenSkippedDueToReplicatedInput) {
throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
} else {
throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
}
}
// cost and prune the plans
for (PlanNode node : outputPlans) {
estimator.costOperator(node);
}
prunePlanAlternatives(outputPlans);
outputPlans.trimToSize();
// cache the result so that later calls return it directly
this.cachedPlans = outputPlans;
return outputPlans;
}
use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.
the class SingleInputNode method setInput.
/**
 * Connects this node to the optimizer node of its single input operator.
 *
 * <p>An optional ship strategy hint is read from the operator's parameters first; if present
 * and recognized, it is set on the newly created connection. The connection is registered as
 * this node's incoming connection and as an outgoing connection of the predecessor.
 *
 * @param contractToNode lookup from operators to their optimizer nodes
 * @param defaultExchangeMode exchange mode passed to the new connection
 * @throws CompilerException if the ship strategy hint is not recognized, or if the operator
 *     has no input
 */
@Override
public void setInput(Map<Operator<?>, OptimizerNode> contractToNode, ExecutionMode defaultExchangeMode) throws CompilerException {
    // translate an internal ship strategy hint, if any, into a ship strategy type
    final Configuration parameters = getOperator().getParameters();
    final String hint = parameters.getString(Optimizer.HINT_SHIP_STRATEGY, null);

    ShipStrategyType hintedStrategy = null;
    if (hint != null) {
        if (hint.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH)) {
            hintedStrategy = ShipStrategyType.PARTITION_HASH;
        } else if (hint.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_RANGE)) {
            hintedStrategy = ShipStrategyType.PARTITION_RANGE;
        } else if (hint.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_FORWARD)) {
            hintedStrategy = ShipStrategyType.FORWARD;
        } else if (hint.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION)) {
            hintedStrategy = ShipStrategyType.PARTITION_RANDOM;
        } else {
            throw new CompilerException("Unrecognized ship strategy hint: " + hint);
        }
    }

    // an operator without an input cannot be connected
    final Operator<?> inputOperator = ((SingleInputOperator<?, ?, ?>) getOperator()).getInput();
    if (inputOperator == null) {
        throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input.");
    }

    // build the connection from the predecessor node to this node
    final OptimizerNode predecessor = contractToNode.get(inputOperator);
    final DagConnection connection = new DagConnection(predecessor, this, defaultExchangeMode);
    if (hintedStrategy != null) {
        connection.setShipStrategy(hintedStrategy);
    }

    // register the connection on both of its ends
    setIncomingConnection(connection);
    predecessor.addOutgoingConnection(connection);
}
use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.
the class WorksetIterationsJavaApiCompilerTest method testJavaApiWithDeferredSoltionSetUpdateWithMapper.
/**
 * Compiles the Java API test plan obtained from {@code getJavaTestPlan(false, true)} and checks
 * the ship strategies and keys of the two joins, the workset reducer and the solution delta
 * mapper, then generates the job graph from the optimized plan.
 */
@Test
public void testJavaApiWithDeferredSoltionSetUpdateWithMapper() {
    try {
        final Plan testPlan = getJavaTestPlan(false, true);
        final OptimizedPlan optimized = compileNoStats(testPlan);

        final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);
        final DualInputPlanNode invariantJoin = nodes.getNode(JOIN_WITH_INVARIANT_NAME);
        final DualInputPlanNode solutionSetJoin = nodes.getNode(JOIN_WITH_SOLUTION_SET);
        final SingleInputPlanNode nextWorksetReducer = nodes.getNode(NEXT_WORKSET_REDUCER_NAME);
        final SingleInputPlanNode solutionDeltaMapper = nodes.getNode(SOLUTION_DELTA_MAPPER_NAME);

        // The iteration preserves the partitioning in the reducer, so the first partitioning
        // happens outside the loop and the in-loop partitioning happens before the final reducer.

        // join with the invariant input
        assertEquals(ShipStrategyType.FORWARD, invariantJoin.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, invariantJoin.getInput2().getShipStrategy());
        assertEquals(new FieldList(1, 2), invariantJoin.getKeysForInput1());
        assertEquals(new FieldList(1, 2), invariantJoin.getKeysForInput2());

        // join with the solution set
        assertEquals(ShipStrategyType.PARTITION_HASH, solutionSetJoin.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, solutionSetJoin.getInput2().getShipStrategy());
        assertEquals(new FieldList(1, 0), solutionSetJoin.getKeysForInput1());

        // next-workset reducer
        assertEquals(ShipStrategyType.PARTITION_HASH, nextWorksetReducer.getInput().getShipStrategy());
        assertEquals(new FieldList(1, 2), nextWorksetReducer.getKeys(0));

        // currently, the system may partition either before or after the mapper
        final ShipStrategyType intoMapper = solutionDeltaMapper.getInput().getShipStrategy();
        final ShipStrategyType outOfMapper = solutionDeltaMapper.getOutgoingChannels().get(0).getShipStrategy();
        final boolean partitionedAfterMapper =
                intoMapper == ShipStrategyType.FORWARD && outOfMapper == ShipStrategyType.PARTITION_HASH;
        final boolean partitionedBeforeMapper =
                outOfMapper == ShipStrategyType.FORWARD && intoMapper == ShipStrategyType.PARTITION_HASH;
        assertTrue(partitionedAfterMapper || partitionedBeforeMapper);

        // the optimized plan must also translate into a job graph
        new JobGraphGenerator().compileJobGraph(optimized);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Aggregations