Use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.
From the class MultipleJoinsWithSolutionSetCompilerTest, method testMultiSolutionSetJoinPlan:
@Test
public void testMultiSolutionSetJoinPlan() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0));
        DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10);

        // add two sinks, to test the case of branching after an iteration
        result.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());
        result.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan optPlan = compileNoStats(p);

        OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
        DualInputPlanNode join1 = or.getNode(JOIN_1);
        DualInputPlanNode join2 = or.getNode(JOIN_2);

        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, join1.getDriverStrategy());
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, join2.getDriverStrategy());

        assertEquals(ShipStrategyType.PARTITION_HASH, join1.getInput2().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, join2.getInput1().getShipStrategy());

        assertEquals(SolutionSetPlanNode.class, join1.getInput1().getSource().getClass());
        assertEquals(SolutionSetPlanNode.class, join2.getInput2().getSource().getClass());

        new JobGraphGenerator().compileJobGraph(optPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test erroneous: " + e.getMessage());
    }
}
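The helper constructPlan is defined elsewhere in MultipleJoinsWithSolutionSetCompilerTest; the test above only relies on it producing a delta iteration whose step function touches the solution set twice. A minimal sketch of what such a helper could look like; the wiring and the SumValues join function are illustrative assumptions, not the real implementation:

// Hypothetical sketch only: a delta iteration joining the solution set twice,
// consistent with the assertions above (solution set as first input of JOIN_1,
// as second input of JOIN_2).
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {
    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration =
            initialData.iterateDelta(initialData, numIterations, 0);

    // JOIN_1: solution set on the first input side
    DataSet<Tuple2<Long, Double>> firstJoin = iteration.getSolutionSet()
            .join(iteration.getWorkset()).where(0).equalTo(0)
            .with(new SumValues()).name(JOIN_1);

    // JOIN_2: solution set on the second input side
    DataSet<Tuple2<Long, Double>> secondJoin = firstJoin
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new SumValues()).name(JOIN_2);

    return iteration.closeWith(secondJoin, secondJoin);
}

// Hypothetical join function that merges matching records by adding values.
public static class SumValues implements JoinFunction<Tuple2<Long, Double>, Tuple2<Long, Double>, Tuple2<Long, Double>> {

    @Override
    public Tuple2<Long, Double> join(Tuple2<Long, Double> first, Tuple2<Long, Double> second) {
        return new Tuple2<>(first.f0, first.f1 + second.f1);
    }
}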
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.
From the class RelationalQueryCompilerTest, method testQueryGeneric:
private void testQueryGeneric(Plan p, long orderSize, long lineitemSize, float orderSelectivity, float joinSelectivity, boolean broadcastOkay, boolean partitionedOkay, boolean hashJoinFirstOkay, boolean hashJoinSecondOkay, boolean mergeJoinOkay) {
    try {
        // set statistics
        OperatorResolver cr = getContractResolver(p);
        GenericDataSourceBase<?, ?> ordersSource = cr.getNode(ORDERS);
        GenericDataSourceBase<?, ?> lineItemSource = cr.getNode(LINEITEM);
        SingleInputOperator<?, ?, ?> mapper = cr.getNode(MAPPER_NAME);
        DualInputOperator<?, ?, ?, ?> joiner = cr.getNode(JOIN_NAME);
        setSourceStatistics(ordersSource, orderSize, 100f);
        setSourceStatistics(lineItemSource, lineitemSize, 140f);
        mapper.getCompilerHints().setAvgOutputRecordSize(16f);
        mapper.getCompilerHints().setFilterFactor(orderSelectivity);
        joiner.getCompilerHints().setFilterFactor(joinSelectivity);

        // compile
        final OptimizedPlan plan = compileWithStats(p);
        final OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(plan);

        // get the nodes from the final plan
        final SinkPlanNode sink = or.getNode(SINK);
        final SingleInputPlanNode reducer = or.getNode(REDUCE_NAME);
        final SingleInputPlanNode combiner = reducer.getPredecessor() instanceof SingleInputPlanNode ? (SingleInputPlanNode) reducer.getPredecessor() : null;
        final DualInputPlanNode join = or.getNode(JOIN_NAME);
        final SingleInputPlanNode filteringMapper = or.getNode(MAPPER_NAME);

        checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);

        // check the possible variants and that the variant is allowed in this specific setting
        if (checkBroadcastShipStrategies(join, reducer, combiner)) {
            Assert.assertTrue("Broadcast join incorrectly chosen.", broadcastOkay);
            if (checkHashJoinStrategies(join, reducer, true)) {
                Assert.assertTrue("Hash join (build orders) incorrectly chosen", hashJoinFirstOkay);
            } else if (checkHashJoinStrategies(join, reducer, false)) {
                Assert.assertTrue("Hash join (build lineitem) incorrectly chosen", hashJoinSecondOkay);
            } else if (checkBroadcastMergeJoin(join, reducer)) {
                Assert.assertTrue("Merge join incorrectly chosen", mergeJoinOkay);
            } else {
                Assert.fail("Plan has no correct hash join or merge join strategies.");
            }
        } else if (checkRepartitionShipStrategies(join, reducer, combiner)) {
            Assert.assertTrue("Partitioned join incorrectly chosen.", partitionedOkay);
            if (checkHashJoinStrategies(join, reducer, true)) {
                Assert.assertTrue("Hash join (build orders) incorrectly chosen", hashJoinFirstOkay);
            } else if (checkHashJoinStrategies(join, reducer, false)) {
                Assert.assertTrue("Hash join (build lineitem) incorrectly chosen", hashJoinSecondOkay);
            } else if (checkRepartitionMergeJoin(join, reducer)) {
                Assert.assertTrue("Merge join incorrectly chosen", mergeJoinOkay);
            } else {
                Assert.fail("Plan has no correct hash join or merge join strategies.");
            }
        } else {
            Assert.fail("Plan has neither a correct broadcast join nor a correct partitioned join configuration.");
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
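The harness is parameterized so callers can assert which physical strategies the optimizer may legally pick under given statistics. A hypothetical invocation; the sizes, selectivities, and the getTPCH3Plan plan-construction helper are illustrative assumptions, not taken from the real test suite:

// Hypothetical call: small orders side, large lineitem side, so broadcasting
// orders and building the hash table on it should be acceptable, while a
// sort-merge join should be rejected.
Plan p = getTPCH3Plan(); // assumed helper that builds the relational query plan
testQueryGeneric(p,
        10_000_000L,     // orderSize
        1_000_000_000L,  // lineitemSize
        0.05f,           // orderSelectivity
        0.05f,           // joinSelectivity
        true,            // broadcastOkay
        true,            // partitionedOkay
        true,            // hashJoinFirstOkay (build orders)
        false,           // hashJoinSecondOkay (build lineitem)
        false);          // mergeJoinOkay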
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.
From the class PregelCompilerTest, method testPregelCompilerWithBroadcastVariable:
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
    try {
        final String BC_VAR_NAME = "borat variable";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        // compose test program
        {
            DataSet<Long> bcVar = env.fromElements(1L);
            DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

                @Override
                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

            VertexCentricConfiguration parameters = new VertexCentricConfiguration();
            parameters.addBroadcastSet(BC_VAR_NAME, bcVar);

            DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), null, 100, parameters).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }

        Plan p = env.createProgramPlan("Pregel Connected Components");
        OptimizedPlan op = compileNoStats(p);

        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

        // check the solution set delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        assertTrue(ssDelta instanceof SingleInputPlanNode);
        SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) ssDelta).getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());

        // check the computation coGroup
        DualInputPlanNode computationCoGroup = (DualInputPlanNode) ssFlatMap.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
        assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());

        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
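The CCCompute function the test instantiates is defined elsewhere in PregelCompilerTest. A sketch of a connected-components ComputeFunction matching its use above; the body follows the standard Gelly connected-components pattern and is an assumption, not necessarily the test's exact code:

// Sketch of a Pregel connected-components compute function: propagate the
// minimum component ID seen so far to all neighbors.
@SuppressWarnings("serial")
private static final class CCCompute extends ComputeFunction<Long, Long, NullValue, Long> {

    @Override
    public void compute(Vertex<Long, Long> vertex, MessageIterator<Long> messages) {
        long currentComponent = vertex.getValue();
        for (Long msg : messages) {
            currentComponent = Math.min(currentComponent, msg);
        }
        // update and notify neighbors only when the component ID improves
        if (getSuperstepNumber() == 1 || currentComponent < vertex.getValue()) {
            setNewVertexValue(currentComponent);
            for (Edge<Long, NullValue> edge : getEdges()) {
                sendMessageTo(edge.getTarget(), currentComponent);
            }
        }
    }
}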
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.
From the class SpargelCompilerTest, method testSpargelCompilerWithBroadcastVariable:
@SuppressWarnings("serial")
@Test
public void testSpargelCompilerWithBroadcastVariable() {
    try {
        final String BC_VAR_NAME = "borat variable";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        // compose test program
        {
            DataSet<Long> bcVar = env.fromElements(1L);
            DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

                @Override
                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

            ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
            parameters.addBroadcastSetForScatterFunction(BC_VAR_NAME, bcVar);
            parameters.addBroadcastSetForGatherFunction(BC_VAR_NAME, bcVar);

            // the configuration with the broadcast sets is passed to the iteration
            DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(new ConnectedComponents.CCMessenger<Long, Long>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<Long, Long>(), 100, parameters).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }

        Plan p = env.createProgramPlan("Spargel Connected Components");
        OptimizedPlan op = compileNoStats(p);

        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

        // check the solution set join and the delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        // this is only true if the update function preserves the partitioning
        assertTrue(ssDelta instanceof DualInputPlanNode);
        DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
        assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

        // check the workset join
        DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
        assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
        assertTrue(edgeJoin.getInput1().getTempMode().isCached());
        assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());

        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
        assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
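For context, broadcast sets registered through the ScatterGatherConfiguration become visible inside the scatter and gather UDFs via getBroadcastSet. A hypothetical scatter function illustrating the lookup; this is not the CCMessenger the test actually uses:

// Hypothetical scatter function reading the broadcast variable registered
// under "borat variable" above.
@SuppressWarnings("serial")
private static final class BcAwareMessenger extends ScatterFunction<Long, Long, Long, NullValue> {

    @Override
    public void sendMessages(Vertex<Long, Long> vertex) {
        // the broadcast DataSet<Long> registered via addBroadcastSetForScatterFunction
        Collection<Long> bcVar = getBroadcastSet("borat variable");
        long seed = bcVar.iterator().next();
        for (Edge<Long, NullValue> edge : getEdges()) {
            sendMessageTo(edge.getTarget(), Math.min(vertex.getValue(), seed));
        }
    }
}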
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.
From the class GSACompilerTest, method testGSACompiler:
@Test
public void testGSACompiler() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        // compose test program
        {
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance())).map(new Tuple3ToEdgeMap<Long, NullValue>());
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);
            DataSet<Vertex<Long, Long>> result = graph.runGatherSumApplyIteration(new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(), 100).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }

        Plan p = env.createProgramPlan("GSA Connected Components");
        OptimizedPlan op = compileNoStats(p);

        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
        assertEquals(PartitioningProperty.HASH_PARTITIONED, sink.getGlobalProperties().getPartitioning());

        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

        // check the solution set join and the delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        // this is only true if the update function preserves the partitioning
        assertTrue(ssDelta instanceof DualInputPlanNode);
        DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
        assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

        // check the workset join
        SingleInputPlanNode sumReducer = (SingleInputPlanNode) ssJoin.getInput1().getSource();
        SingleInputPlanNode gatherMapper = (SingleInputPlanNode) sumReducer.getInput().getSource();
        DualInputPlanNode edgeJoin = (DualInputPlanNode) gatherMapper.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
        // input1 is the workset
        assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput1().getShipStrategy());
        // input2 is the edges
        assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput2().getShipStrategy());
        assertTrue(edgeJoin.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), edgeJoin.getInput2().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
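The three UDFs wired into runGatherSumApplyIteration, and the InitVertices mapper (which presumably maps each vertex ID to itself as the initial component ID), are defined elsewhere in GSACompilerTest. A sketch of what the standard connected-components versions of the three GSA phases look like; the bodies follow the usual GSA pattern and are assumptions:

// Sketch of the three GSA phases for connected components (assumed bodies).
@SuppressWarnings("serial")
private static final class GatherNeighborIds extends GatherFunction<Long, NullValue, Long> {

    @Override
    public Long gather(Neighbor<Long, NullValue> neighbor) {
        // gather: each neighbor contributes its current component ID
        return neighbor.getNeighborValue();
    }
}

@SuppressWarnings("serial")
private static final class SelectMinId extends SumFunction<Long, NullValue, Long> {

    @Override
    public Long sum(Long newValue, Long currentValue) {
        // sum: keep the smallest component ID among the gathered values
        return Math.min(newValue, currentValue);
    }
}

@SuppressWarnings("serial")
private static final class UpdateComponentId extends ApplyFunction<Long, Long, Long> {

    @Override
    public void apply(Long summedValue, Long origValue) {
        // apply: only emit an update when the component ID shrinks
        if (summedValue < origValue) {
            setResult(summedValue);
        }
    }
}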