Use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
The class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields2.
@Test
public void testRangePartitionOperatorPreservesFields2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data =
                env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
        PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);
        rangePartitioned
                .groupBy(1)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        data.groupBy(0)
                .aggregate(Aggregations.SUM, 1)
                .map(new MapFunction<Tuple2<Long, Long>, Long>() {
                    @Override
                    public Long map(Tuple2<Long, Long> value) throws Exception {
                        return value.f1;
                    }
                })
                .output(new DiscardingOutputFormat<Long>());
        rangePartitioned
                .filter(new FilterFunction<Tuple2<Long, Long>>() {
                    @Override
                    public boolean filter(Tuple2<Long, Long> value) throws Exception {
                        return value.f0 % 2 == 0;
                    }
                })
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover =
                (SingleInputPlanNode) partitionNode.getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());
        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(3, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());
        List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
        assertEquals(2, partitionOutputChannels.size());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
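The test reuses IdentityGroupReducerCombinable from Flink's optimizer test functions without showing it. Below is a minimal sketch of such a combinable identity group reducer, assuming the standard DataSet API function types; the actual class lives in org.apache.flink.optimizer.testfunctions and may differ in detail.

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.util.Collector;

// Sketch only: emits every element of the group unchanged; because it also
// implements GroupCombineFunction, the optimizer may insert a combiner for it.
public class IdentityGroupReducerCombinable<T>
        extends RichGroupReduceFunction<T, T>
        implements GroupCombineFunction<T, T> {

    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        for (T value : values) {
            out.collect(value);
        }
    }

    @Override
    public void combine(Iterable<T> values, Collector<T> out) {
        reduce(values, out);
    }
}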
Use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
The class ConnectedComponentsCoGroupTest, method testWorksetConnectedComponents.
@Test
public void testWorksetConnectedComponents() {
    Plan plan = getConnectedComponentsCoGroupPlan();
    plan.setExecutionConfig(new ExecutionConfig());
    OptimizedPlan optPlan = compileNoStats(plan);
    OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
    if (PRINT_PLAN) {
        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        String json = dumper.getOptimizerPlanAsJSON(optPlan);
        System.out.println(json);
    }
    SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
    SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
    SinkPlanNode sink = or.getNode(SINK);
    WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);
    DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
    DualInputPlanNode cogroup = or.getNode(MIN_ID_AND_UPDATE);
    // --------------------------------------------------------------------
    // Plan validation:
    //
    // We expect the plan to go with a sort-merge join, because the CoGroup
    // sorts and the join in the successive iteration can re-exploit the sorting.
    // --------------------------------------------------------------------
    // test all drivers
    Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.INNER_MERGE, neighborsJoin.getDriverStrategy());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());
    Assert.assertEquals(DriverStrategy.CO_GROUP, cogroup.getDriverStrategy());
    Assert.assertEquals(set0, cogroup.getKeysForInput1());
    Assert.assertEquals(set0, cogroup.getKeysForInput2());
    // test all the shipping strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());
    // workset
    Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy());
    // edges
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy());
    Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
    Assert.assertTrue(neighborsJoin.getInput2().getTempMode().isCached());
    // min id
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, cogroup.getInput1().getShipStrategy());
    // solution set
    Assert.assertEquals(ShipStrategyType.FORWARD, cogroup.getInput2().getShipStrategy());
    // test all the local strategies
    Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
    // the sort for the neighbor join in the first iteration is pushed out of the loop
    Assert.assertEquals(LocalStrategy.SORT, iter.getInitialWorksetInput().getLocalStrategy());
    // workset
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy());
    // edges
    Assert.assertEquals(LocalStrategy.SORT, neighborsJoin.getInput2().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.SORT, cogroup.getInput1().getLocalStrategy());
    // solution set
    Assert.assertEquals(LocalStrategy.NONE, cogroup.getInput2().getLocalStrategy());
    // check the caches
    Assert.assertTrue(TempMode.CACHED == neighborsJoin.getInput2().getTempMode());
    JobGraphGenerator jgg = new JobGraphGenerator();
    jgg.compileJobGraph(optPlan);
}
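When one of the strategy assertions above fails, it helps to see what the optimizer actually picked. A small hypothetical helper (not part of the test above) that dumps the chosen strategies of a two-input node such as neighborsJoin or cogroup, using the same accessors the assertions rely on:

import org.apache.flink.optimizer.plan.DualInputPlanNode;

// Hypothetical debugging aid: prints the driver strategy plus the ship and
// local strategies (and ship keys) of both inputs of a two-input plan node.
static void dumpStrategies(DualInputPlanNode node) {
    System.out.println("driver strategy: " + node.getDriverStrategy());
    System.out.println("input1: ship=" + node.getInput1().getShipStrategy()
            + ", local=" + node.getInput1().getLocalStrategy()
            + ", keys=" + node.getInput1().getShipStrategyKeys());
    System.out.println("input2: ship=" + node.getInput2().getShipStrategy()
            + ", local=" + node.getInput2().getLocalStrategy()
            + ", keys=" + node.getInput2().getShipStrategyKeys());
}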
Use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
The class PageRankCompilerTest, method testPageRank.
@Test
public void testPageRank() {
    try {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // get input data
        DataSet<Long> pagesInput = env.fromElements(1L);
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> linksInput = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        // assign initial rank to pages
        DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.map(new RankAssigner(1.0d / 10));
        // build adjacency list from link input
        DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
                linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());
        // set iterative data set
        IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(10);
        Configuration cfg = new Configuration();
        cfg.setString(Optimizer.HINT_LOCAL_STRATEGY, Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
        DataSet<Tuple2<Long, Double>> newRanks = iteration
                .join(adjacencyListInput).where(0).equalTo(0).withParameters(cfg)
                .flatMap(new JoinVertexWithEdgesMatch())
                .groupBy(0).aggregate(SUM, 1)
                .map(new Dampener(0.85, 10));
        DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
                newRanks,
                newRanks.join(iteration).where(0).equalTo(0).filter(new EpsilonFilter()));
        finalPageRanks.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());
        // get the plan and compile it
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sinkPlanNode = (SinkPlanNode) op.getDataSinks().iterator().next();
        BulkIterationPlanNode iterPlanNode = (BulkIterationPlanNode) sinkPlanNode.getInput().getSource();
        // check that the partitioning is pushed out of the first loop
        Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iterPlanNode.getInput().getShipStrategy());
        Assert.assertEquals(LocalStrategy.NONE, iterPlanNode.getInput().getLocalStrategy());
        BulkPartialSolutionPlanNode partSolPlanNode = iterPlanNode.getPartialSolutionPlanNode();
        Assert.assertEquals(ShipStrategyType.FORWARD, partSolPlanNode.getOutgoingChannels().get(0).getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
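RankAssigner, Dampener, and EpsilonFilter come from Flink's PageRank example and are not shown here. As one illustration, a sketch of what the EpsilonFilter convergence check can look like: the default join of newRanks with iteration yields Tuple2 pairs of the new and old rank records, and the threshold constant below is an assumption.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Sketch: keeps only vertices whose rank changed by more than EPSILON, so the
// closeWith() termination criterion keeps iterating while any rank still moves.
public static final class EpsilonFilter
        implements FilterFunction<Tuple2<Tuple2<Long, Double>, Tuple2<Long, Double>>> {

    private static final double EPSILON = 0.0001; // assumed threshold

    @Override
    public boolean filter(Tuple2<Tuple2<Long, Double>, Tuple2<Long, Double>> value) {
        return Math.abs(value.f0.f1 - value.f1.f1) > EPSILON;
    }
}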
Use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
The class RelationalQueryCompilerTest, method testQueryGeneric.
private void testQueryGeneric(
        Plan p,
        long orderSize,
        long lineitemSize,
        float orderSelectivity,
        float joinSelectivity,
        boolean broadcastOkay,
        boolean partitionedOkay,
        boolean hashJoinFirstOkay,
        boolean hashJoinSecondOkay,
        boolean mergeJoinOkay) {
    try {
        // set statistics
        OperatorResolver cr = getContractResolver(p);
        GenericDataSourceBase<?, ?> ordersSource = cr.getNode(ORDERS);
        GenericDataSourceBase<?, ?> lineItemSource = cr.getNode(LINEITEM);
        SingleInputOperator<?, ?, ?> mapper = cr.getNode(MAPPER_NAME);
        DualInputOperator<?, ?, ?, ?> joiner = cr.getNode(JOIN_NAME);
        setSourceStatistics(ordersSource, orderSize, 100f);
        setSourceStatistics(lineItemSource, lineitemSize, 140f);
        mapper.getCompilerHints().setAvgOutputRecordSize(16f);
        mapper.getCompilerHints().setFilterFactor(orderSelectivity);
        joiner.getCompilerHints().setFilterFactor(joinSelectivity);
        // compile
        final OptimizedPlan plan = compileWithStats(p);
        final OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(plan);
        // get the nodes from the final plan
        final SinkPlanNode sink = or.getNode(SINK);
        final SingleInputPlanNode reducer = or.getNode(REDUCE_NAME);
        final SingleInputPlanNode combiner =
                reducer.getPredecessor() instanceof SingleInputPlanNode
                        ? (SingleInputPlanNode) reducer.getPredecessor()
                        : null;
        final DualInputPlanNode join = or.getNode(JOIN_NAME);
        final SingleInputPlanNode filteringMapper = or.getNode(MAPPER_NAME);
        checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);
        // check the possible variants and that the variant is allowed in this specific setting
        if (checkBroadcastShipStrategies(join, reducer, combiner)) {
            Assert.assertTrue("Broadcast join incorrectly chosen.", broadcastOkay);
            if (checkHashJoinStrategies(join, reducer, true)) {
                Assert.assertTrue("Hash join (build orders) incorrectly chosen", hashJoinFirstOkay);
            } else if (checkHashJoinStrategies(join, reducer, false)) {
                Assert.assertTrue("Hash join (build lineitem) incorrectly chosen", hashJoinSecondOkay);
            } else if (checkBroadcastMergeJoin(join, reducer)) {
                Assert.assertTrue("Merge join incorrectly chosen", mergeJoinOkay);
            } else {
                Assert.fail("Plan has no correct hash join or merge join strategies.");
            }
        } else if (checkRepartitionShipStrategies(join, reducer, combiner)) {
            Assert.assertTrue("Partitioned join incorrectly chosen.", partitionedOkay);
            if (checkHashJoinStrategies(join, reducer, true)) {
                Assert.assertTrue("Hash join (build orders) incorrectly chosen", hashJoinFirstOkay);
            } else if (checkHashJoinStrategies(join, reducer, false)) {
                Assert.assertTrue("Hash join (build lineitem) incorrectly chosen", hashJoinSecondOkay);
            } else if (checkRepartitionMergeJoin(join, reducer)) {
                Assert.assertTrue("Merge join incorrectly chosen", mergeJoinOkay);
            } else {
                Assert.fail("Plan has no correct hash join or merge join strategies.");
            }
        } else {
            Assert.fail("Plan has neither a correct broadcast join nor a correct partitioned join configuration.");
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
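setSourceStatistics and compileWithStats are provided by Flink's CompilerTestBase. A hedged sketch of how such a helper can feed synthetic size estimates into the cost-based compile step; the dataStats field, the counter, and the key scheme below are assumptions, not the verbatim test-base code:

import org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics;
import org.apache.flink.api.common.operators.GenericDataSourceBase;
import org.apache.flink.optimizer.DataStatistics;

// Sketch: registers fake base statistics under a unique key and points the
// source at that key, so the optimizer's cost model sees the desired sizes.
private int statCounter; // assumed counter for unique keys
private final DataStatistics dataStats = new DataStatistics();

private void setSourceStatistics(GenericDataSourceBase<?, ?> source,
                                 long estimatedSize, float avgWidthPerRecord) {
    final String key = "testStats-" + (statCounter++);
    dataStats.cacheBaseStatistics(
            new FileBaseStatistics(Long.MAX_VALUE, estimatedSize, avgWidthPerRecord), key);
    source.setStatisticsKey(key);
}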
Use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
The class PregelCompilerTest, method testPregelCompilerWithBroadcastVariable.
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
    try {
        final String BC_VAR_NAME = "borat variable";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        // compose test program
        {
            DataSet<Long> bcVar = env.fromElements(1L);
            DataSet<Vertex<Long, Long>> initialVertices = env
                    .fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
                    .map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env
                    .fromElements(new Tuple2<>(1L, 2L))
                    .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
                        public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                            return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                        }
                    });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
            VertexCentricConfiguration parameters = new VertexCentricConfiguration();
            parameters.addBroadcastSet(BC_VAR_NAME, bcVar);
            DataSet<Vertex<Long, Long>> result = graph
                    .runVertexCentricIteration(new CCCompute(), null, 100, parameters)
                    .getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        Plan p = env.createProgramPlan("Pregel Connected Components");
        OptimizedPlan op = compileNoStats(p);
        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
        // check the solution set delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        assertTrue(ssDelta instanceof SingleInputPlanNode);
        SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) ssDelta).getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
        // check the computation coGroup
        DualInputPlanNode computationCoGroup = (DualInputPlanNode) ssFlatMap.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
        assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
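CCCompute is referenced but not shown; it is a connected-components compute function for Gelly's Pregel (vertex-centric) model. A sketch under the assumption that it propagates the minimum component ID, matching the ComputeFunction API that runVertexCentricIteration expects:

import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.pregel.ComputeFunction;
import org.apache.flink.graph.pregel.MessageIterator;
import org.apache.flink.types.NullValue;

// Sketch: each vertex adopts the smallest component ID it has seen and
// forwards it to its neighbors; vertices whose value does not change stay
// silent, which lets the iteration converge.
@SuppressWarnings("serial")
public static final class CCCompute extends ComputeFunction<Long, Long, NullValue, Long> {

    @Override
    public void compute(Vertex<Long, Long> vertex, MessageIterator<Long> messages) {
        long minComponent = vertex.getValue();
        for (Long msg : messages) {
            minComponent = Math.min(minComponent, msg);
        }
        if (getSuperstepNumber() == 1 || minComponent < vertex.getValue()) {
            setNewVertexValue(minComponent);
            for (Edge<Long, NullValue> edge : getEdges()) {
                sendMessageTo(edge.getTarget(), minComponent);
            }
        }
    }
}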