use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class GroupOrderTest method testReduceWithGroupOrder.
@Test
public void testReduceWithGroupOrder() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple4<Long, Long, Long, Long>> set1 = env.readCsvFile("/tmp/fake.csv")
            .types(Long.class, Long.class, Long.class, Long.class);

    set1.groupBy(1).sortGroup(3, Order.DESCENDING)
            .reduceGroup(new IdentityGroupReducer<Tuple4<Long, Long, Long, Long>>()).name("Reduce")
            .output(new DiscardingOutputFormat<Tuple4<Long, Long, Long, Long>>()).name("Sink");

    Plan plan = env.createProgramPlan();

    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // silence the compiler
        return;
    }

    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    SinkPlanNode sinkNode = resolver.getNode("Sink");
    SingleInputPlanNode reducer = resolver.getNode("Reduce");

    // verify the strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

    Channel c = reducer.getInput();
    Assert.assertEquals(LocalStrategy.SORT, c.getLocalStrategy());

    FieldList ship = new FieldList(1);
    FieldList local = new FieldList(1, 3);
    Assert.assertEquals(ship, c.getShipStrategyKeys());
    Assert.assertEquals(local, c.getLocalStrategyKeys());
    Assert.assertTrue(c.getLocalStrategySortOrder()[0] == reducer.getSortOrders(0)[0]);

    // check that we indeed sort descending
    Assert.assertEquals(false, c.getLocalStrategySortOrder()[1]);
}
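DiscardingOutputFormat is a sink that simply drops every record it receives. That is why these optimizer tests use it: a DataSet program needs at least one sink to be compiled, but the tests only care about the optimized plan, not about any output. A minimal standalone sketch (class and job name are illustrative, not taken from the test above):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class DiscardingSinkExample {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // every DataSet program needs a sink; DiscardingOutputFormat satisfies
        // that requirement without materializing any output
        DataSet<Long> data = env.generateSequence(1, 100);
        data.output(new DiscardingOutputFormat<Long>());

        env.execute("discarding-sink-example");
    }
}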
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithTwoInputs.
/**
 * Checks that re-partitioning happens when the inputs of a two-input contract have different
 * parallelisms.
 *
 * Test Plan:
 * <pre>
 *
 * (source) -> reduce -\
 *                      Match -> (sink)
 * (source) -> reduce -/
 *
 * </pre>
 */
@Test
public void checkPropertyHandlingWithTwoInputs() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(5);
    DataSet<Long> set2 = env.generateSequence(0, 1).setParallelism(7);

    DataSet<Long> reduce1 = set1.groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(5);
    DataSet<Long> reduce2 = set2.groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(7);

    reduce1.join(reduce2).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).setParallelism(5)
            .output(new DiscardingOutputFormat<Long>()).setParallelism(5);

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // compile the plan to a job graph to verify that no error is thrown
    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);

    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof DualInputPlanNode) {
                DualInputPlanNode node = (DualInputPlanNode) visitable;
                Channel c1 = node.getInput1();
                Channel c2 = node.getInput2();
                Assert.assertEquals("Incompatible shipping strategy chosen for match",
                        ShipStrategyType.FORWARD, c1.getShipStrategy());
                Assert.assertEquals("Incompatible shipping strategy chosen for match",
                        ShipStrategyType.PARTITION_HASH, c2.getShipStrategy());
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // do nothing
        }
    });
}
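The Visitor<PlanNode> callback above is the general mechanism for walking an OptimizedPlan: preVisit runs on the way down and returning false stops the descent along that branch. A minimal sketch of the same pattern that tallies ship strategies instead of asserting on them, assuming an already compiled plan in oPlan and using java.util.EnumMap (the variable names are hypothetical):

// tally the ship strategy of every input channel in the plan
final Map<ShipStrategyType, Integer> strategyCounts = new EnumMap<>(ShipStrategyType.class);

oPlan.accept(new Visitor<PlanNode>() {

    @Override
    public boolean preVisit(PlanNode node) {
        for (Channel channel : node.getInputs()) {
            strategyCounts.merge(channel.getShipStrategy(), 1, Integer::sum);
        }
        return true; // keep descending into the plan
    }

    @Override
    public void postVisit(PlanNode node) {
        // nothing to do on the way back up
    }
});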
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class UnionReplacementTest method testConsecutiveUnionsWithRebalance.
/**
 * Checks that a plan with consecutive UNIONs followed by REBALANCE is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> Rebalance -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the Rebalance (ShipStrategyType.PARTITION_FORCED_REBALANCE) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithRebalance() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));

    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);

    union123.rebalance()
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");

    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);

    SingleInputPlanNode sink = resolver.getNode("out");

    // check that the partitioning is correct
    assertEquals("Sink input should be force rebalanced.",
            PartitioningProperty.FORCED_REBALANCED,
            sink.getInput().getGlobalProperties().getPartitioning());

    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertTrue(partitioner.getDriverStrategy() == DriverStrategy.UNARY_NO_OP);
    assertEquals("Partitioner input should be force rebalanced.",
            PartitioningProperty.FORCED_REBALANCED,
            partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input channel should be forwarding",
            ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());

    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();

    // all union inputs should be force rebalanced
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be force rebalanced",
                PartitioningProperty.FORCED_REBALANCED,
                c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be rebalancing",
                ShipStrategyType.PARTITION_FORCED_REBALANCE, c.getShipStrategy());
        assertTrue("Union input should be data source",
                c.getSource() instanceof SourcePlanNode);
    }
}
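rebalance() is the user-facing way to request this forced round-robin redistribution (ShipStrategyType.PARTITION_FORCED_REBALANCE), typically to even out skewed partitions before a subsequent operator. A minimal sketch with a placeholder input path:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// the filter may leave some partitions nearly empty; rebalance() forces a
// round-robin redistribution before the downstream map runs
env.readTextFile("file:///path/to/input") // placeholder path
        .filter(line -> line.startsWith("ERROR"))
        .rebalance()
        .map(String::toUpperCase)
        .output(new DiscardingOutputFormat<String>());

env.execute("rebalance-example");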
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class DistinctCompilationTest method testDistinctWithCombineHint.
@Test
public void testDistinctWithCombineHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source").setParallelism(6);

        data.distinct().setCombineHint(CombineHint.HASH).name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // check the wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());

        // check that the reducer sorts while the combiner uses the hash strategy
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0, 1), reduceNode.getKeys(0));
        assertEquals(new FieldList(0, 1), combineNode.getKeys(0));
        assertEquals(new FieldList(0, 1), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
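The CombineHint changes only the combine phase, not the result: HASH keeps an in-memory table of distinct records and tends to pay off when there are few distinct values, while SORT (the default) sorts and deduplicates and stays robust when most records are already distinct. A minimal sketch showing both hints side by side (the input data is illustrative):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Tuple2<String, Double>> data = env.fromElements(
        new Tuple2<>("a", 1.0), new Tuple2<>("a", 1.0), new Tuple2<>("b", 2.0));

// hash-based combiner: deduplicates via an in-memory table,
// effective when the number of distinct records is small
data.distinct().setCombineHint(CombineHint.HASH)
        .output(new DiscardingOutputFormat<Tuple2<String, Double>>());

// sort-based combiner (the default): sorts and deduplicates,
// robust when most records are already distinct
data.distinct().setCombineHint(CombineHint.SORT)
        .output(new DiscardingOutputFormat<Tuple2<String, Double>>());

env.execute("combine-hint-example");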
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class PregelCompilerTest method testPregelCompiler.
@SuppressWarnings("serial")
@Test
public void testPregelCompiler() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        // compose the test program
        {
            DataSet<Vertex<Long, Long>> initialVertices = env
                    .fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
                    .map(new Tuple2ToVertexMap<Long, Long>());

            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L))
                    .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
                        public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                            return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                        }
                    });

            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

            DataSet<Vertex<Long, Long>> result =
                    graph.runVertexCentricIteration(new CCCompute(), null, 100).getVertices();

            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }

        Plan p = env.createProgramPlan("Pregel Connected Components");
        OptimizedPlan op = compileNoStats(p);

        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

        // check the solution set delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        assertTrue(ssDelta instanceof SingleInputPlanNode);

        SingleInputPlanNode ssFlatMap =
                (SingleInputPlanNode) ((SingleInputPlanNode) ssDelta).getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());

        // check the computation coGroup
        DualInputPlanNode computationCoGroup = (DualInputPlanNode) ssFlatMap.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
        assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());

        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
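CCCompute is referenced above but not shown in the snippet. In Gelly's vertex-centric (Pregel) API it is a ComputeFunction; a sketch of a connected-components implementation reconstructed from that API (not verbatim from this page):

@SuppressWarnings("serial")
private static final class CCCompute extends ComputeFunction<Long, Long, NullValue, Long> {

    @Override
    public void compute(Vertex<Long, Long> vertex, MessageIterator<Long> messages) throws Exception {
        long minComponent = vertex.getValue();

        // adopt the smallest component id seen in the incoming messages
        for (Long msg : messages) {
            minComponent = Math.min(minComponent, msg);
        }

        // on the first superstep, or whenever the component id shrinks,
        // update the vertex value and propagate it to all neighbors
        if (getSuperstepNumber() == 1 || minComponent < vertex.getValue()) {
            setNewVertexValue(minComponent);
            for (Edge<Long, NullValue> edge : getEdges()) {
                sendMessageTo(edge.getTarget(), minComponent);
            }
        }
    }
}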