Use of org.apache.flink.optimizer.testfunctions.IdentityCoGrouper in project flink by apache.
From the class GroupOrderTest, method testCoGroupWithGroupOrder:
@Test
public void testCoGroupWithGroupOrder() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple7<Long, Long, Long, Long, Long, Long, Long>> set1 =
            env.readCsvFile("/tmp/fake1.csv")
                    .types(Long.class, Long.class, Long.class, Long.class, Long.class, Long.class, Long.class);
    DataSet<Tuple7<Long, Long, Long, Long, Long, Long, Long>> set2 =
            env.readCsvFile("/tmp/fake2.csv")
                    .types(Long.class, Long.class, Long.class, Long.class, Long.class, Long.class, Long.class);

    set1.coGroup(set2)
            .where(3, 0)
            .equalTo(6, 0)
            .sortFirstGroup(5, Order.DESCENDING)
            .sortSecondGroup(1, Order.DESCENDING)
            .sortSecondGroup(4, Order.ASCENDING)
            .with(new IdentityCoGrouper<Tuple7<Long, Long, Long, Long, Long, Long, Long>>())
            .name("CoGroup")
            .output(new DiscardingOutputFormat<Tuple7<Long, Long, Long, Long, Long, Long, Long>>())
            .name("Sink");

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // silence the compiler
        return;
    }

    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    SinkPlanNode sinkNode = resolver.getNode("Sink");
    DualInputPlanNode coGroupNode = resolver.getNode("CoGroup");

    // verify the strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput1().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());

    Channel c1 = coGroupNode.getInput1();
    Channel c2 = coGroupNode.getInput2();
    Assert.assertEquals(LocalStrategy.SORT, c1.getLocalStrategy());
    Assert.assertEquals(LocalStrategy.SORT, c2.getLocalStrategy());

    FieldList ship1 = new FieldList(3, 0);
    FieldList ship2 = new FieldList(6, 0);
    FieldList local1 = new FieldList(3, 0, 5);
    FieldList local2 = new FieldList(6, 0, 1, 4);
    Assert.assertEquals(ship1, c1.getShipStrategyKeys());
    Assert.assertEquals(ship2, c2.getShipStrategyKeys());
    Assert.assertEquals(local1, c1.getLocalStrategyKeys());
    Assert.assertEquals(local2, c2.getLocalStrategyKeys());

    Assert.assertTrue(c1.getLocalStrategySortOrder()[0] == coGroupNode.getSortOrders()[0]);
    Assert.assertTrue(c1.getLocalStrategySortOrder()[1] == coGroupNode.getSortOrders()[1]);
    Assert.assertTrue(c2.getLocalStrategySortOrder()[0] == coGroupNode.getSortOrders()[0]);
    Assert.assertTrue(c2.getLocalStrategySortOrder()[1] == coGroupNode.getSortOrders()[1]);

    // check that the local group orderings are correct
    Assert.assertEquals(false, c1.getLocalStrategySortOrder()[2]);
    Assert.assertEquals(false, c2.getLocalStrategySortOrder()[2]);
    Assert.assertEquals(true, c2.getLocalStrategySortOrder()[3]);
}
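Both snippets on this page exercise the optimizer, not the user function, so the co-group logic itself can be a pure pass-through. The class is not shown here; the following is a minimal sketch of what such an identity co-grouper can look like against Flink's CoGroupFunction interface (the actual testfunctions implementation may differ, e.g. by extending a Rich* base class):

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.util.Collector;

// Sketch of a no-op co-group test function. The optimizer tests on this
// page only inspect the compiled plan, so no records need to be emitted.
public class IdentityCoGrouper<T> implements CoGroupFunction<T, T, T> {

    @Override
    public void coGroup(Iterable<T> first, Iterable<T> second, Collector<T> out) {
        // intentionally empty: the sinks above discard all output anyway
    }
}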
Use of org.apache.flink.optimizer.testfunctions.IdentityCoGrouper in project flink by apache.
From the class BranchingPlansCompilerTest, method testBranchEachContractType:
@SuppressWarnings("unchecked")
@Test
public void testBranchEachContractType() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> sourceA = env.generateSequence(0, 1);
        DataSet<Long> sourceB = env.generateSequence(0, 1);
        DataSet<Long> sourceC = env.generateSequence(0, 1);

        DataSet<Long> map1 = sourceA.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduce1 = map1.groupBy("*")
                .reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");
        DataSet<Long> join1 = sourceB.union(sourceB).union(sourceC)
                .join(sourceC).where("*").equalTo("*")
                .with(new IdentityJoiner<Long>()).name("Join 1");
        DataSet<Long> coGroup1 = sourceA.coGroup(sourceB).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 1");
        DataSet<Long> cross1 = reduce1.cross(coGroup1)
                .with(new IdentityCrosser<Long>()).name("Cross 1");
        DataSet<Long> coGroup2 = cross1.coGroup(cross1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 2");
        DataSet<Long> coGroup3 = map1.coGroup(join1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 3");
        DataSet<Long> map2 = coGroup3.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> coGroup4 = map2.coGroup(join1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 4");
        DataSet<Long> coGroup5 = coGroup2.coGroup(coGroup1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 5");
        DataSet<Long> coGroup6 = reduce1.coGroup(coGroup4).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 6");
        DataSet<Long> coGroup7 = coGroup5.coGroup(coGroup6).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 7");

        coGroup7.union(sourceA).union(coGroup3).union(coGroup4).union(coGroup1)
                .output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();

        // compile the plan to a job graph to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
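The branching test wires together several similar identity-style helpers (IdentityMapper, IdentityGroupReducer, IdentityJoiner, IdentityCrosser) that are likewise not shown on this page. For orientation, pass-through implementations along these lines would fit the call sites above; these are sketches only, and the real classes in flink's testfunctions package may differ in base classes and details:

import org.apache.flink.api.common.functions.CrossFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.util.Collector;

// Pass-through sketches matching how the helpers are used above.
public class IdentityMapper<T> implements MapFunction<T, T> {
    @Override
    public T map(T value) {
        return value; // forwards each element unchanged
    }
}

class IdentityGroupReducer<T> implements GroupReduceFunction<T, T> {
    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        for (T value : values) {
            out.collect(value); // re-emits every element of the group
        }
    }
}

class IdentityJoiner<T> implements JoinFunction<T, T, T> {
    @Override
    public T join(T first, T second) {
        return first; // arbitrarily forwards the left-hand element
    }
}

class IdentityCrosser<T> implements CrossFunction<T, T, T> {
    @Override
    public T cross(T first, T second) {
        return first; // arbitrarily forwards the left-hand element
    }
}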