Search in sources :

Example 6 with Tuple7

use of org.apache.flink.api.java.tuple.Tuple7 in project flink by apache.

the class OuterJoinITCase method testJoinWithCompositeKeyExpressions.

@Test
public void testJoinWithCompositeKeyExpressions() throws Exception {
    /*
		 * selecting multiple fields using expression language
		 */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
    DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
    DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joinDs = ds1.fullOuterJoin(ds2).where("nestedPojo.longNumber", "number", "str").equalTo("f6", "f0", "f1").with(new ProjectBothFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>());
    env.setParallelism(1);
    List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> result = joinDs.collect();
    String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" + "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" + "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
    compareResultAsTuples(result, expected);
}
Also used : POJO(org.apache.flink.test.javaApiOperators.util.CollectionDataSets.POJO) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple7(org.apache.flink.api.java.tuple.Tuple7) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 7 with Tuple7

use of org.apache.flink.api.java.tuple.Tuple7 in project flink by apache.

the class OuterJoinITCase method testJoinWithNestedKeyExpression2.

@Test
public void testJoinWithNestedKeyExpression2() throws Exception {
    /*
		 * Join nested pojo against tuple (selected as an integer)
		 */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
    DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
    DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joinDs = ds1.fullOuterJoin(ds2).where("nestedPojo.longNumber").equalTo(// <--- difference!
    6).with(new ProjectBothFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>());
    List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> result = joinDs.collect();
    String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" + "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" + "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
    compareResultAsTuples(result, expected);
}
Also used : POJO(org.apache.flink.test.javaApiOperators.util.CollectionDataSets.POJO) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple7(org.apache.flink.api.java.tuple.Tuple7) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 8 with Tuple7

use of org.apache.flink.api.java.tuple.Tuple7 in project flink by apache.

the class OuterJoinITCase method testFullPojoWithFullTuple.

@Test
public void testFullPojoWithFullTuple() throws Exception {
    /*
		 * full pojo with full tuple
		 */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
    DataSet<Tuple7<Long, Integer, Integer, Long, String, Integer, String>> ds2 = CollectionDataSets.getSmallTuplebasedDataSetMatchingPojo(env);
    DataSet<Tuple2<POJO, Tuple7<Long, Integer, Integer, Long, String, Integer, String>>> joinDs = ds1.fullOuterJoin(ds2).where("*").equalTo("*").with(new ProjectBothFunction<POJO, Tuple7<Long, Integer, Integer, Long, String, Integer, String>>());
    env.setParallelism(1);
    List<Tuple2<POJO, Tuple7<Long, Integer, Integer, Long, String, Integer, String>>> result = joinDs.collect();
    String expected = "1 First (10,100,1000,One) 10000,(10000,10,100,1000,One,1,First)\n" + "2 Second (20,200,2000,Two) 20000,(20000,20,200,2000,Two,2,Second)\n" + "3 Third (30,300,3000,Three) 30000,(30000,30,300,3000,Three,3,Third)\n";
    compareResultAsTuples(result, expected);
}
Also used : POJO(org.apache.flink.test.javaApiOperators.util.CollectionDataSets.POJO) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple7(org.apache.flink.api.java.tuple.Tuple7) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 9 with Tuple7

use of org.apache.flink.api.java.tuple.Tuple7 in project flink by apache.

the class JoinITCase method testJoinNestedPojoAgainstTupleSelectedUsingString.

@Test
public void testJoinNestedPojoAgainstTupleSelectedUsingString() throws Exception {
    /*
		 * Join nested pojo against tuple (selected using a string)
		 */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
    DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
    DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joinDs = ds1.join(ds2).where("nestedPojo.longNumber").equalTo("f6");
    List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> result = joinDs.collect();
    String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" + "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" + "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
    compareResultAsTuples(result, expected);
}
Also used : POJO(org.apache.flink.test.javaApiOperators.util.CollectionDataSets.POJO) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple7(org.apache.flink.api.java.tuple.Tuple7) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 10 with Tuple7

use of org.apache.flink.api.java.tuple.Tuple7 in project flink by apache.

the class GroupOrderTest method testCoGroupWithGroupOrder.

@Test
public void testCoGroupWithGroupOrder() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple7<Long, Long, Long, Long, Long, Long, Long>> set1 = env.readCsvFile("/tmp/fake1.csv").types(Long.class, Long.class, Long.class, Long.class, Long.class, Long.class, Long.class);
    DataSet<Tuple7<Long, Long, Long, Long, Long, Long, Long>> set2 = env.readCsvFile("/tmp/fake2.csv").types(Long.class, Long.class, Long.class, Long.class, Long.class, Long.class, Long.class);
    set1.coGroup(set2).where(3, 0).equalTo(6, 0).sortFirstGroup(5, Order.DESCENDING).sortSecondGroup(1, Order.DESCENDING).sortSecondGroup(4, Order.ASCENDING).with(new IdentityCoGrouper<Tuple7<Long, Long, Long, Long, Long, Long, Long>>()).name("CoGroup").output(new DiscardingOutputFormat<Tuple7<Long, Long, Long, Long, Long, Long, Long>>()).name("Sink");
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // silence the compiler
        return;
    }
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    SinkPlanNode sinkNode = resolver.getNode("Sink");
    DualInputPlanNode coGroupNode = resolver.getNode("CoGroup");
    // verify the strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput1().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
    Channel c1 = coGroupNode.getInput1();
    Channel c2 = coGroupNode.getInput2();
    Assert.assertEquals(LocalStrategy.SORT, c1.getLocalStrategy());
    Assert.assertEquals(LocalStrategy.SORT, c2.getLocalStrategy());
    FieldList ship1 = new FieldList(3, 0);
    FieldList ship2 = new FieldList(6, 0);
    FieldList local1 = new FieldList(3, 0, 5);
    FieldList local2 = new FieldList(6, 0, 1, 4);
    Assert.assertEquals(ship1, c1.getShipStrategyKeys());
    Assert.assertEquals(ship2, c2.getShipStrategyKeys());
    Assert.assertEquals(local1, c1.getLocalStrategyKeys());
    Assert.assertEquals(local2, c2.getLocalStrategyKeys());
    Assert.assertTrue(c1.getLocalStrategySortOrder()[0] == coGroupNode.getSortOrders()[0]);
    Assert.assertTrue(c1.getLocalStrategySortOrder()[1] == coGroupNode.getSortOrders()[1]);
    Assert.assertTrue(c2.getLocalStrategySortOrder()[0] == coGroupNode.getSortOrders()[0]);
    Assert.assertTrue(c2.getLocalStrategySortOrder()[1] == coGroupNode.getSortOrders()[1]);
    // check that the local group orderings are correct
    Assert.assertEquals(false, c1.getLocalStrategySortOrder()[2]);
    Assert.assertEquals(false, c2.getLocalStrategySortOrder()[2]);
    Assert.assertEquals(true, c2.getLocalStrategySortOrder()[3]);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple7(org.apache.flink.api.java.tuple.Tuple7) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) IdentityCoGrouper(org.apache.flink.optimizer.testfunctions.IdentityCoGrouper) Test(org.junit.Test)

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)16 Tuple7 (org.apache.flink.api.java.tuple.Tuple7)16 Test (org.junit.Test)16 POJO (org.apache.flink.test.javaApiOperators.util.CollectionDataSets.POJO)15 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)12 CustomType (org.apache.flink.test.javaApiOperators.util.CollectionDataSets.CustomType)3 Plan (org.apache.flink.api.common.Plan)1 FieldList (org.apache.flink.api.common.operators.util.FieldList)1 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)1 Channel (org.apache.flink.optimizer.plan.Channel)1 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)1 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)1 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)1 IdentityCoGrouper (org.apache.flink.optimizer.testfunctions.IdentityCoGrouper)1