Search in sources :

Example 61 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class PartitionITCase method testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders.

/**
 * Range-partitions a generated sequence by a nested POJO key (extracted through a
 * key selector, ascending order) and verifies the resulting partitions.
 *
 * <p>Each value {@code v} in {@code [0, 10000]} is keyed by
 * {@code new ComparablePojo(v / 5000, v % 5000)}. After partitioning, every partition is
 * reduced to a (min, max) pair of keys. The pairs are then sorted and checked so that:
 * <ul>
 *   <li>within a pair, min is not greater than max;</li>
 *   <li>consecutive pairs do not overlap (previous max is strictly smaller than the
 *       next min);</li>
 *   <li>when consecutive pairs share the same {@code first} value, the {@code second}
 *       values are adjacent.</li>
 * </ul>
 *
 * @throws Exception if executing the program fails
 */
@Test
public void testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple2<ComparablePojo, Long>> dataSet = env.generateSequence(0, 10000).map(new MapFunction<Long, Tuple2<ComparablePojo, Long>>() {

        @Override
        public Tuple2<ComparablePojo, Long> map(Long value) throws Exception {
            return new Tuple2<>(new ComparablePojo(value / 5000, value % 5000), value);
        }
    });
    final List<Tuple2<ComparablePojo, ComparablePojo>> collected = dataSet.partitionByRange(new KeySelector<Tuple2<ComparablePojo, Long>, ComparablePojo>() {

        @Override
        public ComparablePojo getKey(Tuple2<ComparablePojo, Long> value) throws Exception {
            return value.f0;
        }
    }).withOrders(Order.ASCENDING).mapPartition(new MinMaxSelector<>(new ComparablePojoComparator())).mapPartition(new ExtractComparablePojo()).collect();
    final Comparator<Tuple2<ComparablePojo, ComparablePojo>> pojoComparator = new Comparator<Tuple2<ComparablePojo, ComparablePojo>>() {

        @Override
        public int compare(Tuple2<ComparablePojo, ComparablePojo> o1, Tuple2<ComparablePojo, ComparablePojo> o2) {
            // Order the (min, max) pairs by their minimum. Comparing one pair's min with the
            // other pair's max would make compare(x, x) non-zero whenever min != max, which
            // violates the Comparator contract and may cause the sort to misorder or throw.
            return o1.f0.compareTo(o2.f0);
        }
    };
    Collections.sort(collected, pojoComparator);
    // Max key of the previously visited partition; null until the first pair has been seen.
    ComparablePojo previousMax = null;
    for (Tuple2<ComparablePojo, ComparablePojo> element : collected) {
        assertTrue("Min element in each partition should be smaller than max.", element.f0.compareTo(element.f1) <= 0);
        if (previousMax != null) {
            assertTrue("Partitions overlap. Previous max should be smaller than current min.", previousMax.compareTo(element.f0) < 0);
            if (previousMax.first.equals(element.f0.first)) {
                assertEquals("Ordering on the second field should be continous.", previousMax.second - 1, element.f0.second.longValue());
            }
        }
        previousMax = element.f1;
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Comparator(java.util.Comparator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 62 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class PartitionITCase method testRangePartitionByKeySelector.

/**
 * Range-partitions the 3-tuple test data set using {@code KeySelector1}, runs
 * {@code UniqueTupleLongMapper} over each partition and expects the collected output,
 * compared as text, to be the values 1 through 6.
 *
 * @throws Exception if executing the program fails
 */
@Test
public void testRangePartitionByKeySelector() throws Exception {
    // Test range partition by key selector
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
    final List<Long> actual = input
            .partitionByRange(new KeySelector1())
            .mapPartition(new UniqueTupleLongMapper())
            .collect();

    compareResultAsText(actual, "1\n2\n3\n4\n5\n6\n");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 63 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class PartitionITCase method testRangePartitionInIteration.

/**
 * Builds a delta iteration whose step function range-partitions the workset and expects
 * the program to fail with an {@code InvalidProgramException}.
 *
 * @throws Exception the expected {@code InvalidProgramException}, or any other failure
 */
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
    // Does not apply for collection execution: throw the expected exception directly
    // so the test is satisfied in that mode without building the program.
    if (TestExecutionMode.COLLECTION == super.mode) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Pairs every number with its decimal string representation.
    final MapFunction<Long, Tuple2<Long, String>> toNumberAndText = new MapFunction<Long, Tuple2<Long, String>>() {

        @Override
        public Tuple2<Long, String> map(Long number) throws Exception {
            return new Tuple2<>(number, Long.toString(number));
        }
    };
    final DataSet<Tuple2<Long, String>> pairs = env.generateSequence(0, 10000).map(toNumberAndText);

    final DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> iteration = pairs.iterateDelta(pairs, 10, 0);

    // Verify that range partition is not allowed in iteration
    final DataSet<Tuple2<Long, String>> step = iteration.getWorkset()
            .partitionByRange(1)
            .join(iteration.getSolutionSet())
            .where(0)
            .equalTo(0)
            .projectFirst(0)
            .projectSecond(1);

    final DataSet<Tuple2<Long, String>> closed = iteration.closeWith(step, step);

    // should fail
    closed.collect();
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 64 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class PartitionITCase method testHashPartitionByKeyFieldAndDifferentParallelism.

/**
 * Hash-partitions the 3-tuple test data set on field 1 with a partition-operator
 * parallelism (4) that differs from the environment parallelism (3), runs
 * {@code UniqueTupleLongMapper} over each partition and expects the collected output,
 * compared as text, to be the values 1 through 6.
 *
 * @throws Exception if executing the program fails
 */
@Test
public void testHashPartitionByKeyFieldAndDifferentParallelism() throws Exception {
    // Test hash partition by key field and different parallelism
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
    final List<Long> actual = input
            .partitionByHash(1)
            .setParallelism(4)
            .mapPartition(new UniqueTupleLongMapper())
            .collect();

    compareResultAsText(actual, "1\n2\n3\n4\n5\n6\n");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 65 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class PartitionITCase method testHashPartitionWithKeyExpression.

/**
 * Hash-partitions the duplicate-POJO test data set on the key expression
 * {@code "nestedPojo.longNumber"} with a partition-operator parallelism (4) that differs
 * from the environment parallelism (3), runs {@code UniqueNestedPojoLongMapper} over each
 * partition and expects the collected output, compared as text, to be 10000, 20000 and
 * 30000.
 *
 * @throws Exception if executing the program fails
 */
@Test
public void testHashPartitionWithKeyExpression() throws Exception {
    // Test hash partition with key expression
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    final DataSet<POJO> pojos = CollectionDataSets.getDuplicatePojoDataSet(env);
    final List<Long> actual = pojos
            .partitionByHash("nestedPojo.longNumber")
            .setParallelism(4)
            .mapPartition(new UniqueNestedPojoLongMapper())
            .collect();

    compareResultAsText(actual, "10000\n20000\n30000\n");
}
Also used : POJO(org.apache.flink.test.javaApiOperators.util.CollectionDataSets.POJO) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Test(org.junit.Test)

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)1247 Test (org.junit.Test)1090 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)374 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)264 Plan (org.apache.flink.api.common.Plan)238 Tuple5 (org.apache.flink.api.java.tuple.Tuple5)236 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)199 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)139 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)138 Vertex (org.apache.flink.graph.Vertex)93 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)73 Edge (org.apache.flink.graph.Edge)70 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)66 ArrayList (java.util.ArrayList)57 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)49 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)44 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)39 BatchTableEnvironment (org.apache.flink.table.api.java.BatchTableEnvironment)38 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)37 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)35