use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
the class PartitionITCase method testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders.
@Test
public void testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple2<ComparablePojo, Long>> dataSet = env.generateSequence(0, 10000)
            .map(new MapFunction<Long, Tuple2<ComparablePojo, Long>>() {
                @Override
                public Tuple2<ComparablePojo, Long> map(Long value) throws Exception {
                    return new Tuple2<>(new ComparablePojo(value / 5000, value % 5000), value);
                }
            });
    final List<Tuple2<ComparablePojo, ComparablePojo>> collected = dataSet
            .partitionByRange(new KeySelector<Tuple2<ComparablePojo, Long>, ComparablePojo>() {
                @Override
                public ComparablePojo getKey(Tuple2<ComparablePojo, Long> value) throws Exception {
                    return value.f0;
                }
            })
            .withOrders(Order.ASCENDING)
            .mapPartition(new MinMaxSelector<>(new ComparablePojoComparator()))
            .mapPartition(new ExtractComparablePojo())
            .collect();
    final Comparator<Tuple2<ComparablePojo, ComparablePojo>> pojoComparator =
            new Comparator<Tuple2<ComparablePojo, ComparablePojo>>() {
                @Override
                public int compare(Tuple2<ComparablePojo, ComparablePojo> o1, Tuple2<ComparablePojo, ComparablePojo> o2) {
                    // Order the (min, max) pairs by their minimum element; comparing
                    // o1.f0 against o2.f1 would break the Comparator contract for equal pairs.
                    return o1.f0.compareTo(o2.f0);
                }
            };
    Collections.sort(collected, pojoComparator);
    ComparablePojo previousMax = null;
    for (Tuple2<ComparablePojo, ComparablePojo> element : collected) {
        assertTrue("Min element in each partition should be smaller than max.", element.f0.compareTo(element.f1) <= 0);
        if (previousMax == null) {
            previousMax = element.f1;
        } else {
            assertTrue("Partitions overlap. Previous max should be smaller than current min.", previousMax.compareTo(element.f0) < 0);
            if (previousMax.first.equals(element.f0.first)) {
                assertEquals("Ordering on the second field should be continuous.", previousMax.second - 1, element.f0.second.longValue());
            }
            previousMax = element.f1;
        }
    }
}
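The MinMaxSelector helper used above is defined elsewhere in PartitionITCase and is not shown on this page. A minimal sketch of such a function, assuming it reduces each partition to a single (min, max) pair of its keys under the supplied Comparator (the generics and the empty-partition handling are assumptions, not the project's verbatim source):

import java.util.Comparator;
import java.util.Iterator;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Sketch: emits one (min, max) pair of keys per partition.
public static class MinMaxSelector<T> implements MapPartitionFunction<Tuple2<T, Long>, Tuple2<T, T>> {

    private final Comparator<T> comparator;

    public MinMaxSelector(Comparator<T> comparator) {
        this.comparator = comparator;
    }

    @Override
    public void mapPartition(Iterable<Tuple2<T, Long>> values, Collector<Tuple2<T, T>> out) {
        Iterator<Tuple2<T, Long>> it = values.iterator();
        if (!it.hasNext()) {
            return; // an empty partition contributes nothing
        }
        T min = it.next().f0;
        T max = min;
        while (it.hasNext()) {
            T value = it.next().f0;
            if (comparator.compare(value, min) < 0) {
                min = value;
            }
            if (comparator.compare(value, max) > 0) {
                max = value;
            }
        }
        out.collect(new Tuple2<>(min, max));
    }
}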
use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
the class PartitionITCase method testRangePartitionByKeySelector.
@Test
public void testRangePartitionByKeySelector() throws Exception {
    /*
     * Test range partition by key selector
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Long> uniqLongs = ds.partitionByRange(new KeySelector1()).mapPartition(new UniqueTupleLongMapper());
    List<Long> result = uniqLongs.collect();
    String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    compareResultAsText(result, expected);
}
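KeySelector1 and UniqueTupleLongMapper are likewise defined elsewhere in the test class. Given that the expected output is the six distinct Long values of the 3-tuple data set, a plausible sketch is the following (the field choice and class bodies are assumptions):

import java.util.HashSet;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

// Sketch: keys each record by its Long field (f1).
public static class KeySelector1 implements KeySelector<Tuple3<Integer, Long, String>, Long> {
    @Override
    public Long getKey(Tuple3<Integer, Long, String> in) {
        return in.f1;
    }
}

// Sketch: emits each distinct Long key once per partition. Because the
// partitioning routes all records with equal keys to the same partition,
// every key appears exactly once in the global result.
public static class UniqueTupleLongMapper implements MapPartitionFunction<Tuple3<Integer, Long, String>, Long> {
    @Override
    public void mapPartition(Iterable<Tuple3<Integer, Long, String>> records, Collector<Long> out) {
        HashSet<Long> uniq = new HashSet<>();
        for (Tuple3<Integer, Long, String> t : records) {
            uniq.add(t.f1);
        }
        for (Long key : uniq) {
            out.collect(key);
        }
    }
}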
use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
the class PartitionITCase method testRangePartitionInIteration.
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
    // does not apply for collection execution
    if (super.mode == TestExecutionMode.COLLECTION) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> source = env.generateSequence(0, 10000);
    DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
        @Override
        public Tuple2<Long, String> map(Long v) throws Exception {
            return new Tuple2<>(v, Long.toString(v));
        }
    });
    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);
    // Verify that range partitioning is not allowed in an iteration.
    DataSet<Tuple2<Long, String>> body = it.getWorkset()
            .partitionByRange(1)
            .join(it.getSolutionSet())
            .where(0).equalTo(0)
            .projectFirst(0).projectSecond(1);
    DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);
    // should fail
    result.collect();
}
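Range partitioning is rejected here because it inserts a sampling pass to estimate the key distribution, and that pass cannot run inside an iteration. Hash partitioning needs no such pass; as a point of contrast, a variant along these lines should be accepted (a sketch reusing the tuples and it variables above):

// Sketch: hash partitioning has no sampling pre-pass, so the optimizer
// accepts it inside the delta iteration.
DataSet<Tuple2<Long, String>> legalBody = it.getWorkset()
        .partitionByHash(0)
        .join(it.getSolutionSet())
        .where(0).equalTo(0)
        .projectFirst(0).projectSecond(1);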
use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
the class PartitionITCase method testHashPartitionByKeyFieldAndDifferentParallelism.
@Test
public void testHashPartitionByKeyFieldAndDifferentParallelism() throws Exception {
    /*
     * Test hash partition by key field and different parallelism
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Long> uniqLongs = ds.partitionByHash(1).setParallelism(4).mapPartition(new UniqueTupleLongMapper());
    List<Long> result = uniqLongs.collect();
    String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    compareResultAsText(result, expected);
}
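Note that setParallelism(4) applies to the partitioning operator itself, so the data exchange runs with parallelism 4 while the rest of the job keeps the environment default of 3. The same per-operator pattern also works with an explicit Partitioner (org.apache.flink.api.common.functions.Partitioner) via partitionCustom; a sketch, where the modulo routing is illustrative rather than taken from the test:

// Sketch: route records by key modulo the partition count.
DataSet<Long> viaCustom = ds
        .partitionCustom(new Partitioner<Long>() {
            @Override
            public int partition(Long key, int numPartitions) {
                return (int) (key % numPartitions);
            }
        }, 1) // 1 = position of the Long key field
        .setParallelism(4)
        .mapPartition(new UniqueTupleLongMapper());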
use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
the class PartitionITCase method testHashPartitionWithKeyExpression.
@Test
public void testHashPartitionWithKeyExpression() throws Exception {
    /*
     * Test hash partition with key expression
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);
    DataSet<POJO> ds = CollectionDataSets.getDuplicatePojoDataSet(env);
    DataSet<Long> uniqLongs = ds.partitionByHash("nestedPojo.longNumber").setParallelism(4).mapPartition(new UniqueNestedPojoLongMapper());
    List<Long> result = uniqLongs.collect();
    String expected = "10000\n" + "20000\n" + "30000\n";
    compareResultAsText(result, expected);
}
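UniqueNestedPojoLongMapper is not shown on this page either. Since the key expression nestedPojo.longNumber drills into the POJO's nested field, a plausible sketch mirrors UniqueTupleLongMapper above (the class body is an assumption):

import java.util.HashSet;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.util.Collector;

// Sketch: emits each distinct nestedPojo.longNumber once per partition.
public static class UniqueNestedPojoLongMapper implements MapPartitionFunction<POJO, Long> {
    @Override
    public void mapPartition(Iterable<POJO> records, Collector<Long> out) {
        HashSet<Long> uniq = new HashSet<>();
        for (POJO p : records) {
            uniq.add(p.nestedPojo.longNumber);
        }
        for (Long number : uniq) {
            out.collect(number);
        }
    }
}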