Use of org.apache.flink.api.java.DataSet in project flink by apache.
In class JobGraphGeneratorTest, method testGeneratingJobGraphWithUnconsumedResultPartition:
@Test
public void testGeneratingJobGraphWithUnconsumedResultPartition() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input =
            env.fromElements(new Tuple2<>(1L, 2L)).setParallelism(1);
    DataSet<Tuple2<Long, Long>> ds = input.map(new IdentityMapper<>()).setParallelism(3);
    AbstractID intermediateDataSetID = new AbstractID();
    // This output branch will be excluded.
    ds.output(BlockingShuffleOutputFormat.createOutputFormat(intermediateDataSetID))
            .setParallelism(1);
    // This is the normal output branch.
    ds.output(new DiscardingOutputFormat<>()).setParallelism(1);
    JobGraph jobGraph = compileJob(env);
    Assert.assertEquals(3, jobGraph.getVerticesSortedTopologicallyFromSources().size());
    JobVertex mapVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    Assert.assertThat(mapVertex, Matchers.instanceOf(JobVertex.class));
    // The map vertex produces two results; one of them is ResultPartitionType.BLOCKING_PERSISTENT.
    Assert.assertEquals(2, mapVertex.getProducedDataSets().size());
    Assert.assertTrue(
            mapVertex.getProducedDataSets().stream()
                    .anyMatch(
                            dataSet ->
                                    dataSet.getId().equals(new IntermediateDataSetID(intermediateDataSetID))
                                            && dataSet.getResultType() == ResultPartitionType.BLOCKING_PERSISTENT));
}
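The compileJob helper is not reproduced on this page. A minimal sketch, assuming the standard batch compilation pipeline (Plan, Optimizer, JobGraphGenerator, all real Flink classes; the actual private helper in the test may differ):

private static JobGraph compileJob(ExecutionEnvironment env) {
    // Translate the user program into a Plan, optimize it, and generate the JobGraph,
    // mirroring what the batch compiler does before job submission.
    Plan plan = env.createProgramPlan();
    Optimizer optimizer = new Optimizer(new Configuration());
    OptimizedPlan optimizedPlan = optimizer.compile(plan);
    return new JobGraphGenerator().compileJobGraph(optimizedPlan);
}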
Use of org.apache.flink.api.java.DataSet in project flink by apache.
In class UnionReplacementTest, method testUnionForwardOutput:
/**
 * Tests that the outgoing connection of a Union node is FORWARD. See FLINK-9031 for a bug
 * report.
 *
 * <p>The issue is quite hard to reproduce as the plan choice seems to depend on the enumeration
 * order due to lack of plan costs. This test is a smaller variant of the job that was reported
 * to fail.
 *
 * <pre>
 *       /-\            /- PreFilter1 -\-/- Union - PostFilter1 - Reducer1 -\
 * Src -<   >- Union -<                 X                                    >- Union - Out
 *       \-/            \- PreFilter2 -/-\- Union - PostFilter2 - Reducer2 -/
 * </pre>
 */
@Test
public void testUnionForwardOutput() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> u1 = src1.union(src1).map(new IdentityMapper<>());
    DataSet<Tuple2<Long, Long>> s1 = u1.filter(x -> true).name("preFilter1");
    DataSet<Tuple2<Long, Long>> s2 = u1.filter(x -> true).name("preFilter2");
    DataSet<Tuple2<Long, Long>> reduced1 =
            s1.union(s2)
                    .filter(x -> true)
                    .name("postFilter1")
                    .groupBy(0)
                    .reduceGroup(new IdentityGroupReducer<>())
                    .name("reducer1");
    DataSet<Tuple2<Long, Long>> reduced2 =
            s1.union(s2)
                    .filter(x -> true)
                    .name("postFilter2")
                    .groupBy(1)
                    .reduceGroup(new IdentityGroupReducer<>())
                    .name("reducer2");
    reduced1.union(reduced2).output(new DiscardingOutputFormat<>());
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode unionOut1 = resolver.getNode("postFilter1");
    SingleInputPlanNode unionOut2 = resolver.getNode("postFilter2");
    assertEquals(ShipStrategyType.FORWARD, unionOut1.getInput().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, unionOut2.getInput().getShipStrategy());
}
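The identity test functions used here and in the JobGraphGenerator test above are not shown on this page. Minimal sketches, assuming they simply pass records through unchanged (Flink keeps such helpers in its optimizer test-function package; the actual classes may differ slightly):

// Passes every record through unchanged; used where a test only cares about the plan shape.
public class IdentityMapper<T> implements MapFunction<T, T> {
    @Override
    public T map(T value) {
        return value;
    }
}

// Emits every record of the group unchanged.
public class IdentityGroupReducer<T> implements GroupReduceFunction<T, T> {
    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        for (T value : values) {
            out.collect(value);
        }
    }
}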
Use of org.apache.flink.api.java.DataSet in project flink by apache.
In class AvroTypeExtractionTest, method testSerializeWithAvro:
@Test
public void testSerializeWithAvro() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceAvro();
    Path in = new Path(inFile.getAbsoluteFile().toURI());
    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS =
            env.createInput(users)
                    .map(
                            (MapFunction<User, User>) value -> {
                                Map<CharSequence, Long> ab = new HashMap<>(1);
                                ab.put("hehe", 12L);
                                value.setTypeMap(ab);
                                return value;
                            });
    usersDS.writeAsText(resultPath);
    env.execute("Simple Avro read job");
    expected =
            "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null,"
                    + " \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null,"
                    + " \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"],"
                    + " \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\","
                    + " \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null,"
                    + " \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\","
                    + " \"state\": \"London\", \"zip\": \"NW1 6XE\"},"
                    + " \"type_bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\", "
                    + "\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12, \"type_time_micros\": 00:00:00.123456, "
                    + "\"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, "
                    + "\"type_timestamp_micros\": 1970-01-01T00:00:00.123456Z, \"type_decimal_bytes\": \"\\u0007Ð\", "
                    + "\"type_decimal_fixed\": [7, -48]}\n"
                    + "{\"name\": \"Charlie\", \"favorite_number\": null, "
                    + "\"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, "
                    + "\"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], "
                    + "\"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", "
                    + "\"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, "
                    + "\"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", "
                    + "\"zip\": \"NW1 6XE\"}, "
                    + "\"type_bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\", "
                    + "\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12, \"type_time_micros\": 00:00:00.123456, "
                    + "\"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, "
                    + "\"type_timestamp_micros\": 1970-01-01T00:00:00.123456Z, \"type_decimal_bytes\": \"\\u0007Ð\", "
                    + "\"type_decimal_fixed\": [7, -48]}\n";
}
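The inFile, resultPath, and expected fields belong to test fixture code that this page omits. A plausible sketch, assuming a JUnit TemporaryFolder supplies the paths and a helper writes the Alyssa/Charlie sample records (writeTestFile is a hypothetical stand-in for whatever actually produces the Avro input file):

@Rule public TemporaryFolder tempFolder = new TemporaryFolder();

private File inFile;
private String resultPath;
private String expected;

@Before
public void createFiles() throws IOException {
    File tempFile = tempFolder.newFile("result");
    resultPath = tempFile.toURI().toString();
    inFile = tempFolder.newFile();
    inFile.delete();
    // Hypothetical helper: writes the sample User records these tests read back.
    writeTestFile(inFile);
}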
Use of org.apache.flink.api.java.DataSet in project flink by apache.
In class AvroTypeExtractionTest, method testWithAvroGenericSer:
@Test
public void testWithAvroGenericSer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceAvro();
    Path in = new Path(inFile.getAbsoluteFile().toURI());
    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);
    DataSet<Tuple2<String, Integer>> res =
            usersDS.groupBy((KeySelector<User, String>) value -> String.valueOf(value.getName()))
                    .reduceGroup(
                            (GroupReduceFunction<User, Tuple2<String, Integer>>)
                                    (values, out) -> {
                                        for (User u : values) {
                                            out.collect(new Tuple2<>(u.getName().toString(), 1));
                                        }
                                    })
                    .returns(Types.TUPLE(Types.STRING, Types.INT));
    res.writeAsText(resultPath);
    env.execute("Avro Key selection");
    expected = "(Charlie,1)\n(Alyssa,1)\n";
}
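The .returns(Types.TUPLE(...)) call is needed because Java erases the lambda's generic return type, so Flink cannot extract Tuple2<String, Integer> on its own. A TypeHint-based equivalent of the same hint (an alternative form for illustration, not part of the original test):

// Supplies the same TypeInformation via an anonymous TypeHint subclass, which
// preserves Tuple2<String, Integer> in its superclass signature at runtime.
DataSet<Tuple2<String, Integer>> resWithHint =
        usersDS.groupBy((KeySelector<User, String>) value -> String.valueOf(value.getName()))
                .reduceGroup(
                        (GroupReduceFunction<User, Tuple2<String, Integer>>)
                                (values, out) -> {
                                    for (User u : values) {
                                        out.collect(new Tuple2<>(u.getName().toString(), 1));
                                    }
                                })
                .returns(new TypeHint<Tuple2<String, Integer>>() {});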
Use of org.apache.flink.api.java.DataSet in project flink by apache.
In class UnionTranslationTest, method translateUnion3SortedGroup:
@Test
public void translateUnion3SortedGroup() {
    try {
        final int parallelism = 4;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 2);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 3);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset3 = getSourceDataSet(env, -1);
        dataset1.union(dataset2)
                .union(dataset3)
                .groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "")
                .sortGroup(
                        (KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "",
                        Order.ASCENDING)
                .reduceGroup(
                        (GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>)
                                (values, out) -> {})
                .returns(String.class)
                .output(new DiscardingOutputFormat<>());
        Plan p = env.createProgramPlan();
        // The plan should look like the following one.
        //
        // DataSet1(2)  - MapOperator(2) -+
        //                                |- Union(-1) -+
        // DataSet2(3)  - MapOperator(3) -+             |- Union(-1) - SingleInputOperator - Sink
        //                                              |
        // DataSet3(-1) - MapOperator(-1)---------------+
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        Union secondUnionOperator = (Union) ((SingleInputOperator) sink.getInput()).getInput();
        // The first input of the second union should be the first union.
        Union firstUnionOperator = (Union) secondUnionOperator.getFirstInput();
        // The key mapper should be added to the second input stream of the second union.
        assertTrue(secondUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);
        // The key mappers should be added to both input streams of the first union.
        assertTrue(firstUnionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>);
        assertTrue(firstUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);
        // The parallelism of each key mapper should equal that of its input.
        assertEquals(2, firstUnionOperator.getFirstInput().getParallelism());
        assertEquals(3, firstUnionOperator.getSecondInput().getParallelism());
        assertEquals(-1, secondUnionOperator.getSecondInput().getParallelism());
        // The unions should always have the default parallelism.
        assertEquals(ExecutionConfig.PARALLELISM_DEFAULT, secondUnionOperator.getParallelism());
        assertEquals(ExecutionConfig.PARALLELISM_DEFAULT, firstUnionOperator.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
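The getSourceDataSet helper is not reproduced on this page. A minimal sketch, assuming it builds a single-element tuple source with the requested parallelism (a value of -1 is ExecutionConfig.PARALLELISM_DEFAULT, which setParallelism accepts; the actual helper may differ):

private static DataSet<Tuple3<Double, StringValue, LongValue>> getSourceDataSet(
        ExecutionEnvironment env, int parallelism) {
    // Only the declared parallelism matters for the translation assertions above;
    // the element itself is arbitrary.
    return env.fromElements(new Tuple3<>(0.0, new StringValue("a"), new LongValue(1L)))
            .setParallelism(parallelism);
}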