use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.
the class AvroPojoTest method testSerializeWithAvro.
@Test
public void testSerializeWithAvro() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableForceAvro();
Path in = new Path(inFile.getAbsoluteFile().toURI());
AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
DataSet<User> usersDS = env.createInput(users).map(new MapFunction<User, User>() {
@Override
public User map(User value) throws Exception {
Map<CharSequence, Long> ab = new HashMap<CharSequence, Long>(1);
ab.put("hehe", 12L);
value.setTypeMap(ab);
return value;
}
});
usersDS.writeAsText(resultPath);
env.execute("Simple Avro read job");
expected = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n" + "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
}
use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.
the class AvroPojoTest method testKeySelection.
@Test
public void testKeySelection() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
Path in = new Path(inFile.getAbsoluteFile().toURI());
AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
DataSet<User> usersDS = env.createInput(users);
DataSet<Tuple2<String, Integer>> res = usersDS.groupBy("name").reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
@Override
public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
for (User u : values) {
out.collect(new Tuple2<String, Integer>(u.getName().toString(), 1));
}
}
});
res.writeAsText(resultPath);
env.execute("Avro Key selection");
expected = "(Alyssa,1)\n(Charlie,1)\n";
}
use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.
the class AvroPojoTest method testSimpleAvroRead.
@Test
public void testSimpleAvroRead() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
Path in = new Path(inFile.getAbsoluteFile().toURI());
AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
DataSet<User> usersDS = env.createInput(users).map(new MapFunction<User, User>() {
@Override
public User map(User value) throws Exception {
value.setTypeMap(null);
return value;
}
});
usersDS.writeAsText(resultPath);
env.execute("Simple Avro read job");
expected = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\", \"type_map\": null, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n" + "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": null, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
}
use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.
the class AvroRecordInputFormatTest method testDeserialisationGenericRecordReuseAvroValueFalse.
/**
* Test if the AvroInputFormat is able to properly read data from an avro
* file as a GenericRecord
*
* @throws IOException,
* if there is an error
*/
@Test
public void testDeserialisationGenericRecordReuseAvroValueFalse() throws IOException {
Configuration parameters = new Configuration();
AvroInputFormat<GenericRecord> format = new AvroInputFormat<GenericRecord>(new Path(testFile.getAbsolutePath()), GenericRecord.class);
format.configure(parameters);
format.setReuseAvroValue(false);
doTestDeserializationGenericRecord(format, parameters);
}
use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.
the class AvroRecordInputFormatTest method testDeserialisation.
/**
* Test if the AvroInputFormat is able to properly read data from an avro file.
* @throws IOException
*/
@Test
public void testDeserialisation() throws IOException {
Configuration parameters = new Configuration();
AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
format.configure(parameters);
FileInputSplit[] splits = format.createInputSplits(1);
assertEquals(splits.length, 1);
format.open(splits[0]);
User u = format.nextRecord(null);
assertNotNull(u);
String name = u.getName().toString();
assertNotNull("empty record", name);
assertEquals("name not equal", TEST_NAME, name);
// check arrays
List<CharSequence> sl = u.getTypeArrayString();
assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
List<Boolean> bl = u.getTypeArrayBoolean();
assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
// check enums
Colors enumValue = u.getTypeEnum();
assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
// check maps
Map<CharSequence, Long> lm = u.getTypeMap();
assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
assertFalse("expecting second element", format.reachedEnd());
assertNotNull("expecting second element", format.nextRecord(u));
assertNull(format.nextRecord(u));
assertTrue(format.reachedEnd());
format.close();
}
Aggregations