use of org.apache.flink.formats.avro.generated.User in project flink by apache.
the class AvroOutputFormatITCase method testProgram.
@Override
protected void testProgram() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<String, Integer, String>> input = env.readCsvFile(inputPath).fieldDelimiter("|").types(String.class, Integer.class, String.class);
// output the data with AvroOutputFormat for specific user type
DataSet<User> specificUser = input.map(new ConvertToUser());
AvroOutputFormat<User> avroOutputFormat = new AvroOutputFormat<>(User.class);
// FLINK-4771: use a codec
avroOutputFormat.setCodec(Codec.SNAPPY);
avroOutputFormat.setSchema(// FLINK-3304: Ensure the OF is properly serializing the schema
User.SCHEMA$);
specificUser.write(avroOutputFormat, outputPath1);
// output the data with AvroOutputFormat for reflect user type
DataSet<ReflectiveUser> reflectiveUser = specificUser.map(new ConvertToReflective());
reflectiveUser.write(new AvroOutputFormat<>(ReflectiveUser.class), outputPath2);
env.execute();
}
use of org.apache.flink.formats.avro.generated.User in project flink by apache.
the class AvroOutputFormatTest method serializeAndDeserialize.
private void serializeAndDeserialize(final AvroOutputFormat.Codec codec, final Schema schema) throws IOException, ClassNotFoundException {
// given
final AvroOutputFormat<User> outputFormat = new AvroOutputFormat<>(User.class);
if (codec != null) {
outputFormat.setCodec(codec);
}
if (schema != null) {
outputFormat.setSchema(schema);
}
final ByteArrayOutputStream bos = new ByteArrayOutputStream();
// when
try (final ObjectOutputStream oos = new ObjectOutputStream(bos)) {
oos.writeObject(outputFormat);
}
try (final ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray()))) {
// then
Object o = ois.readObject();
assertTrue(o instanceof AvroOutputFormat);
@SuppressWarnings("unchecked") final AvroOutputFormat<User> restored = (AvroOutputFormat<User>) o;
final AvroOutputFormat.Codec restoredCodec = (AvroOutputFormat.Codec) Whitebox.getInternalState(restored, "codec");
final Schema restoredSchema = (Schema) Whitebox.getInternalState(restored, "userDefinedSchema");
assertTrue(codec != null ? restoredCodec == codec : restoredCodec == null);
assertTrue(schema != null ? restoredSchema.equals(schema) : restoredSchema == null);
}
}
use of org.apache.flink.formats.avro.generated.User in project flink by apache.
the class AvroOutputFormatTest method testCompression.
@Test
public void testCompression() throws Exception {
// given
final Path outputPath = new Path(File.createTempFile("avro-output-file", "avro").getAbsolutePath());
final AvroOutputFormat<User> outputFormat = new AvroOutputFormat<>(outputPath, User.class);
outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
final Path compressedOutputPath = new Path(File.createTempFile("avro-output-file", "compressed.avro").getAbsolutePath());
final AvroOutputFormat<User> compressedOutputFormat = new AvroOutputFormat<>(compressedOutputPath, User.class);
compressedOutputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
compressedOutputFormat.setCodec(AvroOutputFormat.Codec.SNAPPY);
// when
output(outputFormat);
output(compressedOutputFormat);
// then
assertTrue(fileSize(outputPath) > fileSize(compressedOutputPath));
// cleanup
FileSystem fs = FileSystem.getLocalFileSystem();
fs.delete(outputPath, false);
fs.delete(compressedOutputPath, false);
}
use of org.apache.flink.formats.avro.generated.User in project flink by apache.
the class AvroRecordInputFormatTest method testDeserialization.
/**
* Test if the AvroInputFormat is able to properly read data from an Avro file.
*/
@Test
public void testDeserialization() throws IOException {
Configuration parameters = new Configuration();
AvroInputFormat<User> format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
format.configure(parameters);
FileInputSplit[] splits = format.createInputSplits(1);
assertEquals(splits.length, 1);
format.open(splits[0]);
User u = format.nextRecord(null);
assertNotNull(u);
String name = u.getName().toString();
assertNotNull("empty record", name);
assertEquals("name not equal", TEST_NAME, name);
// check arrays
List<CharSequence> sl = u.getTypeArrayString();
assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
List<Boolean> bl = u.getTypeArrayBoolean();
assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
// check enums
Colors enumValue = u.getTypeEnum();
assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
// check maps
Map<CharSequence, Long> lm = u.getTypeMap();
assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
assertFalse("expecting second element", format.reachedEnd());
assertNotNull("expecting second element", format.nextRecord(u));
assertNull(format.nextRecord(u));
assertTrue(format.reachedEnd());
format.close();
}
use of org.apache.flink.formats.avro.generated.User in project flink by apache.
the class AvroRecordInputFormatTest method writeTestFile.
public static void writeTestFile(File testFile) throws IOException {
ArrayList<CharSequence> stringArray = new ArrayList<>();
stringArray.add(TEST_ARRAY_STRING_1);
stringArray.add(TEST_ARRAY_STRING_2);
ArrayList<Boolean> booleanArray = new ArrayList<>();
booleanArray.add(TEST_ARRAY_BOOLEAN_1);
booleanArray.add(TEST_ARRAY_BOOLEAN_2);
HashMap<CharSequence, Long> longMap = new HashMap<>();
longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);
Address addr = new Address();
addr.setNum(TEST_NUM);
addr.setStreet(TEST_STREET);
addr.setCity(TEST_CITY);
addr.setState(TEST_STATE);
addr.setZip(TEST_ZIP);
User user1 = new User();
user1.setName(TEST_NAME);
user1.setFavoriteNumber(256);
user1.setTypeDoubleTest(123.45d);
user1.setTypeBoolTest(true);
user1.setTypeArrayString(stringArray);
user1.setTypeArrayBoolean(booleanArray);
user1.setTypeEnum(TEST_ENUM_COLOR);
user1.setTypeMap(longMap);
user1.setTypeNested(addr);
user1.setTypeBytes(ByteBuffer.allocate(10));
user1.setTypeDate(LocalDate.parse("2014-03-01"));
user1.setTypeTimeMillis(LocalTime.parse("12:12:12"));
user1.setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS));
user1.setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z"));
user1.setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS));
// 20.00
user1.setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
// 20.00
user1.setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
// Construct via builder
User user2 = User.newBuilder().setName("Charlie").setFavoriteColor("blue").setFavoriteNumber(null).setTypeBoolTest(false).setTypeDoubleTest(1.337d).setTypeNullTest(null).setTypeLongTest(1337L).setTypeArrayString(new ArrayList<>()).setTypeArrayBoolean(new ArrayList<>()).setTypeNullableArray(null).setTypeEnum(Colors.RED).setTypeMap(new HashMap<>()).setTypeFixed(null).setTypeUnion(null).setTypeNested(Address.newBuilder().setNum(TEST_NUM).setStreet(TEST_STREET).setCity(TEST_CITY).setState(TEST_STATE).setZip(TEST_ZIP).build()).setTypeBytes(ByteBuffer.allocate(10)).setTypeDate(LocalDate.parse("2014-03-01")).setTypeTimeMillis(LocalTime.parse("12:12:12")).setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS)).setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z")).setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS)).setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray())).setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray())).build();
DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter);
dataFileWriter.create(user1.getSchema(), testFile);
dataFileWriter.append(user1);
dataFileWriter.append(user2);
dataFileWriter.close();
}
Aggregations