Search in sources :

Example 16 with User

Use of org.apache.flink.formats.avro.generated.User in the Apache Flink project.

From the class AvroOutputFormatITCase, method testProgram.

@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Read the pipe-delimited CSV input as (name, favoriteNumber, favoriteColor) tuples.
    DataSet<Tuple3<String, Integer, String>> csvInput =
            env.readCsvFile(inputPath).fieldDelimiter("|").types(String.class, Integer.class, String.class);

    // Write the data through AvroOutputFormat using the specific-record user type.
    DataSet<User> specificRecords = csvInput.map(new ConvertToUser());
    AvroOutputFormat<User> specificFormat = new AvroOutputFormat<>(User.class);
    // FLINK-4771: exercise a compression codec
    specificFormat.setCodec(Codec.SNAPPY);
    // FLINK-3304: ensure the output format properly serializes the schema
    specificFormat.setSchema(User.SCHEMA$);
    specificRecords.write(specificFormat, outputPath1);

    // Write the same data again via the reflection-based user type.
    DataSet<ReflectiveUser> reflectiveRecords = specificRecords.map(new ConvertToReflective());
    reflectiveRecords.write(new AvroOutputFormat<>(ReflectiveUser.class), outputPath2);

    env.execute();
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.formats.avro.generated.User) Tuple3(org.apache.flink.api.java.tuple.Tuple3)

Example 17 with User

Use of org.apache.flink.formats.avro.generated.User in the Apache Flink project.

From the class AvroOutputFormatTest, method serializeAndDeserialize.

/**
 * Round-trips an {@code AvroOutputFormat} through Java serialization and verifies
 * that the configured codec and user-defined schema survive the trip.
 */
private void serializeAndDeserialize(final AvroOutputFormat.Codec codec, final Schema schema) throws IOException, ClassNotFoundException {
    // given: a format configured with the (possibly null) codec and schema
    final AvroOutputFormat<User> original = new AvroOutputFormat<>(User.class);
    if (codec != null) {
        original.setCodec(codec);
    }
    if (schema != null) {
        original.setSchema(schema);
    }

    // when: the format is written out and read back via Java serialization
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (final ObjectOutputStream out = new ObjectOutputStream(buffer)) {
        out.writeObject(original);
    }
    try (final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray()))) {
        final Object deserialized = in.readObject();

        // then: the restored instance carries the same codec and schema
        assertTrue(deserialized instanceof AvroOutputFormat);
        @SuppressWarnings("unchecked") final AvroOutputFormat<User> restored = (AvroOutputFormat<User>) deserialized;
        final AvroOutputFormat.Codec restoredCodec = (AvroOutputFormat.Codec) Whitebox.getInternalState(restored, "codec");
        final Schema restoredSchema = (Schema) Whitebox.getInternalState(restored, "userDefinedSchema");
        assertTrue(codec != null ? restoredCodec == codec : restoredCodec == null);
        assertTrue(schema != null ? restoredSchema.equals(schema) : restoredSchema == null);
    }
}
Also used : User(org.apache.flink.formats.avro.generated.User) ByteArrayInputStream(java.io.ByteArrayInputStream) Schema(org.apache.avro.Schema) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) ObjectInputStream(java.io.ObjectInputStream)

Example 18 with User

Use of org.apache.flink.formats.avro.generated.User in the Apache Flink project.

From the class AvroOutputFormatTest, method testCompression.

/**
 * Verifies that enabling the Snappy codec on an {@code AvroOutputFormat}
 * produces a strictly smaller file than the uncompressed output.
 */
@Test
public void testCompression() throws Exception {
    // given: one plain and one Snappy-compressed output format writing to temp files
    final Path outputPath = new Path(File.createTempFile("avro-output-file", "avro").getAbsolutePath());
    final Path compressedOutputPath = new Path(File.createTempFile("avro-output-file", "compressed.avro").getAbsolutePath());
    final FileSystem fs = FileSystem.getLocalFileSystem();
    try {
        final AvroOutputFormat<User> outputFormat = new AvroOutputFormat<>(outputPath, User.class);
        outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
        final AvroOutputFormat<User> compressedOutputFormat = new AvroOutputFormat<>(compressedOutputPath, User.class);
        compressedOutputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
        compressedOutputFormat.setCodec(AvroOutputFormat.Codec.SNAPPY);
        // when
        output(outputFormat);
        output(compressedOutputFormat);
        // then
        assertTrue(fileSize(outputPath) > fileSize(compressedOutputPath));
    } finally {
        // cleanup must run even when the assertion above fails,
        // otherwise the temp files leak on every failed run
        fs.delete(outputPath, false);
        fs.delete(compressedOutputPath, false);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) User(org.apache.flink.formats.avro.generated.User) FileSystem(org.apache.flink.core.fs.FileSystem) Test(org.junit.Test)

Example 19 with User

Use of org.apache.flink.formats.avro.generated.User in the Apache Flink project.

From the class AvroRecordInputFormatTest, method testDeserialization.

/**
 * Test if the AvroInputFormat is able to properly read data from an Avro file.
 *
 * @throws IOException if the test file cannot be read
 */
@Test
public void testDeserialization() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(1);
    // JUnit's assertEquals takes the expected value first; the original call
    // had the arguments reversed, which garbles the failure message
    assertEquals(1, splits.length);
    format.open(splits[0]);
    try {
        User u = format.nextRecord(null);
        assertNotNull(u);
        String name = u.getName().toString();
        assertNotNull("empty record", name);
        assertEquals("name not equal", TEST_NAME, name);
        // check arrays
        List<CharSequence> sl = u.getTypeArrayString();
        assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
        assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
        List<Boolean> bl = u.getTypeArrayBoolean();
        assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
        assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
        // check enums
        Colors enumValue = u.getTypeEnum();
        assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
        // check maps
        Map<CharSequence, Long> lm = u.getTypeMap();
        assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
        assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
        assertFalse("expecting second element", format.reachedEnd());
        assertNotNull("expecting second element", format.nextRecord(u));
        assertNull(format.nextRecord(u));
        assertTrue(format.reachedEnd());
    } finally {
        // release the underlying file stream even when an assertion fails
        format.close();
    }
}
Also used : Path(org.apache.flink.core.fs.Path) User(org.apache.flink.formats.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Colors(org.apache.flink.formats.avro.generated.Colors) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Example 20 with User

Use of org.apache.flink.formats.avro.generated.User in the Apache Flink project.

From the class AvroRecordInputFormatTest, method writeTestFile.

/**
 * Writes two {@code User} records to the given file in Avro container format:
 * one populated via setters and one constructed through the generated builder.
 *
 * @param testFile target file; overwritten if it already exists
 * @throws IOException if the file cannot be created or written
 */
public static void writeTestFile(File testFile) throws IOException {
    ArrayList<CharSequence> stringArray = new ArrayList<>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);
    ArrayList<Boolean> booleanArray = new ArrayList<>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);
    HashMap<CharSequence, Long> longMap = new HashMap<>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);
    Address addr = new Address();
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);
    // Record populated field-by-field via setters.
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);
    user1.setTypeBytes(ByteBuffer.allocate(10));
    user1.setTypeDate(LocalDate.parse("2014-03-01"));
    user1.setTypeTimeMillis(LocalTime.parse("12:12:12"));
    user1.setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS));
    user1.setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z"));
    user1.setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS));
    // 20.00
    user1.setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
    // 20.00
    user1.setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
    // Construct via builder
    User user2 = User.newBuilder().setName("Charlie").setFavoriteColor("blue").setFavoriteNumber(null).setTypeBoolTest(false).setTypeDoubleTest(1.337d).setTypeNullTest(null).setTypeLongTest(1337L).setTypeArrayString(new ArrayList<>()).setTypeArrayBoolean(new ArrayList<>()).setTypeNullableArray(null).setTypeEnum(Colors.RED).setTypeMap(new HashMap<>()).setTypeFixed(null).setTypeUnion(null).setTypeNested(Address.newBuilder().setNum(TEST_NUM).setStreet(TEST_STREET).setCity(TEST_CITY).setState(TEST_STATE).setZip(TEST_ZIP).build()).setTypeBytes(ByteBuffer.allocate(10)).setTypeDate(LocalDate.parse("2014-03-01")).setTypeTimeMillis(LocalTime.parse("12:12:12")).setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS)).setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z")).setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS)).setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray())).setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray())).build();
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
    // try-with-resources guarantees the writer (and its file handle) is closed
    // even if create/append throws; the original leaked it on failure
    try (DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter)) {
        dataFileWriter.create(user1.getSchema(), testFile);
        dataFileWriter.append(user1);
        dataFileWriter.append(user2);
    }
}
Also used : User(org.apache.flink.formats.avro.generated.User) Address(org.apache.flink.formats.avro.generated.Address) HashMap(java.util.HashMap) DataFileWriter(org.apache.avro.file.DataFileWriter) ArrayList(java.util.ArrayList) Fixed2(org.apache.flink.formats.avro.generated.Fixed2) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Aggregations

User (org.apache.flink.formats.avro.generated.User)28 Test (org.junit.Test)19 Path (org.apache.flink.core.fs.Path)12 Fixed16 (org.apache.flink.formats.avro.generated.Fixed16)8 HashMap (java.util.HashMap)7 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)7 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)6 Configuration (org.apache.flink.configuration.Configuration)6 AvroInputFormat (org.apache.flink.formats.avro.AvroInputFormat)6 Address (org.apache.flink.formats.avro.generated.Address)6 Fixed2 (org.apache.flink.formats.avro.generated.Fixed2)6 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)5 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)5 AvroRecordInputFormatTest (org.apache.flink.formats.avro.AvroRecordInputFormatTest)5 File (java.io.File)4 ArrayList (java.util.ArrayList)4 Arrays (java.util.Arrays)4 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)4 Table (org.apache.flink.table.api.Table)4 StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment)4