
Example 6 with User

Use of org.apache.flink.api.io.avro.generated.User in project flink by apache.

The class AvroRecordInputFormatTest, method writeTestFile.

public static void writeTestFile(File testFile) throws IOException {
    ArrayList<CharSequence> stringArray = new ArrayList<CharSequence>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);
    ArrayList<Boolean> booleanArray = new ArrayList<Boolean>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);
    HashMap<CharSequence, Long> longMap = new HashMap<CharSequence, Long>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);
    Address addr = new Address();
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);
    // Construct via builder
    User user2 = User.newBuilder()
            .setName("Charlie")
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<CharSequence>())
            .setTypeArrayBoolean(new ArrayList<Boolean>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<CharSequence, Long>())
            .setTypeFixed(null)
            .setTypeUnion(null)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .build();
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
    dataFileWriter.create(user1.getSchema(), testFile);
    dataFileWriter.append(user1);
    dataFileWriter.append(user2);
    dataFileWriter.close();
}
Also used : User(org.apache.flink.api.io.avro.generated.User) Address(org.apache.flink.api.io.avro.generated.Address) DataFileWriter(org.apache.avro.file.DataFileWriter) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)
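
For completeness, here is a minimal read-back sketch using Avro's own reader classes (org.apache.avro.io.DatumReader, org.apache.avro.specific.SpecificDatumReader, org.apache.avro.file.DataFileReader); it assumes the same generated User class and a testFile previously populated by writeTestFile. The Flink tests themselves read the file through AvroInputFormat instead, as the later examples show.

DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
DataFileReader<User> dataFileReader = new DataFileReader<User>(testFile, userDatumReader);
try {
    while (dataFileReader.hasNext()) {
        // passing null lets Avro allocate a fresh record instead of reusing one
        User user = dataFileReader.next(null);
        System.out.println(user.getName());
    }
} finally {
    dataFileReader.close();
}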

Example 7 with User

Use of org.apache.flink.api.io.avro.generated.User in project flink by apache.

The class AvroSplittableInputFormatTest, method createFiles.

@Before
public void createFiles() throws IOException {
    testFile = File.createTempFile("AvroSplittableInputFormatTest", null);
    ArrayList<CharSequence> stringArray = new ArrayList<CharSequence>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);
    ArrayList<Boolean> booleanArray = new ArrayList<Boolean>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);
    HashMap<CharSequence, Long> longMap = new HashMap<CharSequence, Long>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);
    Address addr = new Address();
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);
    // Construct via builder
    User user2 = User.newBuilder()
            .setName(TEST_NAME)
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<CharSequence>())
            .setTypeArrayBoolean(new ArrayList<Boolean>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<CharSequence, Long>())
            .setTypeFixed(new Fixed16())
            .setTypeUnion(123L)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .build();
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
    dataFileWriter.create(user1.getSchema(), testFile);
    dataFileWriter.append(user1);
    dataFileWriter.append(user2);
    Random rnd = new Random(1337);
    for (int i = 0; i < NUM_RECORDS - 2; i++) {
        User user = new User();
        user.setName(TEST_NAME + rnd.nextInt());
        user.setFavoriteNumber(rnd.nextInt());
        user.setTypeDoubleTest(rnd.nextDouble());
        user.setTypeBoolTest(true);
        user.setTypeArrayString(stringArray);
        user.setTypeArrayBoolean(booleanArray);
        user.setTypeEnum(TEST_ENUM_COLOR);
        user.setTypeMap(longMap);
        Address address = new Address();
        address.setNum(TEST_NUM);
        address.setStreet(TEST_STREET);
        address.setCity(TEST_CITY);
        address.setState(TEST_STATE);
        address.setZip(TEST_ZIP);
        user.setTypeNested(address);
        dataFileWriter.append(user);
    }
    dataFileWriter.close();
}
Also used : User(org.apache.flink.api.io.avro.generated.User) Address(org.apache.flink.api.io.avro.generated.Address) HashMap(java.util.HashMap) DataFileWriter(org.apache.avro.file.DataFileWriter) ArrayList(java.util.ArrayList) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) Fixed16(org.apache.flink.api.io.avro.generated.Fixed16) Random(java.util.Random) Before(org.junit.Before)

Example 8 with User

Use of org.apache.flink.api.io.avro.generated.User in project flink by apache.

The class AvroSplittableInputFormatTest, method testAvroRecoveryWithFailureAtStart.

@Test
public void testAvroRecoveryWithFailureAtStart() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.reopen(splits[i], format.getCurrentState());
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off.
                Tuple2<Long, Long> state = format.getCurrentState();
                // use a fresh format so that nothing carries over from the previous one
                // (just as it would be after an actual failure and restore)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)
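
Stripped of the assertions, the checkpoint-and-restore contract exercised here boils down to the following hedged sketch; testPath, split and N are placeholders introduced only for illustration, while getCurrentState() and reopen(...) are the same calls used in the test above (Tuple2 is org.apache.flink.api.java.tuple.Tuple2).

AvroInputFormat<User> first = new AvroInputFormat<>(testPath, User.class);
first.configure(new Configuration());
first.open(split);
for (int n = 0; n < N && !first.reachedEnd(); n++) {
    first.nextRecord(null);
}
// capture the position inside the current split ...
Tuple2<Long, Long> state = first.getCurrentState();
first.close();
// ... and hand it to a fresh format, which continues where the first one stopped
AvroInputFormat<User> resumed = new AvroInputFormat<>(testPath, User.class);
resumed.reopen(split, state);
while (!resumed.reachedEnd()) {
    resumed.nextRecord(null);
}
resumed.close();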

Example 9 with User

Use of org.apache.flink.api.io.avro.generated.User in project flink by apache.

The class AvroSplittableInputFormatTest, method testSplittedIF.

@Test
public void testSplittedIF() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)
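
Outside of these tests, the same format is typically handed straight to a batch program. A minimal sketch, assuming the classic DataSet API (org.apache.flink.api.java.ExecutionEnvironment, org.apache.flink.api.java.DataSet) and a placeholder path userAvroFile:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
AvroInputFormat<User> usersFormat = new AvroInputFormat<>(new Path(userAvroFile), User.class);
// createInput wires the format into the job; print() triggers execution and prints the records
DataSet<User> users = env.createInput(usersFormat);
users.print();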

Example 10 with User

Use of org.apache.flink.api.io.avro.generated.User in project flink by apache.

The class AvroSplittableInputFormatTest, method testAvroRecovery.

@Test
public void testAvroRecovery() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off.
                Tuple2<Long, Long> state = format.getCurrentState();
                // use a fresh format so that nothing carries over from the previous one
                // (just as it would be after an actual failure and restore)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Aggregations

User (org.apache.flink.api.io.avro.generated.User) 15
Test (org.junit.Test) 12
AvroInputFormat (org.apache.flink.api.java.io.AvroInputFormat) 11
Path (org.apache.flink.core.fs.Path) 11
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 6
Configuration (org.apache.flink.configuration.Configuration) 5
FileInputSplit (org.apache.flink.core.fs.FileInputSplit) 5
HashMap (java.util.HashMap) 3
Address (org.apache.flink.api.io.avro.generated.Address) 3
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 3
Random (java.util.Random) 2
DataFileWriter (org.apache.avro.file.DataFileWriter) 2
SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter) 2
Utf8 (org.apache.avro.util.Utf8) 2
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 2
Colors (org.apache.flink.api.io.avro.generated.Colors) 2
Fixed16 (org.apache.flink.api.io.avro.generated.Fixed16) 2
KeySelector (org.apache.flink.api.java.functions.KeySelector) 2
ByteArrayInputStream (java.io.ByteArrayInputStream) 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 1