Search in sources:

Example 6 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroSplittableInputFormatTest method testAvroRecoveryWithFailureAtStart.

/**
 * Reads all four splits of the test file while simulating a checkpoint/restore cycle,
 * entering every split through {@code reopen} with the format's current state instead of
 * {@code open}.
 *
 * <p>Each time {@code getRecordsReadFromBlock()} equals {@code recordsUntilCheckpoint}, the
 * current state is captured, the format is replaced by a freshly constructed one, and that new
 * format is reopened on the same split with the captured state. The test then checks the
 * per-split record counts and the overall record count.
 *
 * @throws Exception if any call on the input format throws
 */
@Test
public void testAvroRecoveryWithFailureAtStart() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    // JUnit expects (expected, actual); the reversed order yields misleading failure messages.
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        // start the split via the restore path rather than a plain open()
        format.reopen(splits[i], format.getCurrentState());
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off.
                Tuple2<Long, Long> state = format.getCurrentState();
                // the state is already captured, so release the old format's stream
                // before dropping the reference (otherwise it is never closed)
                format.close();
                // this is to make sure that nothing stays from the previous format
                // (as it is going to be in the normal case)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Example 7 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroSplittableInputFormatTest method testSplittedIF.

/**
 * Splits the test file into four input splits, reads every record of each split, and checks
 * both the per-split record counts and the overall record count.
 *
 * <p>Every record read must have a name starting with {@code TEST_NAME}.
 *
 * @throws IOException if any call on the input format throws
 */
@Test
public void testSplittedIF() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    // JUnit expects (expected, actual); the reversed order yields misleading failure messages.
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Example 8 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroSplittableInputFormatTest method testAvroRecovery.

/**
 * Reads all four splits of the test file while simulating a checkpoint/restore cycle in the
 * middle of reading.
 *
 * <p>Each split is started with {@code open}. Each time {@code getRecordsReadFromBlock()}
 * equals {@code recordsUntilCheckpoint}, the current state is captured, the format is replaced
 * by a freshly constructed one, and that new format is reopened on the same split with the
 * captured state. The test then checks the per-split record counts and the overall record
 * count.
 *
 * @throws Exception if any call on the input format throws
 */
@Test
public void testAvroRecovery() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    // JUnit expects (expected, actual); the reversed order yields misleading failure messages.
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and see if we pick up from where we left off.
                Tuple2<Long, Long> state = format.getCurrentState();
                // the state is already captured, so release the old format's stream
                // before dropping the reference (otherwise it is never closed)
                format.close();
                // this is to make sure that nothing stays from the previous format
                // (as it is going to be in the normal case)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1539, elementsPerSplit[0]);
    Assert.assertEquals(1026, elementsPerSplit[1]);
    Assert.assertEquals(1539, elementsPerSplit[2]);
    Assert.assertEquals(896, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Example 9 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroRecordInputFormatTest method testDeserialisationGenericRecord.

/**
 * Builds an {@code AvroInputFormat} typed on {@code GenericRecord} over the test file and
 * hands it, together with an empty {@code Configuration}, to
 * {@code doTestDeserializationGenericRecord}, which performs the actual checks.
 *
 * @throws IOException if the delegated check throws it
 */
@Test
public void testDeserialisationGenericRecord() throws IOException {
    final Configuration config = new Configuration();
    final Path avroFile = new Path(testFile.getAbsolutePath());
    final AvroInputFormat<GenericRecord> genericFormat = new AvroInputFormat<>(avroFile, GenericRecord.class);
    doTestDeserializationGenericRecord(genericFormat, config);
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 10 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroRecordInputFormatTest method testDeserialisationReuseAvroRecordFalse.

/**
 * Test if the AvroInputFormat is able to properly read data from an Avro file when
 * {@code setReuseAvroValue(false)} has been called on the format.
 *
 * <p>The file is read as a single split. The first record's name, string and boolean arrays,
 * enum and map entries are compared against the test constants; the test then expects exactly
 * one more record before the end of the input is reached.
 *
 * @throws IOException if any call on the input format throws
 */
@Test
public void testDeserialisationReuseAvroRecordFalse() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.setReuseAvroValue(false);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(1);
    // JUnit expects (expected, actual); the reversed order yields misleading failure messages.
    assertEquals(1, splits.length);
    format.open(splits[0]);
    User u = format.nextRecord(null);
    assertNotNull(u);
    String name = u.getName().toString();
    assertNotNull("empty record", name);
    assertEquals("name not equal", TEST_NAME, name);
    // check arrays
    List<CharSequence> sl = u.getTypeArrayString();
    assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
    assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
    List<Boolean> bl = u.getTypeArrayBoolean();
    assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
    assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
    // check enums
    Colors enumValue = u.getTypeEnum();
    assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
    // check maps (keys are looked up as Utf8 instances)
    Map<CharSequence, Long> lm = u.getTypeMap();
    assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
    assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
    // exactly one further record is expected; the read after it must return null
    assertFalse("expecting second element", format.reachedEnd());
    assertNotNull("expecting second element", format.nextRecord(u));
    assertNull(format.nextRecord(u));
    assertTrue(format.reachedEnd());
    format.close();
}
Also used : Path(org.apache.flink.core.fs.Path) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Colors(org.apache.flink.api.io.avro.generated.Colors) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Aggregations

AvroInputFormat (org.apache.flink.api.java.io.AvroInputFormat): 13, Path (org.apache.flink.core.fs.Path): 13, Test (org.junit.Test): 12, User (org.apache.flink.api.io.avro.generated.User): 11, Configuration (org.apache.flink.configuration.Configuration): 7, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 6, FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 5, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 3, GenericRecord (org.apache.avro.generic.GenericRecord): 2, Utf8 (org.apache.avro.util.Utf8): 2, Colors (org.apache.flink.api.io.avro.generated.Colors): 2, KeySelector (org.apache.flink.api.java.functions.KeySelector): 2, HashMap (java.util.HashMap): 1, Map (java.util.Map): 1, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 1