Search in sources :

Example 41 with DataFileReader

use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.

the class FlattenNestedKeyConverterTest method testConversion.

/**
 * Test schema and record conversion
 *  1. A successful schema and record conversion
 *  2. Another successful conversion by reusing the converter
 *  3. An expected failed conversion by reusing the converter
 *
 * <p>The input record is the first record of the {@code /converter/nested.avro} resource, read
 * through a temporary copy of that resource. The reader is closed and the temporary copy removed
 * as soon as the record has been loaded.
 *
 * @throws IOException if the test resources cannot be copied or read
 */
public void testConversion() throws IOException {
    String key = FlattenNestedKeyConverter.class.getSimpleName() + "." + FlattenNestedKeyConverter.FIELDS_TO_FLATTEN;
    Properties props = new Properties();
    props.put(key, "name,address.street_number");
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.addAll(props);
    Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(inputSchema);
    File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
    GenericRecord inputRecord;
    try {
        FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
        // Close the reader once the single record needed by this test has been read.
        try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader)) {
            inputRecord = dataFileReader.next();
        }
    } finally {
        // Do not leave the temporary copy behind; fall back to removal at JVM exit.
        if (!tmp.delete()) {
            tmp.deleteOnExit();
        }
    }
    FlattenNestedKeyConverter converter = new FlattenNestedKeyConverter();
    Schema outputSchema = null;
    try {
        outputSchema = converter.convertSchema(inputSchema, workUnitState);
    } catch (SchemaConversionException e) {
        Assert.fail(e.getMessage());
    }
    // Only "address.street_number" is expected to add a field to the schema.
    Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
    Assert.assertNotNull(outputSchema.getField("addressStreet_number"));
    GenericRecord outputRecord = null;
    try {
        outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
    } catch (DataConversionException e) {
        Assert.fail(e.getMessage());
    }
    Object expected = AvroUtils.getFieldValue(outputRecord, "address.street_number").get();
    // The flattened field must be the very same object as the nested value.
    Assert.assertSame(outputRecord.get("addressStreet_number"), expected);
    // Reuse the converter to do another successful conversion
    props.put(key, "name,address.city");
    workUnitState.addAll(props);
    try {
        outputSchema = converter.convertSchema(inputSchema, workUnitState);
    } catch (SchemaConversionException e) {
        Assert.fail(e.getMessage());
    }
    Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
    Assert.assertNotNull(outputSchema.getField("addressCity"));
    try {
        outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
    } catch (DataConversionException e) {
        Assert.fail(e.getMessage());
    }
    expected = AvroUtils.getFieldValue(outputRecord, "address.city").get();
    Assert.assertSame(outputRecord.get("addressCity"), expected);
    // Reuse the converter to do a failed conversion
    props.put(key, "name,address.anInvalidField");
    workUnitState.addAll(props);
    boolean hasAnException = false;
    try {
        converter.convertSchema(inputSchema, workUnitState);
    } catch (SchemaConversionException e) {
        // Expected: the configured nested field is not part of the input schema.
        hasAnException = true;
    }
    Assert.assertTrue(hasAnException);
}
Also used : SchemaConversionException(org.apache.gobblin.converter.SchemaConversionException) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) Properties(java.util.Properties) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) DataConversionException(org.apache.gobblin.converter.DataConversionException) File(java.io.File)

Example 42 with DataFileReader

use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.

the class AvroFieldsPickConverterTest method testFieldsPickWithNestedRecord.

/**
 * Picks top-level and nested fields (including one inside a union) and checks both the converted
 * schema and every converted record against the expected resources.
 */
@Test
public void testFieldsPickWithNestedRecord() throws Exception {
    Schema sourceSchema = new Schema.Parser().parse(
        getClass().getResourceAsStream("/converter/pickfields_nested_with_union.avsc"));
    WorkUnitState state = new WorkUnitState();
    state.setProp(ConfigurationKeys.CONVERTER_AVRO_FIELD_PICK_FIELDS, "name,favorite_number,nested1.nested1_string,nested1.nested2_union.nested2_string");

    try (AvroFieldsPickConverter pickConverter = new AvroFieldsPickConverter()) {
        Schema actualSchema = pickConverter.convertSchema(sourceSchema, state);
        Schema wantedSchema = new Schema.Parser().parse(
            getClass().getResourceAsStream("/converter/converted_pickfields_nested_with_union.avsc"));
        // Non-strict JSON comparison of the two schema representations.
        JSONAssert.assertEquals(wantedSchema.toString(), actualSchema.toString(), false);

        try (DataFileReader<GenericRecord> sourceRecords = new DataFileReader<GenericRecord>(
                 new File(getClass().getResource("/converter/pickfields_nested_with_union.avro").toURI()),
                 new GenericDatumReader<GenericRecord>(sourceSchema));
             DataFileReader<GenericRecord> wantedRecords = new DataFileReader<GenericRecord>(
                 new File(getClass().getResource("/converter/converted_pickfields_nested_with_union.avro").toURI()),
                 new GenericDatumReader<GenericRecord>(wantedSchema))) {
            // Walk both files in lock step, converting each source record on the way.
            while (wantedRecords.hasNext()) {
                GenericRecord wanted = wantedRecords.next();
                GenericRecord source = sourceRecords.next();
                GenericRecord converted = pickConverter.convertRecord(actualSchema, source, state).iterator().next();
                Assert.assertEquals(converted, wanted);
            }
            // The source file must not contain more records than the expected file.
            Assert.assertFalse(sourceRecords.hasNext());
        }
    }
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.testng.annotations.Test)

Example 43 with DataFileReader

use of org.apache.avro.file.DataFileReader in project parquet-mr by apache.

the class TestStringBehavior method testGeneric.

/**
 * Reads the first record of the Avro file and of the Parquet file as generic records and checks
 * the Java class used for string values and for map keys in each of them.
 */
@Test
public void testGeneric() throws IOException {
    // First record as produced by the plain Avro reader.
    GenericRecord fromAvro;
    try (DataFileReader<GenericRecord> avroReader = new DataFileReader<>(avroFile, new GenericDatumReader<>(SCHEMA))) {
        fromAvro = avroReader.next();
    }

    // First record as produced by the Parquet reader, with Avro compatibility switched off.
    GenericRecord fromParquet;
    Configuration readConf = new Configuration();
    readConf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
    AvroReadSupport.setAvroDataSupplier(readConf, GenericDataSupplier.class);
    AvroReadSupport.setAvroReadSchema(readConf, SCHEMA);
    try (ParquetReader<GenericRecord> parquetReader = AvroParquetReader.<GenericRecord>builder(parquetFile).withConf(readConf).build()) {
        fromParquet = parquetReader.read();
    }

    // String-valued fields.
    Assert.assertEquals("Avro default string class should be Utf8", Utf8.class, fromAvro.get("default_class").getClass());
    Assert.assertEquals("Parquet default string class should be Utf8", Utf8.class, fromParquet.get("default_class").getClass());
    Assert.assertEquals("Avro avro.java.string=String class should be String", String.class, fromAvro.get("string_class").getClass());
    Assert.assertEquals("Parquet avro.java.string=String class should be String", String.class, fromParquet.get("string_class").getClass());
    Assert.assertEquals("Avro stringable class should be Utf8", Utf8.class, fromAvro.get("stringable_class").getClass());
    Assert.assertEquals("Parquet stringable class should be Utf8", Utf8.class, fromParquet.get("stringable_class").getClass());

    // Map key classes.
    Assert.assertEquals("Avro map default string class should be Utf8", Utf8.class, keyClass(fromAvro.get("default_map")));
    Assert.assertEquals("Parquet map default string class should be Utf8", Utf8.class, keyClass(fromParquet.get("default_map")));
    Assert.assertEquals("Avro map avro.java.string=String class should be String", String.class, keyClass(fromAvro.get("string_map")));
    Assert.assertEquals("Parquet map avro.java.string=String class should be String", String.class, keyClass(fromParquet.get("string_map")));
    Assert.assertEquals("Avro map stringable class should be Utf8", Utf8.class, keyClass(fromAvro.get("stringable_map")));
    Assert.assertEquals("Parquet map stringable class should be Utf8", Utf8.class, keyClass(fromParquet.get("stringable_map")));
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) Configuration(org.apache.hadoop.conf.Configuration) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 44 with DataFileReader

use of org.apache.avro.file.DataFileReader in project parquet-mr by apache.

the class TestStringBehavior method testSpecific.

/**
 * Reads the first record of the Avro file and of the Parquet file as specific
 * {@code StringBehaviorTest} records and checks the Java class used for string values and for
 * map keys in each of them.
 *
 * @throws IOException if either file cannot be read
 */
@Test
public void testSpecific() throws IOException {
    org.apache.parquet.avro.StringBehaviorTest avroRecord;
    try (DataFileReader<org.apache.parquet.avro.StringBehaviorTest> avro = new DataFileReader<>(avroFile, new SpecificDatumReader<>(org.apache.parquet.avro.StringBehaviorTest.getClassSchema()))) {
        avroRecord = avro.next();
    }
    org.apache.parquet.avro.StringBehaviorTest parquetRecord;
    Configuration conf = new Configuration();
    conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
    AvroReadSupport.setAvroDataSupplier(conf, SpecificDataSupplier.class);
    AvroReadSupport.setAvroReadSchema(conf, org.apache.parquet.avro.StringBehaviorTest.getClassSchema());
    try (ParquetReader<org.apache.parquet.avro.StringBehaviorTest> parquet = AvroParquetReader.<org.apache.parquet.avro.StringBehaviorTest>builder(parquetFile).withConf(conf).build()) {
        parquetRecord = parquet.read();
    }
    // Failure messages name the class that is actually asserted (Utf8 for the default cases).
    Assert.assertEquals("Avro default string class should be Utf8", Utf8.class, avroRecord.getDefaultClass().getClass());
    Assert.assertEquals("Parquet default string class should be Utf8", Utf8.class, parquetRecord.getDefaultClass().getClass());
    Assert.assertEquals("Avro avro.java.string=String class should be String", String.class, avroRecord.getStringClass().getClass());
    Assert.assertEquals("Parquet avro.java.string=String class should be String", String.class, parquetRecord.getStringClass().getClass());
    Assert.assertEquals("Avro stringable class should be BigDecimal", BigDecimal.class, avroRecord.getStringableClass().getClass());
    Assert.assertEquals("Parquet stringable class should be BigDecimal", BigDecimal.class, parquetRecord.getStringableClass().getClass());
    Assert.assertEquals("Should have the correct BigDecimal value", BIG_DECIMAL, parquetRecord.getStringableClass());
    Assert.assertEquals("Avro map default string class should be Utf8", Utf8.class, keyClass(avroRecord.getDefaultMap()));
    Assert.assertEquals("Parquet map default string class should be Utf8", Utf8.class, keyClass(parquetRecord.getDefaultMap()));
    Assert.assertEquals("Avro map avro.java.string=String class should be String", String.class, keyClass(avroRecord.getStringMap()));
    Assert.assertEquals("Parquet map avro.java.string=String class should be String", String.class, keyClass(parquetRecord.getStringMap()));
    Assert.assertEquals("Avro map stringable class should be BigDecimal", BigDecimal.class, keyClass(avroRecord.getStringableMap()));
    Assert.assertEquals("Parquet map stringable class should be BigDecimal", BigDecimal.class, keyClass(parquetRecord.getStringableMap()));
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) Configuration(org.apache.hadoop.conf.Configuration) Test(org.junit.Test)

Example 45 with DataFileReader

use of org.apache.avro.file.DataFileReader in project mist by snuspl.

the class AsyncDiskQueryInfoStore method loadFromFile.

/**
 * Load the stored dag from File.
 * Only the first record of the file is read; the reader is closed before returning.
 * @param storedPlanFile file
 * @return chained dag
 * @throws IOException if the file cannot be opened, read, or closed
 */
private AvroDag loadFromFile(final File storedPlanFile) throws IOException {
    // try-with-resources releases the underlying file handle even when reading fails.
    try (final DataFileReader<AvroDag> dataFileReader = new DataFileReader<AvroDag>(storedPlanFile, datumReader)) {
        // No existing instance is offered for reuse, so a fresh AvroDag is produced.
        return dataFileReader.next(null);
    }
}
Also used : AvroDag(edu.snu.mist.formats.avro.AvroDag) DataFileReader(org.apache.avro.file.DataFileReader)

Aggregations

DataFileReader (org.apache.avro.file.DataFileReader)46 GenericRecord (org.apache.avro.generic.GenericRecord)28 File (java.io.File)26 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)21 Schema (org.apache.avro.Schema)20 Test (org.junit.Test)10 ArrayList (java.util.ArrayList)9 IOException (java.io.IOException)8 Test (org.testng.annotations.Test)7 SeekableInput (org.apache.avro.file.SeekableInput)6 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)6 Configuration (org.apache.hadoop.conf.Configuration)6 ReflectDatumReader (org.apache.avro.reflect.ReflectDatumReader)5 SeekableByteArrayInput (org.apache.avro.file.SeekableByteArrayInput)4 FsInput (org.apache.avro.mapred.FsInput)4 SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader)4 Utf8 (org.apache.avro.util.Utf8)4 JsonObject (com.google.gson.JsonObject)2 AvroDag (edu.snu.mist.formats.avro.AvroDag)2 Date (java.sql.Date)2