Use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.
Example from the class FlattenNestedKeyConverterTest, method testConversion.
/**
 * Test schema and record conversion
 * 1. A successful schema and record conversion
 * 2. Another successful conversion by reusing the converter
 * 3. An expected failed conversion by reusing the converter
 *
 * NOTE(review): no @Test annotation is visible on this method in this chunk —
 * confirm the enclosing class carries a class-level @Test, otherwise this test never runs.
 */
public void testConversion() throws IOException {
  // Configure the converter to flatten one top-level and one nested field.
  String key = FlattenNestedKeyConverter.class.getSimpleName() + "." + FlattenNestedKeyConverter.FIELDS_TO_FLATTEN;
  Properties props = new Properties();
  props.put(key, "name,address.street_number");
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.addAll(props);

  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(inputSchema);
  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  // Don't leave the copied fixture behind after the test run.
  tmp.deleteOnExit();
  FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
  GenericRecord inputRecord;
  // try-with-resources: the reader was previously never closed (resource leak).
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader)) {
    inputRecord = dataFileReader.next();
  }

  FlattenNestedKeyConverter converter = new FlattenNestedKeyConverter();
  Schema outputSchema = null;
  try {
    outputSchema = converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    Assert.fail(e.getMessage());
  }
  // Only the nested field is added; "name" is already top level, so exactly one new field.
  Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
  Assert.assertNotNull(outputSchema.getField("addressStreet_number"));

  GenericRecord outputRecord = null;
  try {
    outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } catch (DataConversionException e) {
    Assert.fail(e.getMessage());
  }
  // The flattened field must reference the very same value object as the nested one.
  Object expected = AvroUtils.getFieldValue(outputRecord, "address.street_number").get();
  Assert.assertSame(outputRecord.get("addressStreet_number"), expected);

  // Reuse the converter to do another successful conversion
  props.put(key, "name,address.city");
  workUnitState.addAll(props);
  try {
    outputSchema = converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    Assert.fail(e.getMessage());
  }
  Assert.assertEquals(outputSchema.getFields().size(), inputSchema.getFields().size() + 1);
  Assert.assertNotNull(outputSchema.getField("addressCity"));
  try {
    outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } catch (DataConversionException e) {
    Assert.fail(e.getMessage());
  }
  expected = AvroUtils.getFieldValue(outputRecord, "address.city").get();
  Assert.assertSame(outputRecord.get("addressCity"), expected);

  // Reuse the converter to do a failed conversion: the configured nested field does not exist.
  props.put(key, "name,address.anInvalidField");
  workUnitState.addAll(props);
  boolean hasAnException = false;
  try {
    converter.convertSchema(inputSchema, workUnitState);
  } catch (SchemaConversionException e) {
    hasAnException = true;
  }
  Assert.assertTrue(hasAnException);
}
Use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.
Example from the class AvroFieldsPickConverterTest, method testFieldsPickWithNestedRecord.
@Test
public void testFieldsPickWithNestedRecord() throws Exception {
  // Ask the converter to keep a subset of fields, including nested and union-nested ones.
  Schema sourceSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/pickfields_nested_with_union.avsc"));
  WorkUnitState state = new WorkUnitState();
  state.setProp(ConfigurationKeys.CONVERTER_AVRO_FIELD_PICK_FIELDS, "name,favorite_number,nested1.nested1_string,nested1.nested2_union.nested2_string");
  try (AvroFieldsPickConverter converter = new AvroFieldsPickConverter()) {
    // Converted schema must match the checked-in expectation (lenient JSON compare).
    Schema actualSchema = converter.convertSchema(sourceSchema, state);
    Schema expectedSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/converted_pickfields_nested_with_union.avsc"));
    JSONAssert.assertEquals(expectedSchema.toString(), actualSchema.toString(), false);

    File sourceFile = new File(getClass().getResource("/converter/pickfields_nested_with_union.avro").toURI());
    File expectedFile = new File(getClass().getResource("/converter/converted_pickfields_nested_with_union.avro").toURI());
    try (DataFileReader<GenericRecord> sourceReader = new DataFileReader<GenericRecord>(sourceFile, new GenericDatumReader<GenericRecord>(sourceSchema));
        DataFileReader<GenericRecord> expectedReader = new DataFileReader<GenericRecord>(expectedFile, new GenericDatumReader<GenericRecord>(expectedSchema))) {
      // Convert each source record and compare against the matching expected record.
      while (expectedReader.hasNext()) {
        GenericRecord want = expectedReader.next();
        GenericRecord got = converter.convertRecord(actualSchema, sourceReader.next(), state).iterator().next();
        Assert.assertEquals(got, want);
      }
      // The source file must not contain more records than the expected file.
      Assert.assertFalse(sourceReader.hasNext());
    }
  }
}
Use of org.apache.avro.file.DataFileReader in project parquet-mr by apache.
Example from the class TestStringBehavior, method testGeneric.
@Test
public void testGeneric() throws IOException {
  // Baseline: read the first record straight from the Avro file with the generic reader.
  GenericRecord fromAvro;
  try (DataFileReader<GenericRecord> reader = new DataFileReader<>(avroFile, new GenericDatumReader<>(SCHEMA))) {
    fromAvro = reader.next();
  }

  // Same data through parquet-avro, forcing the generic data model and the same read schema.
  Configuration conf = new Configuration();
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  AvroReadSupport.setAvroDataSupplier(conf, GenericDataSupplier.class);
  AvroReadSupport.setAvroReadSchema(conf, SCHEMA);
  GenericRecord fromParquet;
  try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(parquetFile).withConf(conf).build()) {
    fromParquet = reader.read();
  }

  // Both paths must agree on the Java class used for each string flavor.
  Assert.assertEquals("Avro default string class should be Utf8", Utf8.class, fromAvro.get("default_class").getClass());
  Assert.assertEquals("Parquet default string class should be Utf8", Utf8.class, fromParquet.get("default_class").getClass());
  Assert.assertEquals("Avro avro.java.string=String class should be String", String.class, fromAvro.get("string_class").getClass());
  Assert.assertEquals("Parquet avro.java.string=String class should be String", String.class, fromParquet.get("string_class").getClass());
  Assert.assertEquals("Avro stringable class should be Utf8", Utf8.class, fromAvro.get("stringable_class").getClass());
  Assert.assertEquals("Parquet stringable class should be Utf8", Utf8.class, fromParquet.get("stringable_class").getClass());
  Assert.assertEquals("Avro map default string class should be Utf8", Utf8.class, keyClass(fromAvro.get("default_map")));
  Assert.assertEquals("Parquet map default string class should be Utf8", Utf8.class, keyClass(fromParquet.get("default_map")));
  Assert.assertEquals("Avro map avro.java.string=String class should be String", String.class, keyClass(fromAvro.get("string_map")));
  Assert.assertEquals("Parquet map avro.java.string=String class should be String", String.class, keyClass(fromParquet.get("string_map")));
  Assert.assertEquals("Avro map stringable class should be Utf8", Utf8.class, keyClass(fromAvro.get("stringable_map")));
  Assert.assertEquals("Parquet map stringable class should be Utf8", Utf8.class, keyClass(fromParquet.get("stringable_map")));
}
Use of org.apache.avro.file.DataFileReader in project parquet-mr by apache.
Example from the class TestStringBehavior, method testSpecific.
@Test
public void testSpecific() throws IOException {
  // Baseline: read the first record through plain Avro with the specific (generated) reader.
  org.apache.parquet.avro.StringBehaviorTest avroRecord;
  try (DataFileReader<org.apache.parquet.avro.StringBehaviorTest> avro = new DataFileReader<>(avroFile, new SpecificDatumReader<>(org.apache.parquet.avro.StringBehaviorTest.getClassSchema()))) {
    avroRecord = avro.next();
  }
  // Same data through parquet-avro, forcing the specific data model.
  org.apache.parquet.avro.StringBehaviorTest parquetRecord;
  Configuration conf = new Configuration();
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  AvroReadSupport.setAvroDataSupplier(conf, SpecificDataSupplier.class);
  AvroReadSupport.setAvroReadSchema(conf, org.apache.parquet.avro.StringBehaviorTest.getClassSchema());
  try (ParquetReader<org.apache.parquet.avro.StringBehaviorTest> parquet = AvroParquetReader.<org.apache.parquet.avro.StringBehaviorTest>builder(parquetFile).withConf(conf).build()) {
    parquetRecord = parquet.read();
  }
  // Fixed: the next two messages previously said "should be String" while asserting Utf8.
  Assert.assertEquals("Avro default string class should be Utf8", Utf8.class, avroRecord.getDefaultClass().getClass());
  Assert.assertEquals("Parquet default string class should be Utf8", Utf8.class, parquetRecord.getDefaultClass().getClass());
  Assert.assertEquals("Avro avro.java.string=String class should be String", String.class, avroRecord.getStringClass().getClass());
  Assert.assertEquals("Parquet avro.java.string=String class should be String", String.class, parquetRecord.getStringClass().getClass());
  Assert.assertEquals("Avro stringable class should be BigDecimal", BigDecimal.class, avroRecord.getStringableClass().getClass());
  Assert.assertEquals("Parquet stringable class should be BigDecimal", BigDecimal.class, parquetRecord.getStringableClass().getClass());
  Assert.assertEquals("Should have the correct BigDecimal value", BIG_DECIMAL, parquetRecord.getStringableClass());
  // Fixed: the next two messages previously said "should be String" while asserting Utf8.
  Assert.assertEquals("Avro map default string class should be Utf8", Utf8.class, keyClass(avroRecord.getDefaultMap()));
  Assert.assertEquals("Parquet map default string class should be Utf8", Utf8.class, keyClass(parquetRecord.getDefaultMap()));
  Assert.assertEquals("Avro map avro.java.string=String class should be String", String.class, keyClass(avroRecord.getStringMap()));
  Assert.assertEquals("Parquet map avro.java.string=String class should be String", String.class, keyClass(parquetRecord.getStringMap()));
  Assert.assertEquals("Avro map stringable class should be BigDecimal", BigDecimal.class, keyClass(avroRecord.getStringableMap()));
  Assert.assertEquals("Parquet map stringable class should be BigDecimal", BigDecimal.class, keyClass(parquetRecord.getStringableMap()));
}
Use of org.apache.avro.file.DataFileReader in project mist by snuspl.
Example from the class AsyncDiskQueryInfoStore, method loadFromFile.
/**
 * Loads a stored {@link AvroDag} from a file.
 * @param storedPlanFile the file the dag was previously serialized to
 * @return the deserialized dag (the first record in the file)
 * @throws IOException if the file cannot be opened or read as an Avro data file
 */
private AvroDag loadFromFile(final File storedPlanFile) throws IOException {
  // try-with-resources: the reader was previously never closed (resource leak).
  try (DataFileReader<AvroDag> dataFileReader = new DataFileReader<AvroDag>(storedPlanFile, datumReader)) {
    // Only the first record is read; next() allocates a fresh AvroDag instance.
    return dataFileReader.next();
  }
}
Aggregations