use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.
the class AvroExternalTable method getSchemaFromAvroDataFile.
/**
 * Reads the Avro schema of the first data file found under {@code dataLocationInHdfs}.
 *
 * @return the schema reported by the {@link DataFileReader} opened on that file
 * @throws IOException if any of the underlying HDFS or Avro calls fail
 */
private Schema getSchemaFromAvroDataFile() throws IOException {
  String firstDataFilePath = HdfsReader.getFirstDataFilePathInDir(this.dataLocationInHdfs);
  LOG.info("Extracting schema for table " + this.name + " from avro data file " + firstDataFilePath);
  // The input is declared as its own resource so that it is still closed when the
  // DataFileReader constructor throws (for example on a file that is not valid Avro).
  // Resources are closed in reverse order, so the reader is closed before the input.
  try (SeekableInput sin = new HdfsReader(firstDataFilePath).getFsInput();
      DataFileReader<Void> dfr = new DataFileReader<>(sin, new GenericDatumReader<Void>())) {
    return dfr.getSchema();
  }
}
use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.
the class AvroToRestJsonEntryConverterTest method testConversion.
/**
 * Converts the first record of the {@code /converter/nested.avro} test resource with
 * {@link AvroToRestJsonEntryConverter} and checks the result against {@code expected}.
 *
 * <p>The resource is first copied to a temporary file, which is deleted again before returning.
 *
 * @param expected the entry whose resource path and JSON value the conversion result must match
 * @param actualWorkUnitState the work unit state handed to the converter
 * @throws DataConversionException if the converter fails to convert the record
 * @throws IOException if the resources cannot be copied or read
 * @throws JSONException if the JSON comparison cannot be carried out
 */
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState) throws DataConversionException, IOException, JSONException {
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  tmp.deleteOnExit();
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    // try-with-resources: the reader must be released even when an assertion below fails,
    // and it has to be closed before the temporary file is deleted in the outer finally.
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader)) {
      GenericRecord avroRecord = dataFileReader.next();
      AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
      try {
        RestEntry<JsonObject> actual = converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();
        Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
        JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);
      } finally {
        // Close the converter on the failure path as well, not only after a passing run.
        converter.close();
      }
    }
  } finally {
    // tmp is never null here: File.createTempFile either returns a file or throws.
    tmp.delete();
  }
}
use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.
the class AvroGenericRecordAccessorTest method updateRecordFromTestResource.
/**
 * Loads a schema and the first record of an Avro data file from the classpath and stores them in
 * {@code recordSchema}, {@code record} and {@code accessor}.
 *
 * @param resourceName base name of the resources; the schema is read from {@code resourceName + ".avsc"}
 * @param avroFileName name of the Avro data file resource, or {@code null} to use
 *     {@code resourceName + ".avro"}
 * @throws IOException if the schema or the data file cannot be read
 */
private void updateRecordFromTestResource(String resourceName, String avroFileName) throws IOException {
  if (avroFileName == null) {
    avroFileName = resourceName + ".avro";
  }
  recordSchema = new Schema.Parser().parse(getClass().getClassLoader().getResourceAsStream(resourceName + ".avsc"));
  DatumReader<GenericRecord> reader = new GenericDatumReader<>(recordSchema);
  File avroFile = new File(getClass().getClassLoader().getResource(avroFileName).getPath());
  // try-with-resources: the original never closed the reader, leaking a file handle per call.
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(avroFile, reader)) {
    Assert.assertTrue(dataFileReader.hasNext());
    // The current value of the record field is passed in as the reuse argument of next().
    record = dataFileReader.next(record);
  }
  accessor = new AvroGenericRecordAccessor(record);
}
use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.
the class FileAwareInputStreamExtractorWithCheckSchema method schemaChecking.
/**
 * Use {@link AvroSchemaCheckStrategy} to make sure the real schema and the expected schema have
 * matching field names and types.
 *
 * <p>The check is skipped (and {@code true} returned) unless {@link CopySource#SCHEMA_CHECK_ENABLED}
 * is set in the state. Otherwise the schema of {@code this.file} is read and compared with the
 * schema configured under {@link ConfigurationKeys#COPY_EXPECTED_SCHEMA}.
 *
 * @param fsFromFile not used by this implementation; the file is opened through a fresh
 *     {@link Configuration} instead
 * @return {@code true} if schema checking is disabled, otherwise the result of
 *     {@code strategy.compare(expectedSchema, schema)}
 * @throws IOException if the file cannot be read, the expected schema is not configured, or the
 *     check strategy cannot be created
 */
protected boolean schemaChecking(FileSystem fsFromFile) throws IOException {
  if (!this.state.getPropAsBoolean(CopySource.SCHEMA_CHECK_ENABLED, CopySource.DEFAULT_SCHEMA_CHECK_ENABLED)) {
    return true;
  }
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  Schema schema;
  // Only the schema is needed from the file, so the reader and its input are closed as soon as
  // it has been extracted. The original never closed either of them. Declaring the input as a
  // separate resource also releases it when the DataFileReader constructor itself throws.
  try (FsInput fsInput = new FsInput(this.file.getFileStatus().getPath(), new Configuration());
      DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(fsInput, datumReader)) {
    schema = dataFileReader.getSchema();
  }
  if (this.state.getProp(ConfigurationKeys.COPY_EXPECTED_SCHEMA) == null) {
    throw new IOException("Expected schema is not set properly");
  }
  Schema expectedSchema = new Schema.Parser().parse(this.state.getProp(ConfigurationKeys.COPY_EXPECTED_SCHEMA));
  AvroSchemaCheckStrategy strategy = AvroSchemaCheckStrategy.AvroSchemaCheckStrategyFactory.create(this.state);
  if (strategy == null) {
    throw new IOException("schema check strategy cannot be initialized");
  }
  return strategy.compare(expectedSchema, schema);
}
use of org.apache.avro.file.DataFileReader in project incubator-gobblin by apache.
the class AvroStringFieldDecryptorConverterTest method getRecordFromFile.
/**
 * Reads the first record of the Avro data file at {@code path}.
 *
 * @param path file system path of the Avro data file
 * @return the first record in the file, or {@code null} if the file contains no records
 * @throws IOException if the file cannot be opened or read
 */
private GenericRecord getRecordFromFile(String path) throws IOException {
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  // try-with-resources: the original returned without ever closing the reader.
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(new File(path), reader)) {
    // Only the first record is wanted, so a plain conditional replaces the one-shot while loop.
    return dataFileReader.hasNext() ? dataFileReader.next() : null;
  }
}
Aggregations