Use of org.apache.avro.file.SeekableInput in project flink by apache.
From the class AvroInputFormat, method initReader.
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;

    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
                ? new SpecificDatumReader<E>(avroValueType)
                : new ReflectDatumReader<E>(avroValueType);
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }

    SeekableInput in = new FSDataInputStreamWrapper(
            stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
    DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }

    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
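For context, a reader opened this way is typically driven across the input split by seeking to the first Avro sync marker at or after the split start and stopping once the split boundary has been passed; that is what the end offset computed above is for. The sketch below is illustrative only and is not Flink's exact follow-up code.

// Illustrative only: read the records that belong to this split.
dataFileReader.sync(split.getStart());      // jump to the first sync marker at or after the split start
while (!dataFileReader.pastSync(end)) {     // stop once the reader has passed the split boundary
    E record = dataFileReader.next();
    // ... hand the record to the caller / collector ...
}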
Use of org.apache.avro.file.SeekableInput in project incubator-gobblin by apache.
From the class AvroExternalTable, method getSchemaFromAvroDataFile.
private Schema getSchemaFromAvroDataFile() throws IOException {
    String firstDataFilePath = HdfsReader.getFirstDataFilePathInDir(this.dataLocationInHdfs);
    LOG.info("Extracting schema for table " + this.name + " from avro data file " + firstDataFilePath);
    SeekableInput sin = new HdfsReader(firstDataFilePath).getFsInput();
    try (DataFileReader<Void> dfr = new DataFileReader<>(sin, new GenericDatumReader<Void>())) {
        Schema schema = dfr.getSchema();
        return schema;
    }
}
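HdfsReader is a Gobblin-internal helper; its getFsInput() presumably returns Avro's FsInput (or an equivalent SeekableInput) over the HDFS path. A minimal, self-contained sketch of the same schema extraction using only standard Avro and Hadoop classes, with an illustrative method name and path argument:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.mapred.FsInput;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

static Schema readAvroSchema(String pathOnHdfs) throws IOException {
    // FsInput implements SeekableInput on top of a Hadoop FileSystem stream.
    SeekableInput sin = new FsInput(new Path(pathOnHdfs), new Configuration());
    try (DataFileReader<Void> reader = new DataFileReader<>(sin, new GenericDatumReader<Void>())) {
        return reader.getSchema();
    }
}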
Use of org.apache.avro.file.SeekableInput in project incubator-gobblin by apache.
From the class AvroUtilsTest, method getRecordFromFile.
public static List<GenericRecord> getRecordFromFile(String path) throws IOException {
    Configuration config = new Configuration();
    SeekableInput input = new FsInput(new Path(path), config);
    DatumReader<GenericRecord> reader1 = new GenericDatumReader<>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader1);
    List<GenericRecord> records = new ArrayList<>();
    for (GenericRecord datum : fileReader) {
        records.add(datum);
    }
    fileReader.close();
    return records;
}
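As a side note, for purely local files the Hadoop FsInput indirection is not required: DataFileReader.openReader also accepts a java.io.File directly. A small illustrative variant (the file path is hypothetical):

// Requires java.io.File in addition to the Avro classes already used above.
try (FileReader<GenericRecord> reader =
        DataFileReader.openReader(new File("/tmp/example.avro"), new GenericDatumReader<GenericRecord>())) {
    for (GenericRecord record : reader) {
        System.out.println(record);   // or collect into a List, as above
    }
}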
Use of org.apache.avro.file.SeekableInput in project parquet-mr by apache.
From the class SchemaCommand, method getParquetSchema.
private String getParquetSchema(String source) throws IOException {
    Formats.Format format;
    try (SeekableInput in = openSeekable(source)) {
        format = Formats.detectFormat((InputStream) in);
        in.seek(0);
        switch (format) {
            case PARQUET:
                return new ParquetFileReader(
                        getConf(), qualifiedPath(source), ParquetMetadataConverter.NO_FILTER)
                        .getFileMetaData().getSchema().toString();
            default:
                throw new IllegalArgumentException(String.format(
                        "Could not get a Parquet schema for format %s: %s", format, source));
        }
    }
}
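Conceptually, the format detection has to read the start of the file before it is handed to a real reader, which is why the stream is rewound with in.seek(0) afterwards. The sketch below shows the general idea of telling a Parquet file from an Avro container file by its leading magic bytes; it is illustrative only and is not parquet-cli's actual Formats.detectFormat implementation.

// Illustrative magic-byte sniffing; not the real Formats.detectFormat.
static String sniffFormat(InputStream in) throws IOException {
    byte[] magic = new byte[4];
    int n = in.read(magic, 0, 4);
    if (n == 4 && magic[0] == 'P' && magic[1] == 'A' && magic[2] == 'R' && magic[3] == '1') {
        return "PARQUET";   // Parquet files begin (and end) with the ASCII bytes "PAR1"
    }
    if (n == 4 && magic[0] == 'O' && magic[1] == 'b' && magic[2] == 'j' && magic[3] == 1) {
        return "AVRO";      // Avro object container files begin with "Obj" followed by the version byte 1
    }
    return "TEXT";          // fall-through guess for anything else
}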
Use of org.apache.avro.file.SeekableInput in project parquet-mr by apache.
From the class BaseCommand, method getAvroSchema.
protected Schema getAvroSchema(String source) throws IOException {
    Formats.Format format;
    try (SeekableInput in = openSeekable(source)) {
        format = Formats.detectFormat((InputStream) in);
        in.seek(0);
        switch (format) {
            case PARQUET:
                return Schemas.fromParquet(getConf(), qualifiedURI(source));
            case AVRO:
                return Schemas.fromAvro(open(source));
            case TEXT:
                if (source.endsWith("avsc")) {
                    return Schemas.fromAvsc(open(source));
                } else if (source.endsWith("json")) {
                    return Schemas.fromJSON("json", open(source));
                }
            default:
        }
        throw new IllegalArgumentException(String.format(
                "Could not determine file format of %s.", source));
    }
}
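openSeekable is a BaseCommand helper that is not shown here; given the cast to InputStream above, it presumably returns an object that is both an InputStream and a SeekableInput. A hypothetical adapter along those lines, wrapping Hadoop's FSDataInputStream (class and field names are illustrative, not parquet-mr's actual code):

import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.file.SeekableInput;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical adapter: exposes a Hadoop stream as both an InputStream and a SeekableInput.
class SeekableHadoopInput extends InputStream implements SeekableInput {
    private final FSDataInputStream in;
    private final long length;

    SeekableHadoopInput(Path path, Configuration conf) throws IOException {
        FileSystem fs = path.getFileSystem(conf);
        this.length = fs.getFileStatus(path).getLen();
        this.in = fs.open(path);
    }

    @Override public long length() { return length; }
    @Override public long tell() throws IOException { return in.getPos(); }
    @Override public void seek(long p) throws IOException { in.seek(p); }
    @Override public int read() throws IOException { return in.read(); }
    @Override public int read(byte[] b, int off, int len) throws IOException { return in.read(b, off, len); }
    @Override public void close() throws IOException { in.close(); }
}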