use of org.apache.avro.mapred.FsInput in project incubator-gobblin by apache.
the class AvroUtils method getDirectorySchema.
/**
* Get the latest avro schema for a directory
* @param directory the input dir that contains avro files
* @param fs the {@link FileSystem} for the given directory.
* @param latest true to return latest schema, false to return oldest schema
* @return the latest/oldest schema in the directory
* @throws IOException
*/
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
Schema schema = null;
try (Closer closer = Closer.create()) {
List<FileStatus> files = getDirectorySchemaHelper(directory, fs);
if (files == null || files.size() == 0) {
LOG.warn("There is no previous avro file in the directory: " + directory);
} else {
FileStatus file = latest ? files.get(0) : files.get(files.size() - 1);
LOG.debug("Path to get the avro schema: " + file);
FsInput fi = new FsInput(file.getPath(), fs.getConf());
GenericDatumReader<GenericRecord> genReader = new GenericDatumReader<>();
schema = closer.register(new DataFileReader<>(fi, genReader)).getSchema();
}
} catch (IOException ioe) {
throw new IOException("Cannot get the schema for directory " + directory, ioe);
}
return schema;
}
Aggregations