use of org.apache.avro.mapred.FsInput in project crunch by cloudera.
the class AvroFileReaderFactory method read.
/**
 * Opens the Avro container file at {@code path} and exposes its records as an
 * iterator, passing each record through {@code mapFn} before returning it.
 *
 * <p>The underlying reader is closed as soon as iteration reports that no
 * records remain, so a fully consumed iterator does not keep the file open.
 * If the file cannot be opened, the failure is logged, any partially opened
 * input is closed, and an empty iterator is returned.
 *
 * @param fs file system whose configuration is used to open the file
 * @param path location of the Avro file to read
 * @return an unmodifiable iterator over the mapped records; empty if the file
 *         could not be opened
 */
@Override
public Iterator<T> read(FileSystem fs, final Path path) {
  this.mapFn.setConfigurationForTest(conf);
  this.mapFn.initialize();
  FsInput fsi = null;
  try {
    fsi = new FsInput(path, fs.getConf());
    final DataFileReader<T> reader = new DataFileReader<T>(fsi, recordReader);
    return new UnmodifiableIterator<T>() {
      // Set once the reader has been exhausted and closed; guards against
      // touching the reader again after close.
      private boolean closed = false;

      @Override
      public boolean hasNext() {
        if (closed) {
          return false;
        }
        if (reader.hasNext()) {
          return true;
        }
        // No more records: release the file handle eagerly.
        closed = true;
        try {
          reader.close();
        } catch (IOException e) {
          LOG.info("Could not close avro file at path: " + path, e);
        }
        return false;
      }

      @Override
      public T next() {
        if (!hasNext()) {
          throw new java.util.NoSuchElementException();
        }
        return mapFn.map(reader.next());
      }
    };
  } catch (IOException e) {
    LOG.info("Could not read avro file at path: " + path, e);
    if (fsi != null) {
      // The input was opened but the reader could not be built on top of it.
      try {
        fsi.close();
      } catch (IOException closeError) {
        LOG.info("Could not close avro file at path: " + path, closeError);
      }
    }
    return Iterators.emptyIterator();
  }
}
use of org.apache.avro.mapred.FsInput in project incubator-gobblin by apache.
the class AvroUtilsTest method getRecordFromFile.
/**
 * Reads every record of the Avro container file at {@code path} into a list.
 *
 * <p>Both the input and the file reader are managed by try-with-resources, so
 * they are closed even when opening the reader or iterating the records fails.
 *
 * @param path location of the Avro file, resolved against a default {@link Configuration}
 * @return all records of the file, in file order
 * @throws IOException if the file cannot be opened or closed
 */
public static List<GenericRecord> getRecordFromFile(String path) throws IOException {
  Configuration config = new Configuration();
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  List<GenericRecord> records = new ArrayList<>();
  try (SeekableInput input = new FsInput(new Path(path), config);
      FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, datumReader)) {
    for (GenericRecord datum : fileReader) {
      records.add(datum);
    }
  }
  return records;
}
use of org.apache.avro.mapred.FsInput in project drill by apache.
the class AvroBatchReader method prepareReader.
/**
 * Initializes Avro data reader based on given file system and file path.
 * Moves reader to the sync point from where to start reading the data.
 *
 * <p>If positioning the reader fails, the already opened reader is closed
 * before the error is reported. If the thread was interrupted, its interrupt
 * status is restored before the error is reported.
 *
 * @param fileSplit file split
 * @param fs file system
 * @param opUserName name of the user whom to impersonate while reading the data
 * @param queryUserName name of the user who issues the query
 * @return Avro file reader
 */
private DataFileReader<GenericRecord> prepareReader(FileSplit fileSplit, FileSystem fs, String opUserName, String queryUserName) {
  DataFileReader<GenericRecord> reader = null;
  try {
    UserGroupInformation ugi = ImpersonationUtil.createProxyUgi(opUserName, queryUserName);
    reader = ugi.doAs((PrivilegedExceptionAction<DataFileReader<GenericRecord>>) () -> new DataFileReader<>(new FsInput(fileSplit.getPath(), fs.getConf()), new GenericDatumReader<GenericRecord>()));
    // move to sync point from where to read the file
    reader.sync(fileSplit.getStart());
    return reader;
  } catch (IOException | InterruptedException e) {
    if (e instanceof InterruptedException) {
      // Preserve the interrupt for callers further up the stack.
      Thread.currentThread().interrupt();
    }
    if (reader != null) {
      // The reader was opened but could not be positioned; do not leak it.
      try {
        reader.close();
      } catch (IOException closeError) {
        e.addSuppressed(closeError);
      }
    }
    throw UserException.dataReadError(e).message("Error preparing Avro reader").addContext(String.format("Reader: %s", this)).build(logger);
  }
}
use of org.apache.avro.mapred.FsInput in project crunch by cloudera.
the class AvroRecordReader method initialize.
/**
 * Opens the Avro file behind the given split and positions the reader at the
 * first sync point at or after the split start.
 *
 * <p>The datum reader is chosen from the job configuration: a reflect-based
 * reader when {@code AvroJob.INPUT_IS_REFLECT} is true (the default used
 * here), otherwise a {@code SpecificDatumReader}. If opening or positioning
 * the reader fails, the underlying input is closed before the exception
 * propagates.
 *
 * @param genericSplit the split to read; cast to {@code FileSplit}
 * @param context task context supplying the configuration
 * @throws IOException if the file cannot be opened or positioned
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration conf = context.getConfiguration();
  DatumReader<T> datumReader = null;
  if (conf.getBoolean(AvroJob.INPUT_IS_REFLECT, true)) {
    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
    datumReader = factory.getReader(schema);
  } else {
    datumReader = new SpecificDatumReader<T>(schema);
  }
  SeekableInput in = new FsInput(split.getPath(), conf);
  boolean ready = false;
  try {
    this.reader = DataFileReader.openReader(in, datumReader);
    // sync to start
    reader.sync(split.getStart());
    this.start = reader.tell();
    ready = true;
  } finally {
    if (!ready) {
      // Initialization failed part-way: release the file handle.
      try {
        in.close();
      } catch (IOException ignored) {
        // Deliberately ignored so the original failure is the one reported.
      }
    }
  }
  this.end = split.getStart() + split.getLength();
}
use of org.apache.avro.mapred.FsInput in project incubator-gobblin by apache.
the class TestAvroExtractor method getRecordFromFile.
/**
 * Loads all records stored in the Avro container file at {@code path}.
 *
 * <p>The input and the file reader are opened in a try-with-resources
 * statement, so both are closed whether reading succeeds or fails.
 *
 * @param path location of the Avro file, resolved against a default {@link Configuration}
 * @return the file's records in the order they were read
 * @throws IOException if the file cannot be opened or closed
 */
public static List<GenericRecord> getRecordFromFile(String path) throws IOException {
  Configuration config = new Configuration();
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  List<GenericRecord> records = new ArrayList<>();
  try (SeekableInput input = new FsInput(new Path(path), config);
      FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, datumReader)) {
    for (GenericRecord datum : fileReader) {
      records.add(datum);
    }
  }
  return records;
}
Aggregations