Example use of org.talend.components.simplefileio.runtime.sources.AvroHdfsFileSource in the project "components" by Talend.
From the class SimpleRecordFormatAvroIO, method read.
/**
 * Builds the read side of the pipeline for Avro files.
 *
 * <p>An {@link AvroHdfsFileSource} is created from this object's {@code doAs} and {@code path}
 * settings, given this object's extra Hadoop configuration and {@code limit}, and applied to
 * {@code in}. The keys of the resulting key/value pairs are passed through
 * {@code ExtractRecordFromAvroKey} and then converted with {@code ConvertToIndexedRecord}.
 *
 * @param in the pipeline start point the source is applied to.
 * @return the collection produced by the final {@code ConvertToIndexedRecord} step.
 */
@Override
public PCollection<IndexedRecord> read(PBegin in) {
    // One coder instance is shared: it is handed to the source and later set on the extracted records.
    LazyAvroCoder<Object> sharedCoder = LazyAvroCoder.of();

    // Configure the source before it is applied to the pipeline.
    AvroHdfsFileSource avroSource = AvroHdfsFileSource.of(doAs, path, sharedCoder);
    avroSource.getExtraHadoopConfiguration().addFrom(getExtraHadoopConfiguration());
    avroSource.setLimit(limit);

    // Read the key/value pairs, explicitly using the coder the source reports as its default.
    PCollection<KV<AvroKey, NullWritable>> keyValues =
            in.apply(Read.from(avroSource)).setCoder(avroSource.getDefaultOutputCoder());

    // Only the keys are used from here on; the NullWritable values are dropped.
    PCollection<AvroKey> avroKeys = keyValues.apply(Keys.<AvroKey>create());

    // Extract the record from each key and attach the shared coder to the result.
    PCollection<Object> extracted = avroKeys.apply(ParDo.of(new ExtractRecordFromAvroKey()));
    extracted = extracted.setCoder(sharedCoder);

    return extracted.apply(ConvertToIndexedRecord.<Object>of());
}
Aggregations