Use of `org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader` in the Apache Beam project.
From the class `HadoopFormatIOSequenceFileTest`, method `extractResultsFromFile`:
/**
 * Reads every key/value pair from the Hadoop {@code SequenceFile} at {@code fileName} and
 * returns them as a stream of {@link KV} pairs.
 *
 * <p>Hadoop record readers reuse the same key and value object instances across calls to
 * {@code nextKeyValue()}, so each key and value is defensively copied before being collected.
 *
 * @param fileName path of the sequence file to read
 * @return a stream of all {@code (Text, LongWritable)} entries in the file, in file order
 * @throws IllegalStateException if the file cannot be read or reading is interrupted
 */
private Stream<KV<Text, LongWritable>> extractResultsFromFile(String fileName) {
  try (SequenceFileRecordReader<Text, LongWritable> reader = new SequenceFileRecordReader<>()) {
    Path path = new Path(fileName);
    TaskAttemptContext taskContext =
        HadoopFormats.createTaskAttemptContext(new Configuration(), new JobID("readJob", 0), 0);
    // Read the whole file as a single split; Long.MAX_VALUE covers any file length.
    reader.initialize(
        new FileSplit(path, 0L, Long.MAX_VALUE, new String[] {"localhost"}), taskContext);
    List<KV<Text, LongWritable>> result = new ArrayList<>();
    while (reader.nextKeyValue()) {
      // Copy key/value: the reader mutates the same instances on every iteration.
      result.add(
          KV.of(
              new Text(reader.getCurrentKey().toString()),
              new LongWritable(reader.getCurrentValue().get())));
    }
    return result.stream();
  } catch (InterruptedException e) {
    // Restore the interrupt flag instead of swallowing it, then fail the read.
    Thread.currentThread().interrupt();
    throw new IllegalStateException("Interrupted while reading sequence file: " + fileName, e);
  } catch (IOException e) {
    // IllegalStateException is a RuntimeException, so existing callers are unaffected.
    throw new IllegalStateException("Failed to read sequence file: " + fileName, e);
  }
}
Aggregations: