Use of org.apache.hadoop.mapreduce.lib.input.FileInputFormat in the project spark-dataflow by Cloudera.
The class HadoopFileFormatPipelineTest, method testSequenceFile:
@Test
public void testSequenceFile() throws Exception {
  // Write the fixture records (i -> "value-" + i) into the input file first.
  populateFile();

  Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

  // Class literals are not generic, so the format classes must be cast
  // through Class<?> to attach the IntWritable/Text type parameters.
  @SuppressWarnings("unchecked")
  Class<? extends FileInputFormat<IntWritable, Text>> inputFormat =
      (Class<? extends FileInputFormat<IntWritable, Text>>) (Class<?>) SequenceFileInputFormat.class;
  HadoopIO.Read.Bound<IntWritable, Text> source =
      HadoopIO.Read.from(inputFile.getAbsolutePath(), inputFormat, IntWritable.class, Text.class);
  PCollection<KV<IntWritable, Text>> records = pipeline.apply(source);

  @SuppressWarnings("unchecked")
  Class<? extends FileOutputFormat<IntWritable, Text>> outputFormat =
      (Class<? extends FileOutputFormat<IntWritable, Text>>) (Class<?>) TemplatedSequenceFileOutputFormat.class;
  @SuppressWarnings("unchecked")
  HadoopIO.Write.Bound<IntWritable, Text> sink =
      HadoopIO.Write.to(outputFile.getAbsolutePath(), outputFormat, IntWritable.class, Text.class);
  // withoutSharding() keeps the output in a single file so it can be read back directly.
  records.apply(sink.withoutSharding());

  EvaluationResult result = SparkPipelineRunner.create().run(pipeline);
  result.close();

  // Read the sequence file back and verify every key/value pair in order.
  IntWritable key = new IntWritable();
  Text value = new Text();
  try (Reader reader = new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
    int expected = 0;
    while (reader.next(key, value)) {
      assertEquals(expected, key.get());
      assertEquals("value-" + expected, value.toString());
      expected++;
    }
  }
}
Use of org.apache.hadoop.mapreduce.lib.input.FileInputFormat in the project flink by Apache.
The class HadoopInputFormatBase, method getStatistics:
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // Base statistics can only be gathered for file-based input formats;
  // for anything else there is nothing meaningful to report.
  if (!(mapreduceInputFormat instanceof FileInputFormat)) {
    return null;
  }

  JobContext jobContext;
  try {
    jobContext = HadoopUtils.instantiateJobContext(configuration, null);
  } catch (Exception e) {
    // Wrap with the cause preserved; instantiation failure is not recoverable here.
    throw new RuntimeException(e);
  }

  // instanceof is already false for null, so a separate null check is redundant.
  final FileBaseStatistics cachedFileStats =
      (cachedStats instanceof FileBaseStatistics) ? (FileBaseStatistics) cachedStats : null;

  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      // Pass the exception itself so the stack trace is preserved in the log,
      // consistent with the Throwable branch below.
      LOG.warn("Could not determine statistics due to an io error: " + ioex.getMessage(), ioex);
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: " + t.getMessage(), t);
    }
  }

  // Statistics could not be determined.
  return null;
}
Aggregations