Use of org.apache.hadoop.mapred.RecordReader in project voldemort (by voldemort): class JsonSequenceFileInputFormat, method getRecordReader.
@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split, JobConf conf, Reporter reporter) throws IOException {
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path: " + inputPathString);
    Path inputPath = new Path(inputPathString);

    // Open the sequence file and read the key/value schemas stored in its metadata.
    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();
    try {
        Text keySchema = meta.get(new Text("key.schema"));
        Text valueSchema = meta.get(new Text("value.schema"));
        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception("Missing key.schema or value.schema in sequence file metadata");
        }
        // Update the JobConf with the schemas so downstream mappers can read them.
        conf.set("mapper.input.key.schema", keySchema.toString());
        conf.set("mapper.input.value.schema", valueSchema.toString());
    } catch (Exception e) {
        throw new IOException("Failed to load schema from file: " + inputPathString, e);
    } finally {
        reader.close();
    }
    return super.getRecordReader(split, conf, reporter);
}
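For context, a minimal consumer-side sketch (not from the voldemort source): a hypothetical mapper, SchemaAwareMapper, reads the mapper.input.key.schema and mapper.input.value.schema properties that getRecordReader above stores in the JobConf. The class name and the pass-through map body are illustrative assumptions.

    import java.io.IOException;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    // Hypothetical mapper that picks up the schemas placed in the JobConf
    // by JsonSequenceFileInputFormat.getRecordReader above.
    public class SchemaAwareMapper extends MapReduceBase
            implements Mapper<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {

        private String keySchema;
        private String valueSchema;

        @Override
        public void configure(JobConf job) {
            // Set by getRecordReader from the sequence file metadata.
            keySchema = job.get("mapper.input.key.schema");
            valueSchema = job.get("mapper.input.value.schema");
        }

        @Override
        public void map(BytesWritable key, BytesWritable value,
                        OutputCollector<BytesWritable, BytesWritable> out,
                        Reporter reporter) throws IOException {
            // The schemas would drive deserialization of the raw bytes here;
            // this sketch simply passes the records through.
            out.collect(key, value);
        }
    }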
Use of org.apache.hadoop.mapred.RecordReader in project ignite (by apache): class HadoopV1MapTask, method run.
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        InputFormat inFormat = jobConf.getInputFormat();

        // Convert Ignite's split representation into a native Hadoop InputSplit.
        HadoopInputSplit split = info().inputSplit();

        InputSplit nativeSplit;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;

            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
        }
        else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);

        assert nativeSplit != null;

        Reporter reporter = new HadoopV1Reporter(taskCtx);

        HadoopV1OutputCollector collector = null;

        try {
            // The collector writes final output only when there is no combiner and no reducer.
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(),
                fileName(), taskCtx0.attemptId());

            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

            Object key = reader.createKey();
            Object val = reader.createValue();

            assert mapper != null;

            try {
                try {
                    // Standard old-API map loop: the same key/value objects are reused for every record.
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");

                        mapper.map(key, val, collector, reporter);
                    }

                    taskCtx.onMapperFinished();
                }
                finally {
                    mapper.close();
                }
            }
            finally {
                collector.closeWriter();
            }

            collector.commit();
        }
        catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    }
    finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}
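For reference, a minimal standalone sketch (not from the ignite source) of the same old-API read loop that HadoopV1MapTask drives, shown here with TextInputFormat. The class name RecordReaderLoop and the input path /tmp/input are illustrative assumptions.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.InputSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RecordReader;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;

    public class RecordReaderLoop {
        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf();
            FileInputFormat.setInputPaths(conf, new Path("/tmp/input")); // assumed sample input

            TextInputFormat inFormat = new TextInputFormat();
            inFormat.configure(conf);

            for (InputSplit split : inFormat.getSplits(conf, 1)) {
                RecordReader<LongWritable, Text> reader = inFormat.getRecordReader(split, conf, Reporter.NULL);

                // Keys and values are created once and reused, exactly as in the map task above.
                LongWritable key = reader.createKey();
                Text val = reader.createValue();
                try {
                    while (reader.next(key, val))
                        System.out.println(key.get() + "\t" + val);
                }
                finally {
                    reader.close();
                }
            }
        }
    }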