Use of org.apache.hadoop.mapreduce.RecordReader in project asterixdb by apache.
From the class HDFSReadOperatorDescriptor, method createPushRuntime:
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();

        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                InputFormat inputFormat =
                        ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read
                         * synchronize among simultaneous partitions in the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }
                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue() == true) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                    inputSplits.get(i).toString());
                        }
                    }
                }
                parser.close(writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
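
The pattern above is the standard way to drive a new-API RecordReader by hand: ask the InputFormat for a reader, call initialize() with the split and a TaskAttemptContext, loop on nextKeyValue(), and fetch each pair with getCurrentKey()/getCurrentValue(). The following is a minimal standalone sketch of that same sequence outside Hyracks, using TextInputFormat against a local file; the path file:///tmp/sample.txt and the single whole-file split are placeholders, and it assumes the Hadoop client libraries are on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("file:///tmp/sample.txt"); // placeholder input file
        long len = file.getFileSystem(conf).getFileStatus(file).getLen();

        // One split covering the whole file, read in the current process.
        FileSplit split = new FileSplit(file, 0, len, new String[0]);
        TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        // Same sequence as in the operator above: create, initialize, iterate, close.
        TextInputFormat inputFormat = new TextInputFormat();
        RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
        try {
            reader.initialize(split, context);
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
            }
        } finally {
            reader.close();
        }
    }
}
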
Use of org.apache.hadoop.mapreduce.RecordReader in project ignite by apache.
From the class HadoopV2MapTask, method run0:
/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        HadoopV2Context hadoopCtx = hadoopContext();

        InputSplit nativeSplit = hadoopCtx.getInputSplit();

        if (nativeSplit == null)
            throw new IgniteCheckedException("Input split cannot be null.");

        InputFormat inFormat =
            ReflectionUtils.newInstance(jobCtx.getInputFormatClass(), hadoopCtx.getConfiguration());

        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);
        reader.initialize(nativeSplit, hadoopCtx);

        hadoopCtx.reader(reader);

        HadoopJobInfo jobInfo = taskCtx.job().info();

        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);

        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopCtx.getConfiguration());

        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopCtx));

            taskCtx.onMapperFinished();
        }
        finally {
            closeWriter();
        }

        commit(outputFormat);
    }
    catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
    catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    }
    finally {
        HadoopMapperUtils.clearMapperIndex();

        if (err != null)
            abort(outputFormat);
    }
}
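
In this example the RecordReader is not iterated directly: it is registered on the HadoopV2Context via hadoopCtx.reader(reader), and the record loop runs inside Mapper.run(Context), where context.nextKeyValue(), getCurrentKey(), and getCurrentValue() are forwarded to that reader. The sketch below is a hypothetical identity mapper (not Ignite or Hadoop source) whose overridden run() spells out the loop that the default Mapper.run essentially performs, to make that delegation visible.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: run() mirrors what the default Mapper.run(Context) does,
// so every nextKeyValue()/getCurrentKey()/getCurrentValue() call on the context
// ends up on the RecordReader that the task framework installed behind it.
public class PassThroughMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            while (context.nextKeyValue()) {
                map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
        } finally {
            cleanup(context);
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(key, value); // identity: pass each record through unchanged
    }
}

Handing the reader to the context rather than looping over it in the task keeps the split-reading logic behind the standard Mapper contract, so user mapper classes run unchanged inside Ignite's execution engine.
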