Use of io.cdap.cdap.internal.app.runtime.batch.dataset.input.MultiInputTaggedSplit in project cdap by caskdata.
The class MapperWrapper, method run:
@SuppressWarnings("unchecked")
@Override
public void run(Context context) throws IOException, InterruptedException {
  MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
  ClassLoader weakReferenceClassLoader = new WeakReferenceDelegatorClassLoader(classLoader);

  BasicMapReduceTaskContext basicMapReduceContext = classLoader.getTaskContextProvider().get(context);
  String program = basicMapReduceContext.getProgramName();
  final MapTaskMetricsWriter mapTaskMetricsWriter =
    new MapTaskMetricsWriter(basicMapReduceContext.getProgramMetrics(), context);

  // this is a hook for periodic flushing of changes buffered by datasets (to avoid OOME)
  WrappedMapper.Context flushingContext = createAutoFlushingContext(context, basicMapReduceContext,
                                                                    mapTaskMetricsWriter);
  basicMapReduceContext.setHadoopContext(flushingContext);

  InputSplit inputSplit = context.getInputSplit();
  if (inputSplit instanceof MultiInputTaggedSplit) {
    basicMapReduceContext.setInputContext(InputContexts.create((MultiInputTaggedSplit) inputSplit));
  }

  ClassLoader programClassLoader = classLoader.getProgramClassLoader();
  Mapper delegate = createMapperInstance(programClassLoader, getWrappedMapper(context.getConfiguration()),
                                         context, program);

  // injecting runtime components, like datasets, etc.
  try {
    Reflections.visit(delegate, delegate.getClass(),
                      new PropertyFieldSetter(basicMapReduceContext.getSpecification().getProperties()),
                      new MetricsFieldSetter(basicMapReduceContext.getMetrics()),
                      new DataSetFieldSetter(basicMapReduceContext));
  } catch (Throwable t) {
    Throwable rootCause = Throwables.getRootCause(t);
    USERLOG.error("Failed to initialize program '{}' with error: {}. Please check the system logs for more details.",
                  program, rootCause.getMessage(), rootCause);
    throw new IOException(String.format("Failed to inject fields to %s", delegate.getClass()), t);
  }

  ClassLoader oldClassLoader;
  if (delegate instanceof ProgramLifecycle) {
    oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
    try {
      ((ProgramLifecycle) delegate).initialize(new MapReduceLifecycleContext(basicMapReduceContext));
    } catch (Exception e) {
      Throwable rootCause = Throwables.getRootCause(e);
      USERLOG.error("Failed to initialize program '{}' with error: {}. Please check the system logs for more details.",
                    program, rootCause.getMessage(), rootCause);
      throw new IOException(String.format("Failed to initialize mapper with %s", basicMapReduceContext), e);
    } finally {
      ClassLoaders.setContextClassLoader(oldClassLoader);
    }
  }

  oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
  try {
    delegate.run(flushingContext);
  } finally {
    ClassLoaders.setContextClassLoader(oldClassLoader);
  }
  // transaction is not finished, but we want all operations to be dispatched (some could be
  // buffered in memory by tx agent)
  try {
    basicMapReduceContext.flushOperations();
  } catch (Exception e) {
    throw new IOException("Failed to flush operations at the end of mapper of " + basicMapReduceContext, e);
  }

  // Close all writers created by MultipleOutputs
  basicMapReduceContext.closeMultiOutputs();

  if (delegate instanceof ProgramLifecycle) {
    oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
    try {
      ((ProgramLifecycle<? extends RuntimeContext>) delegate).destroy();
    } catch (Exception e) {
      LOG.error("Error during destroy of mapper {}", basicMapReduceContext, e);
      // Do nothing, try to finish
    } finally {
      ClassLoaders.setContextClassLoader(oldClassLoader);
    }
  }

  // Emit metrics one final time
  mapTaskMetricsWriter.reportMetrics();
}
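Note that run installs a WeakReferenceDelegatorClassLoader, rather than the MapReduceClassLoader itself, as the thread context classloader, and every swap is paired with a finally block that restores the previous loader. A minimal sketch of the idea behind such a delegator, using a simplified stand-in (WeakDelegatingClassLoader below is illustrative, not CDAP's actual implementation): the delegate is held only through a WeakReference, so a component that retains the context classloader after the task finishes cannot pin the program classloader, and every class it loaded, in memory.

import java.lang.ref.WeakReference;

// Illustrative sketch: delegate class loading through a WeakReference so the
// real classloader stays collectible once nothing else references it.
public class WeakDelegatingClassLoader extends ClassLoader {
  private final WeakReference<ClassLoader> delegate;

  public WeakDelegatingClassLoader(ClassLoader delegate) {
    super(null); // no parent: everything beyond bootstrap classes goes to the delegate
    this.delegate = new WeakReference<>(delegate);
  }

  @Override
  protected Class<?> findClass(String name) throws ClassNotFoundException {
    ClassLoader loader = delegate.get();
    if (loader == null) {
      throw new ClassNotFoundException("Delegate classloader was already collected: " + name);
    }
    return loader.loadClass(name);
  }
}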
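Before the lifecycle calls, run also injects runtime components via Reflections.visit and the three field setters (PropertyFieldSetter, MetricsFieldSetter, DataSetFieldSetter). A rough sketch of the general technique, assuming a much-simplified visitor (SimpleInjector and inject are hypothetical names, not CDAP code):

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;

// Illustrative sketch of reflective field injection: walk the class hierarchy
// and assign a value into every non-static field whose type can accept it.
final class SimpleInjector {
  static void inject(Object target, Class<?> fieldType, Object value) throws IllegalAccessException {
    for (Class<?> c = target.getClass(); c != null; c = c.getSuperclass()) {
      for (Field field : c.getDeclaredFields()) {
        if (!Modifier.isStatic(field.getModifiers()) && fieldType.isAssignableFrom(field.getType())) {
          field.setAccessible(true);
          field.set(target, value); // e.g. a Metrics instance or a dataset
        }
      }
    }
  }
}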
Use of io.cdap.cdap.internal.app.runtime.batch.dataset.input.MultiInputTaggedSplit in project cdap by caskdata.
The class MapperWrapper, method createAutoFlushingContext:
private WrappedMapper.Context createAutoFlushingContext(final Context context,
                                                        final BasicMapReduceTaskContext basicMapReduceContext,
                                                        final MapTaskMetricsWriter metricsWriter) {
  // NOTE: we will change auto-flush to take into account size of buffered data, so no need to do/test a lot with
  // current approach
  final int flushFreq = context.getConfiguration().getInt("c.mapper.flush.freq", 10000);
  final long reportIntervalInMillis = basicMapReduceContext.getMetricsReportIntervalMillis();

  @SuppressWarnings("unchecked")
  WrappedMapper.Context flushingContext = new WrappedMapper().new Context(context) {
    private int processedRecords = 0;
    private long nextTimeToReportMetrics = 0L;

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean result = super.nextKeyValue();
      if (++processedRecords > flushFreq) {
        try {
          LOG.trace("Flushing dataset operations...");
          basicMapReduceContext.flushOperations();
        } catch (Exception e) {
          LOG.error("Failed to persist changes", e);
          throw Throwables.propagate(e);
        }
        processedRecords = 0;
      }
      if (System.currentTimeMillis() >= nextTimeToReportMetrics) {
        metricsWriter.reportMetrics();
        nextTimeToReportMetrics = System.currentTimeMillis() + reportIntervalInMillis;
      }
      return result;
    }

    @Override
    public InputSplit getInputSplit() {
      InputSplit inputSplit = super.getInputSplit();
      if (inputSplit instanceof TaggedInputSplit) {
        // expose the delegate InputSplit to the user
        inputSplit = ((TaggedInputSplit) inputSplit).getInputSplit();
      }
      return inputSplit;
    }

    @Override
    public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
      InputSplit inputSplit = super.getInputSplit();
      if (inputSplit instanceof MultiInputTaggedSplit) {
        // expose the delegate InputFormat to the user
        return ((MultiInputTaggedSplit) inputSplit).getInputFormatClass();
      }
      return super.getInputFormatClass();
    }
  };
  return flushingContext;
}
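The getInputSplit and getInputFormatClass overrides are what let user code stay oblivious to the tagging that multi-input jobs wrap around each split. A hedged user-side sketch (PathAwareMapper is a hypothetical mapper, assuming a file-based input running under the flushing context above): because the wrapper unwraps TaggedInputSplit, the familiar FileSplit cast keeps working even when several inputs are configured.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Hypothetical mapper that reads its input file path in setup(). Without the
// unwrapping done by the flushing context, getInputSplit() would return the
// tagged wrapper and this instanceof check would fail.
public class PathAwareMapper extends Mapper<LongWritable, Text, Text, Text> {
  @Override
  protected void setup(Context context) {
    if (context.getInputSplit() instanceof FileSplit) {
      String path = ((FileSplit) context.getInputSplit()).getPath().toString();
      System.out.println("Reading from " + path);
    }
  }
}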