Search in sources :

Example 1 with TaggedInputSplit

Use of io.cdap.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit in the project cdap by caskdata.

From the class MapperWrapper, method createAutoFlushingContext.

/**
 * Builds a {@link WrappedMapper.Context} around {@code context} that, on every call to
 * {@code nextKeyValue()}, periodically flushes dataset operations and reports metrics.
 *
 * <p>The flush threshold is read from the configuration key {@code c.mapper.flush.freq}
 * (default 10000); the metrics interval comes from
 * {@code basicMapReduceContext.getMetricsReportIntervalMillis()}.
 *
 * @param context the mapper context to delegate to
 * @param basicMapReduceContext supplies the metrics report interval and performs the flush
 * @param metricsWriter invoked to report metrics once the report interval has elapsed
 * @return the wrapping context
 */
private WrappedMapper.Context createAutoFlushingContext(final Context context, final BasicMapReduceTaskContext basicMapReduceContext, final MapTaskMetricsWriter metricsWriter) {
    // NOTE: auto-flush is planned to be driven by the size of buffered data instead, so the
    // current count-based approach is not worth heavy investment or testing.
    final int flushThreshold = context.getConfiguration().getInt("c.mapper.flush.freq", 10000);
    final long metricsIntervalMs = basicMapReduceContext.getMetricsReportIntervalMillis();
    @SuppressWarnings("unchecked") WrappedMapper.Context autoFlushingContext = new WrappedMapper().new Context(context) {

        // Number of nextKeyValue() calls since the last successful flush.
        private int recordsSinceFlush = 0;

        // Wall-clock time (millis) at or after which metrics are reported again; starts at 0,
        // so the first nextKeyValue() call reports immediately.
        private long nextMetricsReportTime = 0L;

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            final boolean hasNext = super.nextKeyValue();

            // Every call is counted, whatever the delegate returned.
            recordsSinceFlush++;
            if (recordsSinceFlush > flushThreshold) {
                flushDatasetOperations();
                // Only reached when the flush did not throw.
                recordsSinceFlush = 0;
            }

            if (System.currentTimeMillis() >= nextMetricsReportTime) {
                metricsWriter.reportMetrics();
                nextMetricsReportTime = System.currentTimeMillis() + metricsIntervalMs;
            }
            return hasNext;
        }

        // Flushes dataset operations; any failure is logged and then rethrown via Throwables.propagate.
        private void flushDatasetOperations() {
            try {
                LOG.trace("Flushing dataset operations...");
                basicMapReduceContext.flushOperations();
            } catch (Exception e) {
                LOG.error("Failed to persist changes", e);
                throw Throwables.propagate(e);
            }
        }

        @Override
        public InputSplit getInputSplit() {
            final InputSplit split = super.getInputSplit();
            // A TaggedInputSplit is unwrapped so the user sees the delegate InputSplit.
            return (split instanceof TaggedInputSplit)
                ? ((TaggedInputSplit) split).getInputSplit()
                : split;
        }

        @Override
        public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
            final InputSplit split = super.getInputSplit();
            if (!(split instanceof MultiInputTaggedSplit)) {
                return super.getInputFormatClass();
            }
            // For a MultiInputTaggedSplit, expose the delegate InputFormat to the user.
            return ((MultiInputTaggedSplit) split).getInputFormatClass();
        }
    };
    return autoFlushingContext;
}
Also used : RuntimeContext(io.cdap.cdap.api.RuntimeContext) TaggedInputSplit(io.cdap.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit) WrappedMapper(org.apache.hadoop.mapreduce.lib.map.WrappedMapper) MultiInputTaggedSplit(io.cdap.cdap.internal.app.runtime.batch.dataset.input.MultiInputTaggedSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit) TaggedInputSplit(io.cdap.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit) IOException(java.io.IOException)

Aggregations

RuntimeContext (io.cdap.cdap.api.RuntimeContext) 1, MultiInputTaggedSplit (io.cdap.cdap.internal.app.runtime.batch.dataset.input.MultiInputTaggedSplit) 1, TaggedInputSplit (io.cdap.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit) 1, IOException (java.io.IOException) 1, InputSplit (org.apache.hadoop.mapreduce.InputSplit) 1, WrappedMapper (org.apache.hadoop.mapreduce.lib.map.WrappedMapper) 1