Search in sources :

Example 1 with TaggedInputSplit

use of co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit in project cdap by caskdata.

The method createAutoFlushingContext of the class MapperWrapper.

/**
 * Wraps the given mapper context in a {@link WrappedMapper.Context} that periodically flushes dataset
 * operations while records are being read, and that hides input-split tagging from the user.
 *
 * <p>The flush interval is read from the job configuration key {@code c.mapper.flush.freq}
 * (falling back to 10000 when the key is not set). Every call to {@code nextKeyValue()} bumps a
 * counter; once the counter exceeds the interval, {@code basicMapReduceContext.flushOperations()}
 * is invoked and the counter starts over.
 *
 * @param context the mapper context to delegate to
 * @param basicMapReduceContext the task context whose pending operations are flushed
 * @return the wrapping context
 */
private WrappedMapper.Context createAutoFlushingContext(final Context context, final BasicMapReduceTaskContext basicMapReduceContext) {
    // NOTE: auto-flush is expected to move to a policy based on the size of buffered data, so the
    //       record-count approach below is intentionally kept simple.
    final int flushInterval = context.getConfiguration().getInt("c.mapper.flush.freq", 10000);
    @SuppressWarnings("unchecked") WrappedMapper.Context autoFlushingContext = new WrappedMapper().new Context(context) {

        // Number of nextKeyValue() calls seen since the last successful flush.
        private int callsSinceFlush = 0;

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            boolean hasNext = super.nextKeyValue();
            callsSinceFlush++;
            if (callsSinceFlush <= flushInterval) {
                // Interval not exceeded yet; nothing to flush.
                return hasNext;
            }
            try {
                LOG.trace("Flushing dataset operations...");
                basicMapReduceContext.flushOperations();
            } catch (Exception e) {
                // The counter is deliberately left untouched when the flush fails.
                LOG.error("Failed to persist changes", e);
                throw Throwables.propagate(e);
            }
            callsSinceFlush = 0;
            return hasNext;
        }

        @Override
        public InputSplit getInputSplit() {
            InputSplit split = super.getInputSplit();
            // Hand the user the delegate split rather than the tagging wrapper.
            return (split instanceof TaggedInputSplit)
                ? ((TaggedInputSplit) split).getInputSplit()
                : split;
        }

        @Override
        public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
            InputSplit split = super.getInputSplit();
            if (!(split instanceof MultiInputTaggedSplit)) {
                return super.getInputFormatClass();
            }
            // Hand the user the delegate InputFormat recorded on the tagged split.
            return ((MultiInputTaggedSplit) split).getInputFormatClass();
        }
    };
    return autoFlushingContext;
}
Also used: RuntimeContext(co.cask.cdap.api.RuntimeContext) TaggedInputSplit(co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit) WrappedMapper(org.apache.hadoop.mapreduce.lib.map.WrappedMapper) MultiInputTaggedSplit(co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputTaggedSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IOException(java.io.IOException)

Aggregations

RuntimeContext (co.cask.cdap.api.RuntimeContext)1 MultiInputTaggedSplit (co.cask.cdap.internal.app.runtime.batch.dataset.input.MultiInputTaggedSplit)1 TaggedInputSplit (co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit)1 IOException (java.io.IOException)1 InputSplit (org.apache.hadoop.mapreduce.InputSplit)1 WrappedMapper (org.apache.hadoop.mapreduce.lib.map.WrappedMapper)1