
Example 66 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.

Class MultipleOutputsCommitter, method setupJob:

@Override
public void setupJob(JobContext jobContext) throws IOException {
    // set up the root (main) output first
    rootOutputcommitter.setupJob(jobContext);
    // then set up each named output with a JobContext scoped to that output
    for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
        JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
        committer.getValue().setupJob(namedJobContext);
    }
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), JobContext (org.apache.hadoop.mapreduce.JobContext), Map (java.util.Map)
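
The other OutputCommitter lifecycle methods can be delegated in the same way. A minimal sketch of a commitJob counterpart, assuming the same rootOutputcommitter and committers fields as above (an illustration of the pattern, not the project's actual code):

@Override
public void commitJob(JobContext jobContext) throws IOException {
    // commit the root (main) output first, then each named output
    rootOutputcommitter.commitJob(jobContext);
    for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
        JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
        committer.getValue().commitJob(namedJobContext);
    }
}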

Example 67 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.

Class MultipleOutputsMainOutputWrapper, method checkOutputSpecs:

@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    // validate the output spec of every named output with a fresh format instance
    for (String name : MultipleOutputs.getNamedOutputsList(context)) {
        Class<? extends OutputFormat> namedOutputFormatClass = MultipleOutputs.getNamedOutputFormatClass(context, name);
        JobContext namedContext = MultipleOutputs.getNamedJobContext(context, name);
        OutputFormat<K, V> outputFormat = ReflectionUtils.newInstance(namedOutputFormatClass, namedContext.getConfiguration());
        outputFormat.checkOutputSpecs(namedContext);
    }
}
Also used: JobContext (org.apache.hadoop.mapreduce.JobContext)
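
getNamedJobContext is CDAP-internal, but the general construction of a per-output JobContext can be sketched with standard Hadoop classes: copy the parent Configuration, apply the named output's settings, and wrap the copy in a JobContextImpl. The helper below is hypothetical and only illustrates that construction:

// Hypothetical helper: give each named output its own Configuration copy so
// per-output settings do not leak into the parent job's Configuration.
private static JobContext copyJobContext(JobContext parent) {
    Configuration namedConf = new Configuration(parent.getConfiguration());
    // per-output properties would be applied to namedConf here
    return new JobContextImpl(namedConf, parent.getJobID());
}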

Example 68 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.

Class TransformRunner, method getSinkWriter:

// needed because writes to the context differ depending on the number of outputs
private OutputWriter<Object, Object> getSinkWriter(MapReduceTaskContext<Object, Object> context, PipelinePhase pipelinePhase, Configuration hConf) {
    Set<StageSpec> reducers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
    JobContext hadoopContext = context.getHadoopContext();
    if (!reducers.isEmpty() && hadoopContext instanceof Mapper.Context) {
        return new SingleOutputWriter<>(context);
    }
    String sinkOutputsStr = hConf.get(ETLMapReduce.SINK_OUTPUTS_KEY);
    // should never happen, this is set in initialize
    Preconditions.checkNotNull(sinkOutputsStr, "Sink outputs not found in Hadoop conf.");
    Map<String, SinkOutput> sinkOutputs = GSON.fromJson(sinkOutputsStr, ETLMapReduce.SINK_OUTPUTS_TYPE);
    return hasSingleOutput(sinkOutputs) ? new SingleOutputWriter<>(context) : new MultiOutputWriter<>(context, sinkOutputs);
}
Also used: Mapper (org.apache.hadoop.mapreduce.Mapper), StageSpec (co.cask.cdap.etl.spec.StageSpec), JobContext (org.apache.hadoop.mapreduce.JobContext)
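
The sinkOutputsStr value read here is expected to have been written into the Hadoop Configuration during initialize, as the comment above notes. A rough sketch of that producing side, assuming the same GSON instance and ETLMapReduce.SINK_OUTPUTS_KEY constant (buildSinkOutputs is a hypothetical stand-in for however the pipeline builds the map):

// Hypothetical setup step: serialize the sink-output map as JSON so that
// getSinkWriter can deserialize it later in each task.
Map<String, SinkOutput> sinkOutputs = buildSinkOutputs(pipelinePhase);
hConf.set(ETLMapReduce.SINK_OUTPUTS_KEY, GSON.toJson(sinkOutputs));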

Example 69 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project accumulo by apache.

Class AccumuloOutputFormatTest, method testBWSettings:

@Test
public void testBWSettings() throws IOException {
    Job job = Job.getInstance();
    // make sure we aren't testing defaults
    final BatchWriterConfig bwDefaults = new BatchWriterConfig();
    assertNotEquals(7654321L, bwDefaults.getMaxLatency(TimeUnit.MILLISECONDS));
    assertNotEquals(9898989L, bwDefaults.getTimeout(TimeUnit.MILLISECONDS));
    assertNotEquals(42, bwDefaults.getMaxWriteThreads());
    assertNotEquals(1123581321L, bwDefaults.getMaxMemory());
    final BatchWriterConfig bwConfig = new BatchWriterConfig();
    bwConfig.setMaxLatency(7654321L, TimeUnit.MILLISECONDS);
    bwConfig.setTimeout(9898989L, TimeUnit.MILLISECONDS);
    bwConfig.setMaxWriteThreads(42);
    bwConfig.setMaxMemory(1123581321L);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat myAOF = new AccumuloOutputFormat() {

        @Override
        public void checkOutputSpecs(JobContext job) throws IOException {
            BatchWriterConfig bwOpts = getBatchWriterOptions(job);
            // passive check
            assertEquals(bwConfig.getMaxLatency(TimeUnit.MILLISECONDS), bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
            assertEquals(bwConfig.getTimeout(TimeUnit.MILLISECONDS), bwOpts.getTimeout(TimeUnit.MILLISECONDS));
            assertEquals(bwConfig.getMaxWriteThreads(), bwOpts.getMaxWriteThreads());
            assertEquals(bwConfig.getMaxMemory(), bwOpts.getMaxMemory());
            // explicit check
            assertEquals(7654321L, bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
            assertEquals(9898989L, bwOpts.getTimeout(TimeUnit.MILLISECONDS));
            assertEquals(42, bwOpts.getMaxWriteThreads());
            assertEquals(1123581321L, bwOpts.getMaxMemory());
        }
    };
    myAOF.checkOutputSpecs(job);
}
Also used: BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), Test (org.junit.Test)
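
Outside of a test, the same configuration step belongs in normal job setup. A minimal sketch using the Accumulo and Hadoop APIs shown above (the values are placeholders, not recommendations):

// Sketch: tune the BatchWriter used when a MapReduce job writes to Accumulo.
Job job = Job.getInstance();
BatchWriterConfig bwConfig = new BatchWriterConfig();
bwConfig.setMaxMemory(64 * 1024 * 1024L);      // buffer up to 64 MB of mutations
bwConfig.setMaxLatency(30, TimeUnit.SECONDS);  // flush at least every 30 seconds
bwConfig.setMaxWriteThreads(4);
AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
job.setOutputFormatClass(AccumuloOutputFormat.class);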

Example 70 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.

Class HadoopInputFormatBase, method getStatistics:

@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
    // only gather base statistics for FileInputFormats
    if (!(mapreduceInputFormat instanceof FileInputFormat)) {
        return null;
    }
    JobContext jobContext = new JobContextImpl(configuration, null);
    final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ? (FileBaseStatistics) cachedStats : null;
    try {
        final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
        return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
    } catch (IOException ioex) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Could not determine statistics due to an io error: " + ioex.getMessage());
        }
    } catch (Throwable t) {
        if (LOG.isErrorEnabled()) {
            LOG.error("Unexpected problem while getting the file statistics: " + t.getMessage(), t);
        }
    }
    // no statistics available
    return null;
}
Also used: Path (org.apache.flink.core.fs.Path), JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), FileStatus (org.apache.flink.core.fs.FileStatus), FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics), JobContext (org.apache.hadoop.mapreduce.JobContext), IOException (java.io.IOException), FileInputFormat (org.apache.hadoop.mapreduce.lib.input.FileInputFormat)
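
FileInputFormat.getInputPaths only returns paths that were previously registered on the job's Configuration. A short sketch of that producing side with plain Hadoop APIs (the path is a placeholder):

// Sketch: register input paths on a Job; any JobContext backed by the same
// Configuration will return them from FileInputFormat.getInputPaths.
Job job = Job.getInstance(configuration);
FileInputFormat.addInputPath(job, new org.apache.hadoop.fs.Path("/data/input"));
org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(job);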

Aggregations

JobContext (org.apache.hadoop.mapreduce.JobContext): 85
Configuration (org.apache.hadoop.conf.Configuration): 41
Job (org.apache.hadoop.mapreduce.Job): 35
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 34
Test (org.junit.Test): 31
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 29
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 28
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 25
Path (org.apache.hadoop.fs.Path): 24
IOException (java.io.IOException): 22
File (java.io.File): 19
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 16
ArrayList (java.util.ArrayList): 13
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 11
JobConf (org.apache.hadoop.mapred.JobConf): 10
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 10
LongWritable (org.apache.hadoop.io.LongWritable): 9
MapFile (org.apache.hadoop.io.MapFile): 9
JobID (org.apache.hadoop.mapreduce.JobID): 7
FileSystem (org.apache.hadoop.fs.FileSystem): 6