Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.
The class MultipleOutputsCommitter, method setupJob.
@Override
public void setupJob(JobContext jobContext) throws IOException {
  rootOutputcommitter.setupJob(jobContext);
  for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
    JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
    committer.getValue().setupJob(namedJobContext);
  }
}
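The remaining committer lifecycle calls would typically fan out in the same way. A sketch of a matching commitJob, assuming the same rootOutputcommitter and committers fields; this illustrates the pattern rather than reproducing the project's actual implementation:

@Override
public void commitJob(JobContext jobContext) throws IOException {
  // commit the main output first, then each named output with its own JobContext
  rootOutputcommitter.commitJob(jobContext);
  for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
    JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
    committer.getValue().commitJob(namedJobContext);
  }
}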
Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.
The class MultipleOutputsMainOutputWrapper, method checkOutputSpecs.
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  for (String name : MultipleOutputs.getNamedOutputsList(context)) {
    Class<? extends OutputFormat> namedOutputFormatClass = MultipleOutputs.getNamedOutputFormatClass(context, name);
    JobContext namedContext = MultipleOutputs.getNamedJobContext(context, name);
    OutputFormat<K, V> outputFormat = ReflectionUtils.newInstance(namedOutputFormatClass, namedContext.getConfiguration());
    outputFormat.checkOutputSpecs(namedContext);
  }
}
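The same pattern can be exercised on its own: instantiate an OutputFormat reflectively and validate its spec against a JobContext (a Job implements JobContext). A minimal standalone sketch; the method name and output path are placeholders, not taken from the project above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

static void checkSingleOutputSpec() throws IOException, InterruptedException {
  // placeholder output path, for illustration only
  Job job = Job.getInstance(new Configuration());
  FileOutputFormat.setOutputPath(job, new Path("/tmp/example-output"));
  OutputFormat<?, ?> format =
      ReflectionUtils.newInstance(TextOutputFormat.class, job.getConfiguration());
  // Job implements JobContext, so it can be passed directly;
  // this throws if the output directory already exists or no path is set
  format.checkOutputSpecs(job);
}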
Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.
The class TransformRunner, method getSinkWriter.
// this is needed because we need to write to the context differently depending on the number of outputs
private OutputWriter<Object, Object> getSinkWriter(MapReduceTaskContext<Object, Object> context, PipelinePhase pipelinePhase, Configuration hConf) {
  Set<StageSpec> reducers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
  JobContext hadoopContext = context.getHadoopContext();
  if (!reducers.isEmpty() && hadoopContext instanceof Mapper.Context) {
    return new SingleOutputWriter<>(context);
  }
  String sinkOutputsStr = hConf.get(ETLMapReduce.SINK_OUTPUTS_KEY);
  // should never happen, this is set in initialize
  Preconditions.checkNotNull(sinkOutputsStr, "Sink outputs not found in Hadoop conf.");
  Map<String, SinkOutput> sinkOutputs = GSON.fromJson(sinkOutputsStr, ETLMapReduce.SINK_OUTPUTS_TYPE);
  return hasSingleOutput(sinkOutputs) ? new SingleOutputWriter<>(context) : new MultiOutputWriter<>(context, sinkOutputs);
}
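The hasSingleOutput helper is not part of this snippet. A minimal sketch that matches the call above, under the assumption that it only needs to count the configured sinks; the real helper may do more:

// hypothetical helper matching only the call site above; the actual implementation may differ
private boolean hasSingleOutput(Map<String, SinkOutput> sinkOutputs) {
  return sinkOutputs.size() == 1;
}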
Use of org.apache.hadoop.mapreduce.JobContext in project accumulo by apache.
The class AccumuloOutputFormatTest, method testBWSettings.
@Test
public void testBWSettings() throws IOException {
  Job job = Job.getInstance();
  // make sure we aren't testing defaults
  final BatchWriterConfig bwDefaults = new BatchWriterConfig();
  assertNotEquals(7654321L, bwDefaults.getMaxLatency(TimeUnit.MILLISECONDS));
  assertNotEquals(9898989L, bwDefaults.getTimeout(TimeUnit.MILLISECONDS));
  assertNotEquals(42, bwDefaults.getMaxWriteThreads());
  assertNotEquals(1123581321L, bwDefaults.getMaxMemory());
  final BatchWriterConfig bwConfig = new BatchWriterConfig();
  bwConfig.setMaxLatency(7654321L, TimeUnit.MILLISECONDS);
  bwConfig.setTimeout(9898989L, TimeUnit.MILLISECONDS);
  bwConfig.setMaxWriteThreads(42);
  bwConfig.setMaxMemory(1123581321L);
  AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
  AccumuloOutputFormat myAOF = new AccumuloOutputFormat() {
    @Override
    public void checkOutputSpecs(JobContext job) throws IOException {
      BatchWriterConfig bwOpts = getBatchWriterOptions(job);
      // passive check
      assertEquals(bwConfig.getMaxLatency(TimeUnit.MILLISECONDS), bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
      assertEquals(bwConfig.getTimeout(TimeUnit.MILLISECONDS), bwOpts.getTimeout(TimeUnit.MILLISECONDS));
      assertEquals(bwConfig.getMaxWriteThreads(), bwOpts.getMaxWriteThreads());
      assertEquals(bwConfig.getMaxMemory(), bwOpts.getMaxMemory());
      // explicit check
      assertEquals(7654321L, bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
      assertEquals(9898989L, bwOpts.getTimeout(TimeUnit.MILLISECONDS));
      assertEquals(42, bwOpts.getMaxWriteThreads());
      assertEquals(1123581321L, bwOpts.getMaxMemory());
    }
  };
  myAOF.checkOutputSpecs(job);
}
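Outside the test, the same options are applied when configuring a real job to write to Accumulo. A minimal sketch using only the calls exercised above plus standard Job setup; the method name and values are illustrative, and further connector and table configuration would still be needed:

import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

static void configureAccumuloOutput() throws IOException {
  Job job = Job.getInstance(new Configuration(), "write-to-accumulo");
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  BatchWriterConfig bwConfig = new BatchWriterConfig();
  bwConfig.setMaxLatency(30, TimeUnit.SECONDS);  // flush buffered mutations at least every 30 seconds
  bwConfig.setMaxWriteThreads(4);                // cap concurrent writes to tablet servers
  AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
  // connector info, instance settings, and the default table must still be set before submission
}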
Use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.
The class HadoopInputFormatBase, method getStatistics.
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapreduceInputFormat instanceof FileInputFormat)) {
    return null;
  }
  JobContext jobContext = new JobContextImpl(configuration, null);
  final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ? (FileBaseStatistics) cachedStats : null;
  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an io error: " + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: " + t.getMessage(), t);
    }
  }
  // no statistics available
  return null;
}
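The same JobContextImpl construction can be used on its own to inspect the configured input paths. A minimal sketch (not Flink's getFileStats) with a hypothetical helper name, summing the reported length of each top-level input path:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

static long totalInputBytes(Configuration conf) throws IOException {
  // a JobID is not needed just to read the input path configuration
  JobContext jobContext = new JobContextImpl(conf, null);
  long total = 0L;
  for (Path path : FileInputFormat.getInputPaths(jobContext)) {
    FileSystem fs = path.getFileSystem(conf);
    total += fs.getFileStatus(path).getLen();  // directories typically report a length of 0
  }
  return total;
}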