Use of co.cask.cdap.internal.app.runtime.batch.dataset.input.MapperInput in project cdap by caskdata.
From the class MapReduceRuntimeService, method setInputsIfNeeded.
/**
 * Registers every configured mapper input on the given job via {@code MultipleInputs}.
 * An input may carry its own mapper; an input without one falls back to the mapper class configured on the job.
 * The first input's mapper acts as the reference: every later mapper is checked against its output key/value types.
 *
 * @param job the job on which the inputs are registered
 * @return the TypeToken for one of the mappers (it doesn't matter which one, since all of their output key/value
 *         types are checked for consistency). Returns null if the mapper class was not configured directly on the
 *         job and the job's mapper class is to be used.
 * @throws IllegalArgumentException if any of the configured mapper output types are inconsistent.
 * @throws IOException if enforcing read access on a stream input fails with a checked exception (checked
 *         exceptions other than IOException are wrapped in an IOException)
 */
@Nullable
private TypeToken<Mapper> setInputsIfNeeded(Job job) throws IOException, ClassNotFoundException {
  Class<? extends Mapper> jobMapperClass = job.getMapperClass();
  Class<? extends Mapper> referenceMapper = null;
  Map.Entry<Class, Class> referenceOutputTypes = null;

  for (Map.Entry<String, MapperInput> entry : context.getMapperInputs().entrySet()) {
    MapperInput input = entry.getValue();
    InputFormatProvider provider = input.getInputFormatProvider();
    Map<String, String> formatConf = input.getInputFormatConfiguration();

    // an input that has no mapper of its own uses whatever is configured on the job
    Class<? extends Mapper> effectiveMapper;
    if (input.getMapper() == null) {
      effectiveMapper = jobMapperClass;
    } else {
      effectiveMapper = input.getMapper();
    }

    // the first input establishes the reference output types; every later one is compared against them
    if (referenceMapper != null) {
      assertConsistentTypes(referenceMapper, referenceOutputTypes, effectiveMapper);
    } else {
      referenceMapper = effectiveMapper;
      referenceOutputTypes = getMapperOutputKeyValueTypes(effectiveMapper);
    }

    // streams need special treatment (a bit hacky)
    if (provider instanceof StreamInputFormatProvider) {
      StreamInputFormatProvider streamProvider = (StreamInputFormatProvider) provider;
      // hand over input.getMapper() rather than effectiveMapper, because the latter defaults to the
      // Identity Mapper
      setDecoderForStream(streamProvider, job, formatConf, input.getMapper());

      // enforced after lineage/usage registry, since the intent of reading from the stream should be tracked
      try {
        authorizationEnforcer.enforce(streamProvider.getStreamId(),
                                      authenticationContext.getPrincipal(),
                                      Action.READ);
      } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
      }
    }

    MultipleInputs.addInput(job, entry.getKey(), input.getInputFormatClassName(), formatConf, effectiveMapper);
  }

  // an explicitly chosen mapper (different from the job's) determines the returned type
  if (referenceMapper != null && referenceMapper != jobMapperClass) {
    return resolveClass(referenceMapper, Mapper.class);
  }
  // otherwise resolve from the job configuration; null if the user didn't configure the mapper class explicitly
  return resolveClass(job.getConfiguration(), MRJobConfig.MAP_CLASS_ATTR, Mapper.class);
}
Aggregations