Search in sources :

Example 1 with MapperInput

Use of io.cdap.cdap.internal.app.runtime.batch.dataset.input.MapperInput in the cdap project by caskdata.

In the class MapReduceRuntimeService, method setInputsIfNeeded.

/**
 * Sets the configurations used for inputs by registering every mapper input of the context on the job.
 * Multiple mappers could be defined (one per input), so the output key/value types of each mapper are
 * checked against those of the first input's mapper.
 *
 * @param job the job on which the inputs are registered; its mapper class is used for any input that
 *            does not specify a mapper of its own
 * @return the TypeToken for one of the mappers (it doesn't matter which one, since we check that all of
 *         their output key/value types are consistent). If there are no inputs, or the first input uses the
 *         job's mapper class, the type is resolved from the job configuration instead; this returns null if
 *         the mapper class was not configured directly on the job and the job's mapper class is to be used.
 * @throws IllegalArgumentException if any of the configured mapper output types are inconsistent.
 */
@Nullable
private TypeToken<Mapper> setInputsIfNeeded(Job job) throws IOException, ClassNotFoundException {
    Class<? extends Mapper> jobMapperClass = job.getMapperClass();
    Class<? extends Mapper> firstMapperClass = null;
    Map.Entry<Class, Class> firstMapperOutputTypes = null;
    for (Map.Entry<String, MapperInput> mapperInputEntry : context.getMapperInputs().entrySet()) {
        MapperInput mapperInput = mapperInputEntry.getValue();
        Map<String, String> inputFormatConfiguration = mapperInput.getInputFormatConfiguration();
        // default to what is configured on the job, if user didn't specify a mapper for an input
        Class<? extends Mapper> inputMapperClass = mapperInput.getMapper();
        Class<? extends Mapper> mapperClass = inputMapperClass == null ? jobMapperClass : inputMapperClass;
        // check output key/value type consistency, except for the first input (which sets the reference types)
        if (firstMapperClass == null) {
            firstMapperClass = mapperClass;
            firstMapperOutputTypes = getMapperOutputKeyValueTypes(mapperClass);
        } else {
            assertConsistentTypes(firstMapperClass, firstMapperOutputTypes, mapperClass);
        }
        MultipleInputs.addInput(job, mapperInputEntry.getKey(), mapperInput.getInputFormatClassName(),
                                inputFormatConfiguration, mapperClass);
    }
    // With no inputs, or when the first input falls back to the job's mapper class, resolve the type from the
    // job configuration; that yields null if the user didn't configure the mapper class explicitly.
    if (firstMapperClass == null || firstMapperClass == jobMapperClass) {
        return resolveClass(job.getConfiguration(), MRJobConfig.MAP_CLASS_ATTR, Mapper.class);
    }
    return resolveClass(firstMapperClass, Mapper.class);
}
Also used : MapperInput(io.cdap.cdap.internal.app.runtime.batch.dataset.input.MapperInput) InputFormatProvider(io.cdap.cdap.api.data.batch.InputFormatProvider) Map(java.util.Map) HashMap(java.util.HashMap) AbstractMap(java.util.AbstractMap) Nullable(javax.annotation.Nullable)

Aggregations

InputFormatProvider (io.cdap.cdap.api.data.batch.InputFormatProvider): 1, MapperInput (io.cdap.cdap.internal.app.runtime.batch.dataset.input.MapperInput): 1, AbstractMap (java.util.AbstractMap): 1, HashMap (java.util.HashMap): 1, Map (java.util.Map): 1, Nullable (javax.annotation.Nullable): 1