Search in sources :

Example 1 with StreamInputFormatProvider

use of co.cask.cdap.internal.app.runtime.batch.stream.StreamInputFormatProvider in project cdap by caskdata.

In class BasicMapReduceContext, method addInput.

/**
 * Registers an input for the MapReduce job, optionally bound to a specific mapper class.
 *
 * <p>An input that names the system namespace is rejected unless the program itself runs in the
 * system namespace. The input is then dispatched by concrete type: dataset inputs are converted
 * through {@code createInput}, stream inputs are wrapped in a {@link StreamInputFormatProvider}
 * (falling back to the program's namespace when the input does not name one), and inputs that
 * already carry an input format provider are registered as-is.
 *
 * @param input the input to register
 * @param mapperCls the mapper class to associate with this input, or {@code null} if none was given
 * @throws IllegalArgumentException if the input targets the system namespace from a program outside
 *     of it, or if the input is of an unrecognized class
 */
@Override
public void addInput(Input input, @Nullable Class<?> mapperCls) {
    String inputNamespace = input.getNamespace();
    if (inputNamespace != null) {
        String systemNamespace = NamespaceId.SYSTEM.getNamespace();
        boolean targetsSystem = inputNamespace.equals(systemNamespace);
        if (targetsSystem && !getProgram().getNamespaceId().equals(systemNamespace)) {
            // a program outside the system namespace must not read from the system namespace
            throw new IllegalArgumentException(String.format("Accessing Input %s in system namespace " + "is not allowed from the namespace %s", input.getName(), getProgram().getNamespaceId()));
        }
    }

    if (input instanceof Input.DatasetInput) {
        // datasets are first turned into an input-format-provider based input
        Input.InputFormatProviderInput converted = createInput((Input.DatasetInput) input);
        addInput(converted.getAlias(), converted.getInputFormatProvider(), mapperCls);
        return;
    }

    if (input instanceof Input.StreamInput) {
        Input.StreamInput streamInput = (Input.StreamInput) input;
        // a stream without an explicit namespace lives in the program's own namespace
        String streamNamespace = streamInput.getNamespace();
        if (streamNamespace == null) {
            streamNamespace = getProgram().getNamespaceId();
        }
        StreamInputFormatProvider streamProvider =
            new StreamInputFormatProvider(new NamespaceId(streamNamespace), streamInput, streamAdmin);
        addInput(input.getAlias(), streamProvider, mapperCls);
        return;
    }

    if (input instanceof Input.InputFormatProviderInput) {
        Input.InputFormatProviderInput providerInput = (Input.InputFormatProviderInput) input;
        addInput(input.getAlias(), providerInput.getInputFormatProvider(), mapperCls);
        return;
    }

    // only reachable if the user supplies their own Input subclass
    throw new IllegalArgumentException(String.format("Input %s has unknown input class %s", input.getName(), input.getClass().getCanonicalName()));
}
Also used : MapperInput(co.cask.cdap.internal.app.runtime.batch.dataset.input.MapperInput) Input(co.cask.cdap.api.data.batch.Input) StreamInputFormatProvider(co.cask.cdap.internal.app.runtime.batch.stream.StreamInputFormatProvider) NamespaceId(co.cask.cdap.proto.id.NamespaceId)

Example 2 with StreamInputFormatProvider

use of co.cask.cdap.internal.app.runtime.batch.stream.StreamInputFormatProvider in project cdap by caskdata.

In class MapReduceRuntimeService, method setInputsIfNeeded.

/**
 * Applies the input configuration of every mapper input in the context to the given job.
 *
 * <p>Each input is registered through {@code MultipleInputs.addInput} together with the mapper
 * class that should process it; an input without its own mapper uses the job's mapper class.
 * Because several mappers may be involved, the output key/value types of every mapper after the
 * first are checked against those of the first one. Stream inputs additionally get their decoder
 * configured and have READ access on the stream enforced.
 *
 * @param job the job whose inputs are being configured
 * @return the TypeToken for one of the mappers (it does not matter which one, since all of their
 *     output key/value types are checked to be consistent). Returns null if the mapper class was
 *     not configured directly on the job and the job's mapper class is to be used.
 * @throws IllegalArgumentException if any of the configured mapper output types are inconsistent
 * @throws IOException if the authorization check on a stream input fails
 */
@Nullable
private TypeToken<Mapper> setInputsIfNeeded(Job job) throws IOException, ClassNotFoundException {
    Class<? extends Mapper> defaultMapper = job.getMapperClass();
    Class<? extends Mapper> referenceMapper = null;
    Map.Entry<Class, Class> referenceOutputTypes = null;

    for (Map.Entry<String, MapperInput> entry : context.getMapperInputs().entrySet()) {
        MapperInput mapperInput = entry.getValue();
        InputFormatProvider provider = mapperInput.getInputFormatProvider();
        Map<String, String> formatConf = mapperInput.getInputFormatConfiguration();

        // the mapper the user attached to this input, if any; otherwise use the job-level mapper
        Class<? extends Mapper> explicitMapper = mapperInput.getMapper();
        Class<? extends Mapper> effectiveMapper = explicitMapper;
        if (effectiveMapper == null) {
            effectiveMapper = defaultMapper;
        }

        if (referenceMapper != null) {
            // every later input must agree with the first one on output key/value types
            assertConsistentTypes(referenceMapper, referenceOutputTypes, effectiveMapper);
        } else {
            // the first input becomes the reference for the consistency checks
            referenceMapper = effectiveMapper;
            referenceOutputTypes = getMapperOutputKeyValueTypes(effectiveMapper);
        }

        // Streams need special handling.
        if (provider instanceof StreamInputFormatProvider) {
            StreamInputFormatProvider streamProvider = (StreamInputFormatProvider) provider;
            // hand over the explicitly configured mapper (possibly null) rather than the effective
            // one, because the effective mapper defaults to the Identity Mapper
            setDecoderForStream(streamProvider, job, formatConf, explicitMapper);
            // READ access is enforced here; this is meant to happen after lineage/usage registration
            // so that the intent of reading from the stream is tracked.
            try {
                authorizationEnforcer.enforce(streamProvider.getStreamId(), authenticationContext.getPrincipal(), Action.READ);
            } catch (Exception e) {
                Throwables.propagateIfPossible(e, IOException.class);
                throw new IOException(e);
            }
        }

        MultipleInputs.addInput(job, entry.getKey(), mapperInput.getInputFormatClassName(), formatConf, effectiveMapper);
    }

    // an input-specific mapper was used first: resolve the type from that class
    if (referenceMapper != null && referenceMapper != defaultMapper) {
        return resolveClass(referenceMapper, Mapper.class);
    }
    // no inputs, or the first input uses the job's mapper: resolve from the job configuration,
    // which yields null if the user didn't configure the mapper class explicitly
    return resolveClass(job.getConfiguration(), MRJobConfig.MAP_CLASS_ATTR, Mapper.class);
}
Also used : MapperInput(co.cask.cdap.internal.app.runtime.batch.dataset.input.MapperInput) InputFormatProvider(co.cask.cdap.api.data.batch.InputFormatProvider) StreamInputFormatProvider(co.cask.cdap.internal.app.runtime.batch.stream.StreamInputFormatProvider) IOException(java.io.IOException) Map(java.util.Map) HashMap(java.util.HashMap) AbstractMap(java.util.AbstractMap) ProvisionException(com.google.inject.ProvisionException) TransactionFailureException(org.apache.tephra.TransactionFailureException) URISyntaxException(java.net.URISyntaxException) Nullable(javax.annotation.Nullable)

Aggregations

MapperInput (co.cask.cdap.internal.app.runtime.batch.dataset.input.MapperInput)2 StreamInputFormatProvider (co.cask.cdap.internal.app.runtime.batch.stream.StreamInputFormatProvider)2 Input (co.cask.cdap.api.data.batch.Input)1 InputFormatProvider (co.cask.cdap.api.data.batch.InputFormatProvider)1 NamespaceId (co.cask.cdap.proto.id.NamespaceId)1 ProvisionException (com.google.inject.ProvisionException)1 IOException (java.io.IOException)1 URISyntaxException (java.net.URISyntaxException)1 AbstractMap (java.util.AbstractMap)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Nullable (javax.annotation.Nullable)1 TransactionFailureException (org.apache.tephra.TransactionFailureException)1