Examples with Input - co.cask.cdap.api.data.batch.Input

Example 1 with Input

use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.

the class BasicMapReduceContext method addInput.

/**
 * Registers the given {@link Input} for this MapReduce job, optionally binding it to a mapper class.
 *
 * <p>An input that explicitly targets the system namespace is rejected unless the program itself
 * belongs to the system namespace. Otherwise the input is translated according to its concrete type:
 * a dataset input is first converted through {@code createInput}, a stream input is wrapped in a
 * {@link StreamInputFormatProvider} (falling back to the program's namespace when the stream input
 * does not name one), and an input-format-provider input is registered with its own provider.
 *
 * @param input the input to register
 * @param mapperCls the mapper class to associate with this input, may be {@code null}
 * @throws IllegalArgumentException if the input targets the system namespace from a program in another
 *                                  namespace, or if the input is of an unrecognized class
 */
@Override
public void addInput(Input input, @Nullable Class<?> mapperCls) {
    String inputNamespace = input.getNamespace();
    boolean targetsSystemNamespace = inputNamespace != null && inputNamespace.equals(NamespaceId.SYSTEM.getNamespace());
    if (targetsSystemNamespace && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) {
        // a program living outside the system namespace must not read from the system namespace
        String message = String.format("Accessing Input %s in system namespace is not allowed from the namespace %s", input.getName(), getProgram().getNamespaceId());
        throw new IllegalArgumentException(message);
    }

    if (input instanceof Input.DatasetInput) {
        Input.InputFormatProviderInput resolved = createInput((Input.DatasetInput) input);
        addInput(resolved.getAlias(), resolved.getInputFormatProvider(), mapperCls);
        return;
    }

    if (input instanceof Input.StreamInput) {
        Input.StreamInput streamInput = (Input.StreamInput) input;
        String declaredNamespace = streamInput.getNamespace();
        // streams without an explicit namespace are looked up in the namespace of the running program
        String effectiveNamespace = declaredNamespace != null ? declaredNamespace : getProgram().getNamespaceId();
        addInput(input.getAlias(), new StreamInputFormatProvider(new NamespaceId(effectiveNamespace), streamInput, streamAdmin), mapperCls);
        return;
    }

    if (input instanceof Input.InputFormatProviderInput) {
        Input.InputFormatProviderInput providerInput = (Input.InputFormatProviderInput) input;
        addInput(input.getAlias(), providerInput.getInputFormatProvider(), mapperCls);
        return;
    }

    // only reachable when a user supplies their own Input subclass
    throw new IllegalArgumentException(String.format("Input %s has unknown input class %s", input.getName(), input.getClass().getCanonicalName()));
}

Also used : MapperInput(co.cask.cdap.internal.app.runtime.batch.dataset.input.MapperInput) Input(co.cask.cdap.api.data.batch.Input) StreamInputFormatProvider(co.cask.cdap.internal.app.runtime.batch.stream.StreamInputFormatProvider) NamespaceId(co.cask.cdap.proto.id.NamespaceId)

Example 2 with Input

use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.

the class SparkBatchSourceContext method setInput.

/**
 * Sets the input for this source stage.
 *
 * <p>The input is first passed through {@code suffixInput}, then through
 * {@code ExternalDatasets.makeTrackable} together with this context's admin, and the result is
 * registered with the source factory under this stage's name.
 *
 * @param input the input to read from
 */
@Override
public void setInput(Input input) {
    Input suffixed = suffixInput(input);
    Input tracked = ExternalDatasets.makeTrackable(admin, suffixed);
    sourceFactory.addInput(getStageName(), tracked);
}

Also used : Input(co.cask.cdap.api.data.batch.Input)

Example 3 with Input

use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.

the class MapReduceBatchContext method setInput.

/**
 * Sets the input for this batch context.
 *
 * <p>Inside a task executed via {@code CALLER.callUnchecked}, the input is passed through
 * {@code suffixInput} and {@code ExternalDatasets.makeTrackable} and then added to the underlying
 * MapReduce context. The alias of the registered input is recorded in {@code inputNames}.
 *
 * @param input the input to read from
 */
@Override
public void setInput(final Input input) {
    Callable<Input> registration = new Callable<Input>() {

        @Override
        public Input call() throws Exception {
            Input tracked = ExternalDatasets.makeTrackable(mrContext.getAdmin(), suffixInput(input));
            mrContext.addInput(tracked);
            return tracked;
        }
    };
    Input registered = CALLER.callUnchecked(registration);
    inputNames.add(registered.getAlias());
}

Also used : Input(co.cask.cdap.api.data.batch.Input) TopicNotFoundException(co.cask.cdap.api.messaging.TopicNotFoundException) TopicAlreadyExistsException(co.cask.cdap.api.messaging.TopicAlreadyExistsException) IOException(java.io.IOException)

Example 4 with Input

use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.

the class PartitionBatchInput method setInput.

/**
 * Configures a {@link PartitionedFileSet} as input of a batch job, restricted to the
 * {@link Partition}s that the partition consumer selects for this run. Intended to be called from the
 * initialize method of the implementing batch job.
 *
 * <p>The consumer reads back the previously persisted state, determines the new partitions to read,
 * computes the new state and persists it. The selected partitions are then passed as dataset
 * arguments when the dataset is added as input to the given MapReduce context.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which the
 *                         input is configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the partition
 *                       consumer state is managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter used to persist the state of the partition consumer; it forwards
 *         the consumed partitions and the success flag to the consumer's {@code onFinish}
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext, String partitionedFileSetName, DatasetStatePersistor statePersistor, ConsumerConfiguration consumerConfiguration) {
    PartitionedFileSet fileSet = mapreduceContext.getDataset(partitionedFileSetName);
    DelegatingStatePersistor delegatingPersistor = new DelegatingStatePersistor(mapreduceContext, statePersistor);
    PartitionConsumer consumer = new ConcurrentPartitionConsumer(fileSet, delegatingPersistor, consumerConfiguration);

    // consuming also updates the persisted consumer state, so this is done exactly once per call
    List<PartitionDetail> partitionsToProcess = consumer.consumePartitions().getPartitions();

    Map<String, String> inputArguments = new HashMap<>();
    PartitionedFileSetArguments.addInputPartitions(inputArguments, partitionsToProcess);
    mapreduceContext.addInput(Input.ofDataset(partitionedFileSetName, inputArguments));

    return succeeded -> consumer.onFinish(partitionsToProcess, succeeded);
}

Also used : Input(co.cask.cdap.api.data.batch.Input) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) DatasetStatePersistor(co.cask.cdap.api.dataset.lib.DatasetStatePersistor) MapReduceContext(co.cask.cdap.api.mapreduce.MapReduceContext) List(java.util.List) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Beta(co.cask.cdap.api.annotation.Beta) Map(java.util.Map) HashMap(java.util.HashMap) PartitionedFileSetArguments(co.cask.cdap.api.dataset.lib.PartitionedFileSetArguments) Partition(co.cask.cdap.api.dataset.lib.Partition)

Example 5 with Input

use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.

the class StreamBatchSource method prepareRun.

/**
 * Configures the stream named in the plugin config as input for this run.
 *
 * <p>The time range read ends at the logical start time minus the configured delay (no delay when
 * the setting is null or empty) and spans the configured duration. If the config supplies a format
 * specification it is passed along to the stream input; otherwise the stream is read without one.
 *
 * @param context the batch source context on which the input is set
 */
@Override
public void prepareRun(BatchSourceContext context) {
    long duration = ETLUtils.parseDuration(streamBatchConfig.duration);
    long delay = 0;
    if (!Strings.isNullOrEmpty(streamBatchConfig.delay)) {
        delay = ETLUtils.parseDuration(streamBatchConfig.delay);
    }

    // the window is [logicalStart - delay - duration, logicalStart - delay]
    long endTime = context.getLogicalStartTime() - delay;
    long startTime = endTime - duration;

    LOG.info("Setting input to Stream : {}", streamBatchConfig.name);

    FormatSpecification formatSpec = streamBatchConfig.getFormatSpec();
    Input stream = formatSpec == null
        ? Input.ofStream(streamBatchConfig.name, startTime, endTime)
        : Input.ofStream(streamBatchConfig.name, startTime, endTime, formatSpec);
    context.setInput(stream);
}

Also used : Input(co.cask.cdap.api.data.batch.Input) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification)

Aggregations

Input (co.cask.cdap.api.data.batch.Input): 5, Beta (co.cask.cdap.api.annotation.Beta): 1, FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 1, DatasetStatePersistor (co.cask.cdap.api.dataset.lib.DatasetStatePersistor): 1, Partition (co.cask.cdap.api.dataset.lib.Partition): 1, PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail): 1, PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 1, PartitionedFileSetArguments (co.cask.cdap.api.dataset.lib.PartitionedFileSetArguments): 1, MapReduceContext (co.cask.cdap.api.mapreduce.MapReduceContext): 1, TopicAlreadyExistsException (co.cask.cdap.api.messaging.TopicAlreadyExistsException): 1, TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException): 1, MapperInput (co.cask.cdap.internal.app.runtime.batch.dataset.input.MapperInput): 1, StreamInputFormatProvider (co.cask.cdap.internal.app.runtime.batch.stream.StreamInputFormatProvider): 1, NamespaceId (co.cask.cdap.proto.id.NamespaceId): 1, IOException (java.io.IOException): 1, HashMap (java.util.HashMap): 1, List (java.util.List): 1, Map (java.util.Map): 1