Search in sources :

Example 1 with Input

use of io.cdap.cdap.api.data.batch.Input in project cdap by caskdata.

the class MapReduceBatchContext method setInput.

/**
 * Registers the given input with the underlying MapReduce context and records its alias.
 *
 * <p>When preview is enabled and the input is backed by an {@link InputFormatProvider}, the provider is
 * first wrapped in a {@link LimitingInputFormatProvider} bounded by {@code getMaxPreviewRecords()}.
 * The (possibly wrapped) input is then passed through {@code suffixInput} and
 * {@code ExternalDatasets.makeTrackable} before being added to the context.
 *
 * @param input the input to configure for this batch run
 */
@Override
public void setInput(Input input) {
    Input registered = CALLER.callUnchecked(() -> {
        Input candidate;
        if (isPreviewEnabled && input instanceof Input.InputFormatProviderInput) {
            // Preview run: swap in a provider limited to the configured maximum number of preview records,
            // keeping the original name and alias.
            Input.InputFormatProviderInput providerInput = (Input.InputFormatProviderInput) input;
            InputFormatProvider original = providerInput.getInputFormatProvider();
            LimitingInputFormatProvider limited = new LimitingInputFormatProvider(original, getMaxPreviewRecords());
            candidate = Input.of(input.getName(), limited).alias(input.getAlias());
        } else {
            candidate = input;
        }
        Input tracked = ExternalDatasets.makeTrackable(mrContext.getAdmin(), suffixInput(candidate));
        mrContext.addInput(tracked);
        return tracked;
    });
    // Remember the alias of the input that was actually registered.
    inputNames.add(registered.getAlias());
}
Also used : Input(io.cdap.cdap.api.data.batch.Input) InputFormatProvider(io.cdap.cdap.api.data.batch.InputFormatProvider) LimitingInputFormatProvider(io.cdap.cdap.etl.batch.preview.LimitingInputFormatProvider)

Example 2 with Input

use of io.cdap.cdap.api.data.batch.Input in project cdap by caskdata.

the class PartitionBatchInput method setInput.

/**
 * Configures a {@link PartitionedFileSet} as the input of a batch job, restricted to the set of
 * {@link Partition}s that this run should process. Intended to be called from the initialize method of the
 * implementing batch job.
 *
 * <p>The partitions are obtained from a partition consumer, which reads back the previously persisted state,
 * determines the new partitions to read, computes the new state and persists it. The selected partitions are
 * then passed as input arguments when the dataset is added as input to the given MapReduce context.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which the input is
 *                         configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the partition consumer state
 *                       is managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter which, when invoked, reports the consumed partitions and the success flag
 *         back to the partition consumer so that its state can be persisted
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext, String partitionedFileSetName, DatasetStatePersistor statePersistor, ConsumerConfiguration consumerConfiguration) {
    PartitionedFileSet sourceFileSet = mapreduceContext.getDataset(partitionedFileSetName);
    DelegatingStatePersistor delegatingPersistor = new DelegatingStatePersistor(mapreduceContext, statePersistor);
    PartitionConsumer consumer = new ConcurrentPartitionConsumer(sourceFileSet, delegatingPersistor, consumerConfiguration);

    // Ask the consumer which partitions this run should read.
    List<PartitionDetail> partitionsToRead = consumer.consumePartitions().getPartitions();

    // Pass the selected partitions to the dataset through its input arguments.
    Map<String, String> inputArguments = new HashMap<>();
    PartitionedFileSetArguments.addInputPartitions(inputArguments, partitionsToRead);
    Input datasetInput = Input.ofDataset(partitionedFileSetName, inputArguments);
    mapreduceContext.addInput(datasetInput);

    return succeeded -> consumer.onFinish(partitionsToRead, succeeded);
}
Also used : Input(io.cdap.cdap.api.data.batch.Input) DatasetStatePersistor(io.cdap.cdap.api.dataset.lib.DatasetStatePersistor) PartitionDetail(io.cdap.cdap.api.dataset.lib.PartitionDetail) List(java.util.List) PartitionedFileSetArguments(io.cdap.cdap.api.dataset.lib.PartitionedFileSetArguments) Beta(io.cdap.cdap.api.annotation.Beta) Map(java.util.Map) Partition(io.cdap.cdap.api.dataset.lib.Partition) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) HashMap(java.util.HashMap) MapReduceContext(io.cdap.cdap.api.mapreduce.MapReduceContext)

Example 3 with Input

use of io.cdap.cdap.api.data.batch.Input in project cdap by caskdata.

the class SparkBatchSourceContext method setInput.

/**
 * Registers the given input for this stage with the source factory.
 *
 * <p>An input backed by an {@link InputFormatProvider} is first re-wrapped so that
 * {@code TrackingInputFormat} is used as the input format, delegating to the original input format class.
 * When preview is enabled, the resulting provider is additionally wrapped in a
 * {@link LimitingInputFormatProvider} bounded by {@code getMaxPreviewRecords()}. The final input is passed
 * through {@code suffixInput} and {@code ExternalDatasets.makeTrackable} before being registered.
 *
 * @param input the input to configure for this stage
 */
@Override
public void setInput(Input input) {
    Input current = input;

    // Substitute a tracking input format that delegates to the original one, for metrics collection.
    if (current instanceof Input.InputFormatProviderInput) {
        InputFormatProvider delegate = ((Input.InputFormatProviderInput) current).getInputFormatProvider();
        Map<String, String> trackingConf = new HashMap<>(delegate.getInputFormatConfiguration());
        trackingConf.put(TrackingInputFormat.DELEGATE_CLASS_NAME, delegate.getInputFormatClassName());
        InputFormatProvider trackingProvider = new BasicInputFormatProvider(TrackingInputFormat.class.getName(), trackingConf);
        current = Input.of(current.getName(), trackingProvider).alias(current.getAlias());
    }

    // In preview mode, cap the input by wrapping the provider once more.
    if (isPreviewEnabled) {
        if (current instanceof Input.InputFormatProviderInput) {
            InputFormatProvider unlimited = ((Input.InputFormatProviderInput) current).getInputFormatProvider();
            LimitingInputFormatProvider limited = new LimitingInputFormatProvider(unlimited, getMaxPreviewRecords());
            current = Input.of(current.getName(), limited).alias(current.getAlias());
        }
    }

    Input tracked = ExternalDatasets.makeTrackable(admin, suffixInput(current));
    sourceFactory.addInput(getStageName(), tracked);
}
Also used : Input(io.cdap.cdap.api.data.batch.Input) BasicInputFormatProvider(io.cdap.cdap.etl.batch.BasicInputFormatProvider) LimitingInputFormatProvider(io.cdap.cdap.etl.batch.preview.LimitingInputFormatProvider) InputFormatProvider(io.cdap.cdap.api.data.batch.InputFormatProvider) HashMap(java.util.HashMap)

Aggregations

Input (io.cdap.cdap.api.data.batch.Input): 3, InputFormatProvider (io.cdap.cdap.api.data.batch.InputFormatProvider): 2, LimitingInputFormatProvider (io.cdap.cdap.etl.batch.preview.LimitingInputFormatProvider): 2, HashMap (java.util.HashMap): 2, Beta (io.cdap.cdap.api.annotation.Beta): 1, DatasetStatePersistor (io.cdap.cdap.api.dataset.lib.DatasetStatePersistor): 1, Partition (io.cdap.cdap.api.dataset.lib.Partition): 1, PartitionDetail (io.cdap.cdap.api.dataset.lib.PartitionDetail): 1, PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 1, PartitionedFileSetArguments (io.cdap.cdap.api.dataset.lib.PartitionedFileSetArguments): 1, MapReduceContext (io.cdap.cdap.api.mapreduce.MapReduceContext): 1, BasicInputFormatProvider (io.cdap.cdap.etl.batch.BasicInputFormatProvider): 1, List (java.util.List): 1, Map (java.util.Map): 1