Use of io.cdap.cdap.api.data.batch.Input in project cdap by caskdata.
The class MapReduceBatchContext, method setInput.
/**
 * Registers the given input with the underlying MapReduce context and records its alias.
 *
 * <p>When preview is enabled and the input is backed by an {@link InputFormatProvider}, the provider is first
 * wrapped in a {@link LimitingInputFormatProvider} configured with {@code getMaxPreviewRecords()}. The input is
 * then passed through {@code suffixInput} and {@code ExternalDatasets.makeTrackable} before being added.
 *
 * @param input the input to configure for this batch run
 */
@Override
public void setInput(Input input) {
  Input registered = CALLER.callUnchecked(() -> {
    Input candidate = input;
    if (isPreviewEnabled && input instanceof Input.InputFormatProviderInput) {
      // Rebuild the input around a limiting provider, keeping the original name and alias.
      Input.InputFormatProviderInput providerInput = (Input.InputFormatProviderInput) input;
      LimitingInputFormatProvider limited =
        new LimitingInputFormatProvider(providerInput.getInputFormatProvider(), getMaxPreviewRecords());
      candidate = Input.of(input.getName(), limited).alias(input.getAlias());
    }
    Input tracked = ExternalDatasets.makeTrackable(mrContext.getAdmin(), suffixInput(candidate));
    mrContext.addInput(tracked);
    return tracked;
  });
  // Remember the alias of the input that was actually registered.
  inputNames.add(registered.getAlias());
}
Use of io.cdap.cdap.api.data.batch.Input in project cdap by caskdata.
The class PartitionBatchInput, method setInput.
/**
 * Configures a {@link PartitionedFileSet} as input of a batch job, restricted to the set of {@link Partition}s
 * that should be processed by this run. Intended to be called from the initialize method of the implementing
 * batch job.
 *
 * <p>The partitions are selected by reading back the previous consumer state, determining the new partitions to
 * read, computing the new state, and persisting that new state. The dataset, together with the selected
 * partitions, is then added as input to the MapReduce context that is passed in.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which the input is
 *                         configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the partition consumer state
 *                       is managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter used to persist the state of the partition consumer
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext, String partitionedFileSetName,
                                               DatasetStatePersistor statePersistor,
                                               ConsumerConfiguration consumerConfiguration) {
  PartitionedFileSet dataset = mapreduceContext.getDataset(partitionedFileSetName);
  DelegatingStatePersistor persistor = new DelegatingStatePersistor(mapreduceContext, statePersistor);
  PartitionConsumer consumer = new ConcurrentPartitionConsumer(dataset, persistor, consumerConfiguration);

  // Consume the partitions for this run; the same list is handed back to the consumer on finish.
  List<PartitionDetail> partitions = consumer.consumePartitions().getPartitions();

  // Pass the consumed partitions to the dataset through its input arguments.
  Map<String, String> inputArguments = new HashMap<>();
  PartitionedFileSetArguments.addInputPartitions(inputArguments, partitions);
  mapreduceContext.addInput(Input.ofDataset(partitionedFileSetName, inputArguments));

  return success -> consumer.onFinish(partitions, success);
}
Use of io.cdap.cdap.api.data.batch.Input in project cdap by caskdata.
The class SparkBatchSourceContext, method setInput.
/**
 * Registers the given input with the source factory under this stage's name.
 *
 * <p>An input backed by an {@link InputFormatProvider} is first re-wrapped so that {@link TrackingInputFormat}
 * delegates to the original input format class. When preview is enabled, the resulting provider is additionally
 * wrapped in a {@link LimitingInputFormatProvider}. The input is finally passed through {@code suffixInput} and
 * {@code ExternalDatasets.makeTrackable} before being added.
 *
 * @param input the input to configure for this stage
 */
@Override
public void setInput(Input input) {
  Input current = input;

  // Route the input format through the tracking format so a counter can be kept for metrics collection.
  if (current instanceof Input.InputFormatProviderInput) {
    InputFormatProvider delegate = ((Input.InputFormatProviderInput) current).getInputFormatProvider();
    Map<String, String> trackingConf = new HashMap<>(delegate.getInputFormatConfiguration());
    trackingConf.put(TrackingInputFormat.DELEGATE_CLASS_NAME, delegate.getInputFormatClassName());
    InputFormatProvider tracking = new BasicInputFormatProvider(TrackingInputFormat.class.getName(), trackingConf);
    current = Input.of(current.getName(), tracking).alias(current.getAlias());
  }

  // In preview mode, wrap the (possibly already tracking) provider so the input is limited.
  if (isPreviewEnabled && current instanceof Input.InputFormatProviderInput) {
    Input.InputFormatProviderInput providerInput = (Input.InputFormatProviderInput) current;
    LimitingInputFormatProvider limited =
      new LimitingInputFormatProvider(providerInput.getInputFormatProvider(), getMaxPreviewRecords());
    current = Input.of(current.getName(), limited).alias(current.getAlias());
  }

  Input tracked = ExternalDatasets.makeTrackable(admin, suffixInput(current));
  sourceFactory.addInput(getStageName(), tracked);
}
Aggregations