Use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.
The class BasicMapReduceContext, method addInput.
@Override
public void addInput(Input input, @Nullable Class<?> mapperCls) {
  if (input.getNamespace() != null
      && input.getNamespace().equals(NamespaceId.SYSTEM.getNamespace())
      && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) {
    // trying to access the system namespace from a program outside the system namespace is not allowed
    throw new IllegalArgumentException(
      String.format("Accessing Input %s in system namespace is not allowed from the namespace %s",
                    input.getName(), getProgram().getNamespaceId()));
  }
  if (input instanceof Input.DatasetInput) {
    Input.DatasetInput datasetInput = (Input.DatasetInput) input;
    // convert the dataset input into an InputFormatProvider-backed input
    Input.InputFormatProviderInput createdInput = createInput(datasetInput);
    addInput(createdInput.getAlias(), createdInput.getInputFormatProvider(), mapperCls);
  } else if (input instanceof Input.StreamInput) {
    Input.StreamInput streamInput = (Input.StreamInput) input;
    // default to the program's own namespace when the stream input does not specify one
    String namespace = streamInput.getNamespace();
    if (namespace == null) {
      namespace = getProgram().getNamespaceId();
    }
    addInput(input.getAlias(),
             new StreamInputFormatProvider(new NamespaceId(namespace), streamInput, streamAdmin),
             mapperCls);
  } else if (input instanceof Input.InputFormatProviderInput) {
    addInput(input.getAlias(),
             ((Input.InputFormatProviderInput) input).getInputFormatProvider(), mapperCls);
  } else {
    // shouldn't happen unless the user defines their own Input subclass
    throw new IllegalArgumentException(
      String.format("Input %s has unknown input class %s",
                    input.getName(), input.getClass().getCanonicalName()));
  }
}
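For reference, a minimal sketch of how a user program would reach this method: a MapReduce program registers its inputs through the MapReduceContext in initialize(). The dataset name and mapper class below are hypothetical, not from the CDAP sources.

import co.cask.cdap.api.data.batch.Input;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;

public class PurchaseMapReduce extends AbstractMapReduce {

  @Override
  public void initialize() throws Exception {
    MapReduceContext context = getContext();
    // "purchases" is a hypothetical dataset name; PurchaseMapper is a hypothetical
    // org.apache.hadoop.mapreduce.Mapper defined elsewhere. This call eventually
    // dispatches to BasicMapReduceContext.addInput(Input, Class) shown above.
    context.addInput(Input.ofDataset("purchases"), PurchaseMapper.class);
  }
}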
Use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.
The class SparkBatchSourceContext, method setInput.
@Override
public void setInput(Input input) {
  // wrap the input so it can be tracked for lineage/usage, then register it under this stage
  Input trackableInput = ExternalDatasets.makeTrackable(admin, suffixInput(input));
  sourceFactory.addInput(getStageName(), trackableInput);
}
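In the pipeline API, a batch source plugin reaches setInput from its prepareRun method. A minimal sketch, assuming a hypothetical dataset name:

// inside a BatchSource plugin; "events" is a hypothetical dataset name
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.ofDataset("events"));
}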
Use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.
The class MapReduceBatchContext, method setInput.
@Override
public void setInput(final Input input) {
  // callUnchecked rethrows any checked exception from the callable as unchecked
  Input trackableInput = CALLER.callUnchecked(new Callable<Input>() {
    @Override
    public Input call() throws Exception {
      // wrap the input for tracking, then register it with the MapReduce context
      Input trackableInput = ExternalDatasets.makeTrackable(mrContext.getAdmin(), suffixInput(input));
      mrContext.addInput(trackableInput);
      return trackableInput;
    }
  });
  inputNames.add(trackableInput.getAlias());
}
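Note that inputNames records the input's alias, which defaults to the dataset name and may be adjusted by suffixInput for uniqueness. A small sketch of setting an explicit alias, with hypothetical names:

// distinct aliases let two inputs over the same dataset coexist in one pipeline
Input input = Input.ofDataset("events").alias("eventsToday");
context.setInput(input);  // the recorded alias is then based on "eventsToday"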
Use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.
The class PartitionBatchInput, method setInput.
/**
 * Used from the initialize method of the implementing batch job to configure a {@link PartitionedFileSet} as
 * input, with the set of {@link Partition}s to be processed by this run of the batch job. It does this by
 * reading back the previous state, determining the new partitions to read, computing the new state, and
 * persisting it. It then configures the dataset as input on the MapReduce context that is passed in.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which the input is
 *                         configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the partition consumer
 *                       state is managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter used to persist the state of the partition consumer
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext,
                                               String partitionedFileSetName,
                                               DatasetStatePersistor statePersistor,
                                               ConsumerConfiguration consumerConfiguration) {
  PartitionedFileSet partitionedFileSet = mapreduceContext.getDataset(partitionedFileSetName);
  final PartitionConsumer partitionConsumer =
    new ConcurrentPartitionConsumer(partitionedFileSet,
                                    new DelegatingStatePersistor(mapreduceContext, statePersistor),
                                    consumerConfiguration);
  // consume the next set of available partitions and configure them as input to this run
  final List<PartitionDetail> consumedPartitions = partitionConsumer.consumePartitions().getPartitions();
  Map<String, String> arguments = new HashMap<>();
  PartitionedFileSetArguments.addInputPartitions(arguments, consumedPartitions);
  mapreduceContext.addInput(Input.ofDataset(partitionedFileSetName, arguments));
  // the returned committer persists the consumer's state once the job finishes
  return succeeded -> partitionConsumer.onFinish(consumedPartitions, succeeded);
}
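A typical caller wires this into a MapReduce program's lifecycle and invokes the returned committer when the run finishes. A sketch under assumptions: the PartitionedFileSet name, the KVTableStatePersistor arguments, and the status check follow the pattern used in CDAP's examples, not code from this class.

import co.cask.cdap.api.ProgramStatus;
import co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration;
import co.cask.cdap.api.dataset.lib.partitioned.KVTableStatePersistor;
import co.cask.cdap.api.dataset.lib.partitioned.PartitionBatchInput;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;

public class PartitionProcessorMapReduce extends AbstractMapReduce {

  private PartitionBatchInput.BatchPartitionCommitter partitionCommitter;

  @Override
  public void initialize() throws Exception {
    // "events" is a hypothetical PartitionedFileSet; consumer state is kept in a
    // hypothetical KeyValueTable named "consumingState" under the row key "state.key"
    partitionCommitter = PartitionBatchInput.setInput(
      getContext(), "events",
      new KVTableStatePersistor("consumingState", "state.key"),
      ConsumerConfiguration.DEFAULT);
  }

  @Override
  public void destroy() {
    // persist the partition consumer's state according to the run's outcome
    boolean succeeded = getContext().getState().getStatus() == ProgramStatus.COMPLETED;
    partitionCommitter.onFinish(succeeded);
  }
}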
Use of co.cask.cdap.api.data.batch.Input in project cdap by caskdata.
The class StreamBatchSource, method prepareRun.
@Override
public void prepareRun(BatchSourceContext context) {
  // read the event window [logicalStartTime - delay - duration, logicalStartTime - delay)
  long duration = ETLUtils.parseDuration(streamBatchConfig.duration);
  long delay = Strings.isNullOrEmpty(streamBatchConfig.delay) ? 0 : ETLUtils.parseDuration(streamBatchConfig.delay);
  long endTime = context.getLogicalStartTime() - delay;
  long startTime = endTime - duration;
  LOG.info("Setting input to Stream: {}", streamBatchConfig.name);
  FormatSpecification formatSpec = streamBatchConfig.getFormatSpec();
  Input stream;
  if (formatSpec == null) {
    stream = Input.ofStream(streamBatchConfig.name, startTime, endTime);
  } else {
    stream = Input.ofStream(streamBatchConfig.name, startTime, endTime, formatSpec);
  }
  context.setInput(stream);
}
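With duration "1h" and delay "10m", for example, a run whose logical start time is 12:00 reads events from 10:50 to 11:50. The format branch exists because Input.ofStream has overloads with and without a FormatSpecification; a sketch of building one, where the stream name, schema, and CSV format are assumptions:

// hypothetical CSV format over a two-field event schema
Schema schema = Schema.recordOf("event",
  Schema.Field.of("ts", Schema.of(Schema.Type.LONG)),
  Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
FormatSpecification formatSpec =
  new FormatSpecification(Formats.CSV, schema, Collections.<String, String>emptyMap());
Input stream = Input.ofStream("purchases", startTime, endTime, formatSpec);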