Use of io.cdap.cdap.etl.api.FailureCollector in project cdap by caskdata: class SparkStreamingPipelineRunner, method handleJoin.
@Override
protected SparkCollection<Object> handleJoin(Map<String, SparkCollection<Object>> inputDataCollections,
                                             PipelinePhase pipelinePhase,
                                             PluginFunctionContext pluginFunctionContext,
                                             StageSpec stageSpec,
                                             FunctionCache.Factory functionCacheFactory,
                                             Object plugin,
                                             Integer numPartitions,
                                             StageStatisticsCollector collector,
                                             Set<String> shufflers) throws Exception {
  String stageName = stageSpec.getName();
  BatchJoiner<?, ?, ?> joiner;
  if (plugin instanceof BatchAutoJoiner) {
    BatchAutoJoiner autoJoiner = (BatchAutoJoiner) plugin;
    // gather the output schema of each stage feeding the joiner
    Map<String, Schema> inputSchemas = new HashMap<>();
    for (String inputStageName : pipelinePhase.getStageInputs(stageName)) {
      StageSpec inputStageSpec = pipelinePhase.getStage(inputStageName);
      inputSchemas.put(inputStageName, inputStageSpec.getOutputSchema());
    }
    // the collector is passed into the auto-join context so the plugin can report validation failures
    FailureCollector failureCollector = new LoggingFailureCollector(stageName, inputSchemas);
    AutoJoinerContext autoJoinerContext = DefaultAutoJoinerContext.from(inputSchemas, failureCollector);
    failureCollector.getOrThrowException();
    JoinDefinition joinDefinition = autoJoiner.define(autoJoinerContext);
    if (joinDefinition == null) {
      throw new IllegalStateException(String.format(
        "Joiner stage '%s' did not specify a join definition. "
          + "Check with the plugin developer to ensure it is implemented correctly.", stageName));
    }
    joiner = new JoinerBridge(stageName, autoJoiner, joinDefinition);
  } else if (plugin instanceof BatchJoiner) {
    joiner = (BatchJoiner) plugin;
  } else {
    // should never happen unless there is a bug in the code. should have failed during deployment
    throw new IllegalStateException(String.format("Stage '%s' is an unknown joiner type %s",
                                                  stageName, plugin.getClass().getName()));
  }
  BatchJoinerRuntimeContext joinerRuntimeContext = pluginFunctionContext.createBatchRuntimeContext();
  joiner.initialize(joinerRuntimeContext);
  shufflers.add(stageName);
  return handleJoin(joiner, inputDataCollections, stageSpec, functionCacheFactory, numPartitions, collector);
}
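For context, the FailureCollector created above is carried by the AutoJoinerContext so that the plugin's define() method can report validation problems. The following is a minimal sketch of such a plugin, not part of CDAP itself: the class name, the expectation of exactly two inputs, and the use of AutoJoinerContext's getFailureCollector() and getInputStages() accessors are assumptions for illustration.

import java.util.Map;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.batch.BatchAutoJoiner;
import io.cdap.cdap.etl.api.join.AutoJoinerContext;
import io.cdap.cdap.etl.api.join.JoinDefinition;
import io.cdap.cdap.etl.api.join.JoinStage;

// Hypothetical auto joiner; illustrates reporting problems through the
// FailureCollector carried by the AutoJoinerContext.
public class ExampleAutoJoiner extends BatchAutoJoiner {

  @Override
  public JoinDefinition define(AutoJoinerContext context) {
    FailureCollector collector = context.getFailureCollector();
    Map<String, JoinStage> inputs = context.getInputStages();
    if (inputs.size() != 2) {
      // failures recorded here are reported when the collector's getOrThrowException() is called
      collector.addFailure("Expected exactly two inputs, found " + inputs.size() + ".",
                           "Connect exactly two stages to this joiner.");
      return null;
    }
    // ... build and return a JoinDefinition for the two inputs ...
    return null;
  }
}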
Use of io.cdap.cdap.etl.api.FailureCollector in project hydrator-plugins by cdapio: class FileStreamingSource, method getStream.
@Override
public JavaDStream<StructuredRecord> getStream(StreamingContext context) throws Exception {
  FailureCollector collector = context.getFailureCollector();
  conf.validate(collector);
  conf.getSchema(collector);
  // throws a ValidationException if validate() or getSchema() reported any failures
  collector.getOrThrowException();
  JavaStreamingContext jsc = context.getSparkStreamingContext();
  return FileStreamingSourceUtil.getJavaDStream(jsc, conf);
}
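The conf.validate(collector) call above delegates to the plugin's config class. As a rough sketch of that pattern (the config class and the 'path' property below are invented for illustration, not the actual FileStreamingSource configuration), a validate method typically adds a failure with a corrective action and ties it to the offending property:

import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.etl.api.FailureCollector;

// Illustrative config class, not the real FileStreamingSource configuration.
public class ExampleStreamingSourceConfig extends PluginConfig {

  @Macro
  private String path;

  public void validate(FailureCollector collector) {
    // skip the check when the value is a macro that is only resolved at runtime
    if (!containsMacro("path") && (path == null || path.isEmpty())) {
      collector.addFailure("Path must be specified.", "Provide a non-empty path to read from.")
        .withConfigProperty("path");
    }
  }
}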
Use of io.cdap.cdap.etl.api.FailureCollector in project hydrator-plugins by cdapio: class HTTPPollerSource, method configurePipeline.
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
  super.configurePipeline(pipelineConfigurer);
  FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector();
  conf.validate(collector);
}
Use of io.cdap.cdap.etl.api.FailureCollector in project hydrator-plugins by cdapio: class HTTPPollerSource, method getStream.
@Override
public JavaDStream<StructuredRecord> getStream(StreamingContext streamingContext) {
  FailureCollector collector = streamingContext.getFailureCollector();
  conf.validate(collector);
  collector.getOrThrowException();
  return HTTPPollerSourceUtil.getJavaDStream(streamingContext, conf);
}
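Both HTTPPollerSource methods rely on getOrThrowException() to turn collected failures into a single error. The snippet below is a sketch built on the streamingContext variable from the method above; the failure message and the 'url' property name are made up for illustration:

// Sketch only: getOrThrowException() throws a ValidationException carrying every
// failure added to the collector, and is a no-op when there are none.
FailureCollector collector = streamingContext.getFailureCollector();
collector.addFailure("URL must start with http:// or https://.",
                     "Fix the 'url' property of the HTTP poller.")
  .withConfigProperty("url");
try {
  collector.getOrThrowException();
} catch (ValidationException e) {
  // e.getFailures() lists every ValidationFailure added above
  System.err.println(e.getFailures().size() + " validation failure(s) reported");
}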
Use of io.cdap.cdap.etl.api.FailureCollector in project hydrator-plugins by cdapio: class ReferenceStreamingSource, method configurePipeline.
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
  super.configurePipeline(pipelineConfigurer);
  // Verify that the reference name meets dataset id constraints
  FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector();
  IdUtils.validateReferenceName(conf.referenceName, collector);
  // if the reference name is not valid, throw an exception before creating the external dataset
  collector.getOrThrowException();
  pipelineConfigurer.createDataset(conf.referenceName, Constants.EXTERNAL_DATASET_TYPE, DatasetProperties.EMPTY);
}
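IdUtils.validateReferenceName records a failure on the collector rather than throwing directly, which is why the caller above follows it with getOrThrowException(). The sketch below is not the actual hydrator-plugins implementation; the allowed-character pattern is an assumption, and it only illustrates how such a helper reports through the collector:

import java.util.regex.Pattern;
import io.cdap.cdap.etl.api.FailureCollector;

// Hypothetical stand-in for IdUtils.validateReferenceName; the pattern is an assumption.
public final class ExampleIdUtils {
  private static final Pattern DATASET_ID = Pattern.compile("[$\\.a-zA-Z0-9_-]+");

  public static void validateReferenceName(String referenceName, FailureCollector collector) {
    if (referenceName == null || !DATASET_ID.matcher(referenceName).matches()) {
      collector.addFailure(
          String.format("Invalid reference name '%s'.", referenceName),
          "Use only letters, numbers, and '_', '-', '.' or '$' characters.")
        .withConfigProperty("referenceName");
    }
  }

  private ExampleIdUtils() {
    // no instances
  }
}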