Example usage of co.cask.cdap.etl.common.DefaultPipelineConfigurer in the CDAP project (caskdata): class PipelineSpecGenerator, method configureStages.
/**
 * Performs most of the validation and configuration needed by a pipeline.
 * Validates the config, configures every stage in a valid traversal order, propagates each
 * stage's output schema to the input schemas of its downstream stages, collects pipeline
 * properties (both from the config and from the stages themselves), and registers connections,
 * resources, and stage logging settings on the builder.
 *
 * @param config user provided ETL config
 * @param specBuilder builder for creating a pipeline spec.
 * @throws IllegalArgumentException if a Splitter output port is missing, if a Joiner receives a
 *   null input schema, if a non-Joiner/Action/Condition stage receives two different input
 *   schemas, or if multiple stages set conflicting pipeline properties
 */
protected void configureStages(ETLConfig config, PipelineSpec.Builder specBuilder) {
// validate the config and determine the order we should configure the stages in.
ValidatedPipeline validatedPipeline = validateConfig(config);
List<ETLStage> traversalOrder = validatedPipeline.getTraversalOrder();
// create one configurer per stage up front, so a stage can push its output schema into a
// downstream stage's configurer before that downstream stage is itself configured
Map<String, DefaultPipelineConfigurer<T>> pluginConfigurers = new HashMap<>(traversalOrder.size());
Map<String, String> pluginTypes = new HashMap<>(traversalOrder.size());
for (ETLStage stage : traversalOrder) {
String stageName = stage.getName();
pluginTypes.put(stageName, stage.getPlugin().getType());
pluginConfigurers.put(stageName, new DefaultPipelineConfigurer<>(configurer, stageName, engine));
}
// anything prefixed by 'system.[engine].' is a pipeline property.
Map<String, String> pipelineProperties = new HashMap<>();
String prefix = String.format("system.%s.", engine.name().toLowerCase());
int prefixLength = prefix.length();
for (Map.Entry<String, String> property : config.getProperties().entrySet()) {
if (property.getKey().startsWith(prefix)) {
// store the property without the 'system.[engine].' prefix
String strippedKey = property.getKey().substring(prefixLength);
pipelineProperties.put(strippedKey, property.getValue());
}
}
// row = property name, column = property value, val = stage that set the property
// this is used so that we can error with a nice message about which stages are setting conflicting properties
Table<String, String, String> propertiesFromStages = HashBasedTable.create();
// configure the stages in order and build up the stage specs
for (ETLStage stage : traversalOrder) {
String stageName = stage.getName();
DefaultPipelineConfigurer<T> pluginConfigurer = pluginConfigurers.get(stageName);
ConfiguredStage configuredStage = configureStage(stage, validatedPipeline, pluginConfigurer);
// for each output, set their input schema to our output schema
for (String nextStageName : validatedPipeline.getOutputs(stageName)) {
String nextStageType = pluginTypes.get(nextStageName);
DefaultStageConfigurer outputStageConfigurer = pluginConfigurers.get(nextStageName).getStageConfigurer();
// if the output stage is an error transform, it takes the error schema of this stage as its input.
// if the current stage is a splitter transform, it takes the output schema of the port it is connected to
// all other plugin types take the output schema of this stage as their input.
Schema nextStageInputSchema;
if (ErrorTransform.PLUGIN_TYPE.equals(nextStageType)) {
nextStageInputSchema = configuredStage.stageSpec.getErrorSchema();
} else if (SplitterTransform.PLUGIN_TYPE.equals(configuredStage.stageSpec.getPlugin().getType())) {
StageSpec.Port portSpec = configuredStage.stageSpec.getOutputPorts().get(nextStageName);
// this can happen if the ports are dependent on the data received by the plugin
if (portSpec == null) {
nextStageInputSchema = null;
} else if (portSpec.getPort() == null) {
// Should not happen since it should have been validated earlier, but check here just in case
throw new IllegalArgumentException(String.format("Must specify a port when connecting Splitter '%s' to '%s'", stageName, nextStageName));
} else {
nextStageInputSchema = portSpec.getSchema();
}
} else {
nextStageInputSchema = configuredStage.stageSpec.getOutputSchema();
}
// Do not allow null input schema for Joiner
if (BatchJoiner.PLUGIN_TYPE.equals(nextStageType) && nextStageInputSchema == null) {
throw new IllegalArgumentException(String.format("Joiner cannot have any null input schemas, but stage %s " + "outputs a null schema.", stageName));
}
// Do not allow more than one input schema for stages other than Joiner and Action
if (!BatchJoiner.PLUGIN_TYPE.equals(nextStageType) && !Action.PLUGIN_TYPE.equals(nextStageType) && !Condition.PLUGIN_TYPE.equals(nextStageType) && !hasSameSchema(outputStageConfigurer.getInputSchemas(), nextStageInputSchema)) {
throw new IllegalArgumentException("Two different input schema were set for the stage " + nextStageName);
}
outputStageConfigurer.addInputSchema(stageName, nextStageInputSchema);
}
specBuilder.addStage(configuredStage.stageSpec);
// record which stage set each pipeline property, so conflicts can be reported by stage name
for (Map.Entry<String, String> propertyEntry : configuredStage.pipelineProperties.entrySet()) {
propertiesFromStages.put(propertyEntry.getKey(), propertyEntry.getValue(), stageName);
}
}
// check that multiple stages did not set conflicting properties
for (String propertyName : propertiesFromStages.rowKeySet()) {
// go through all values set for the property name. If there is more than one, we have a conflict.
Map<String, String> propertyValues = propertiesFromStages.row(propertyName);
if (propertyValues.size() > 1) {
StringBuilder errMsg = new StringBuilder("Pipeline property '").append(propertyName).append("' is being set to different values by stages.");
for (Map.Entry<String, String> valueEntry : propertyValues.entrySet()) {
// row() returns value -> stage mappings: the map key is the property value, the map value is the stage
String propertyValue = valueEntry.getKey();
String fromStage = valueEntry.getValue();
errMsg.append(" stage '").append(fromStage).append("' = '").append(propertyValue).append("',");
}
// drop the trailing comma left by the loop above
errMsg.deleteCharAt(errMsg.length() - 1);
throw new IllegalArgumentException(errMsg.toString());
}
// exactly one value was set for this property; stage-set properties win over config-set ones here
pipelineProperties.put(propertyName, propertyValues.keySet().iterator().next());
}
// NOTE(review): the result of build() is discarded; presumably the caller builds the final spec
// from the same builder after this method mutates it — confirm against callers
specBuilder.addConnections(config.getConnections()).setResources(config.getResources()).setDriverResources(config.getDriverResources()).setClientResources(config.getClientResources()).setStageLoggingEnabled(config.isStageLoggingEnabled()).setNumOfRecordsPreview(config.getNumOfRecordsPreview()).setProperties(pipelineProperties).build();
}
Example usage of co.cask.cdap.etl.common.DefaultPipelineConfigurer in the CDAP project (caskdata): class BatchPipelineSpecGenerator, method generateSpec.
/**
 * Generates the full batch pipeline spec: configures and registers each post-run action,
 * then delegates common stage, connection, and resource configuration to configureStages.
 *
 * @param config the batch ETL config to generate a spec for
 * @return the fully built batch pipeline spec
 */
@Override
public BatchPipelineSpec generateSpec(ETLBatchConfig config) {
BatchPipelineSpec.Builder builder = BatchPipelineSpec.builder();
// post actions run after the pipeline finishes; configure each plugin and record it as an ActionSpec
for (ETLStage postAction : config.getPostActions()) {
String actionName = postAction.getName();
DefaultPipelineConfigurer<T> actionConfigurer = new DefaultPipelineConfigurer<>(configurer, actionName, engine);
PluginSpec actionPluginSpec = configurePlugin(actionName, postAction.getPlugin(), actionConfigurer);
builder.addAction(new ActionSpec(actionName, actionPluginSpec));
}
// shared logic for stages, connections, resources, properties, and logging settings
configureStages(config, builder);
return builder.build();
}
Aggregations