Use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.
The class ValidationUtils, method validate.
/**
 * Validates a plugin based on the {@link StageValidationRequest}.
 *
 * @param namespace the namespace in which to validate the stage
 * @param validationRequest {@link StageValidationRequest} with plugin properties
 * @param pluginConfigurer {@link PluginConfigurer} for instantiating the plugin
 * @param macroFn {@link Function} for evaluating macros
 * @param featureFlagsProvider {@link FeatureFlagsProvider} for checking feature flags
 * @return {@link StageValidationResponse} with the validated stage spec, or the collected validation failures
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest,
                                               PluginConfigurer pluginConfigurer,
                                               Function<Map<String, String>, Map<String, String>> macroFn,
                                               FeatureFlagsProvider featureFlagsProvider) {
  ETLStage stageConfig = validationRequest.getStage();
  ValidatingConfigurer validatingConfigurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);
  // Batch or streaming doesn't matter for a single stage.
  PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> pipelineSpecGenerator =
    new BatchPipelineSpecGenerator(namespace, validatingConfigurer, null, Collections.emptySet(),
                                   Collections.emptySet(), Engine.SPARK, featureFlagsProvider);
  DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageConfig.getName());
  for (StageSchema stageSchema : validationRequest.getInputSchemas()) {
    stageConfigurer.addInputSchema(stageSchema.getStage(), stageSchema.getSchema());
    stageConfigurer.addInputStage(stageSchema.getStage());
  }
  DefaultPipelineConfigurer pipelineConfigurer =
    new DefaultPipelineConfigurer(validatingConfigurer, stageConfig.getName(), Engine.SPARK,
                                  stageConfigurer, featureFlagsProvider);
  // Evaluate macros.
  Map<String, String> evaluatedProperties = macroFn.apply(stageConfig.getPlugin().getProperties());
  ETLPlugin originalConfig = stageConfig.getPlugin();
  ETLPlugin evaluatedConfig = new ETLPlugin(originalConfig.getName(), originalConfig.getType(),
                                            evaluatedProperties, originalConfig.getArtifactConfig());
  try {
    StageSpec spec =
      pipelineSpecGenerator.configureStage(stageConfig.getName(), evaluatedConfig, pipelineConfigurer).build();
    return new StageValidationResponse(spec);
  } catch (ValidationException e) {
    return new StageValidationResponse(e.getFailures());
  }
}
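For reference, here is a minimal sketch of how a caller might invoke validate() when no macro evaluation is needed: the macro function is simply the identity. The helper method and its wiring are illustrative assumptions, not part of the CDAP API; the request, configurer, and feature-flag provider are presumed to come from the surrounding service context, and the types involved are the same ones used in the snippet above.

// Hypothetical helper, not part of CDAP: validate a stage while leaving
// macro-bearing properties unevaluated by passing them through unchanged.
public static StageValidationResponse validateWithoutMacros(String namespace,
                                                            StageValidationRequest request,
                                                            PluginConfigurer pluginConfigurer,
                                                            FeatureFlagsProvider featureFlagsProvider) {
  // Identity macro function: properties go through as-is.
  return ValidationUtils.validate(namespace, request, pluginConfigurer,
                                  properties -> properties, featureFlagsProvider);
}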
Use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.
The class PipelineSpecGenerator, method configureStage.
/**
* Configures a plugin and returns the spec for it.
*
* @param stageName the unique plugin id
* @param etlPlugin user-provided configuration for the plugin
* @param pipelineConfigurer default pipeline configurer used to configure the plugin
* @return the spec for the plugin
* @throws IllegalArgumentException if a plugin with the same id is already deployed
* @throws ValidationException if the plugin threw an exception during configuration
*/
public StageSpec.Builder configureStage(String stageName, ETLPlugin etlPlugin,
                                        DefaultPipelineConfigurer pipelineConfigurer) throws ValidationException {
  TrackedPluginSelector pluginSelector =
    new TrackedPluginSelector(new ArtifactSelectorProvider().getPluginSelector(etlPlugin.getArtifactConfig()));
  String type = etlPlugin.getType();
  String pluginName = etlPlugin.getName();
  DefaultStageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer();
  FailureCollector collector = stageConfigurer.getFailureCollector();
  Object plugin = getPlugin(stageName, etlPlugin, pluginSelector, type, pluginName, collector);
  try {
    if (type.equals(BatchJoiner.PLUGIN_TYPE)) {
      MultiInputPipelineConfigurable multiPlugin = (MultiInputPipelineConfigurable) plugin;
      multiPlugin.configurePipeline(pipelineConfigurer);
      // If the plugin also implements AutoJoiner, configure the join definition here. This allows join
      // functionality to be added to the BatchAutoJoiner while preserving backwards compatibility in the
      // pipeline config.
      if (plugin instanceof AutoJoiner) {
        configureAutoJoiner(stageName, (AutoJoiner) plugin, stageConfigurer, collector);
      }
    } else if (type.equals(SplitterTransform.PLUGIN_TYPE)) {
      MultiOutputPipelineConfigurable multiOutputPlugin = (MultiOutputPipelineConfigurable) plugin;
      multiOutputPlugin.configurePipeline(pipelineConfigurer);
    } else if (!type.equals(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
      PipelineConfigurable singlePlugin = (PipelineConfigurable) plugin;
      singlePlugin.configurePipeline(pipelineConfigurer);
      // Evaluate macros and find out whether a connection is used.
      if ((sourcePluginTypes.contains(type) || BatchSink.PLUGIN_TYPE.equals(type)) && runtimeEvaluator == null) {
        pluginConfigurer.evaluateMacros(etlPlugin.getProperties(), connectionEvaluator, options);
      }
    }
  } catch (InvalidConfigPropertyException e) {
    collector.addFailure(e.getMessage(),
                         String.format("Provide valid value for config property '%s'.", e.getProperty()))
      .withConfigProperty(e.getProperty());
  } catch (InvalidStageException e) {
    if (e.getReasons().isEmpty()) {
      collector.addFailure(e.getMessage(), null);
    }
    for (InvalidStageException reason : e.getReasons()) {
      if (reason instanceof InvalidConfigPropertyException) {
        InvalidConfigPropertyException configException = (InvalidConfigPropertyException) reason;
        collector.addFailure(configException.getMessage(),
                             String.format("Provide valid value for config property '%s'.", configException.getProperty()))
          .withConfigProperty(configException.getProperty());
      } else {
        collector.addFailure(reason.getMessage(), null);
      }
    }
  } catch (ValidationException e) {
    throw e;
  } catch (NullPointerException e) {
    // Handle the case where the plugin throws a NullPointerException, to avoid having 'null' as the error message.
    collector.addFailure(String.format("Null error occurred while configuring the stage %s.", stageName), null)
      .withStacktrace(e.getStackTrace());
  } catch (ArrayIndexOutOfBoundsException e) {
    // Handle the case where the plugin throws an ArrayIndexOutOfBoundsException,
    // to avoid having a bare number like '2' or '8' as the error message.
    collector.addFailure(String.format("Index out of bounds error occurred while configuring the stage %s.", stageName), null)
      .withStacktrace(e.getStackTrace());
  } catch (ConnectionBadRequestException e) {
    collector.addFailure(e.getMessage(), "Provide a valid connection name.");
  } catch (Exception e) {
    collector.addFailure(String.format("Error encountered while configuring the stage: '%s'", e.getMessage()), null)
      .withStacktrace(e.getStackTrace());
  }
  // Throw a ValidationException if the failure collector is carrying any errors.
  collector.getOrThrowException();
  PluginSpec pluginSpec = new PluginSpec(type, pluginName, etlPlugin.getProperties(), pluginSelector.getSelectedArtifact());
  StageSpec.Builder specBuilder = StageSpec.builder(stageName, pluginSpec)
    .addInputSchemas(pipelineConfigurer.getStageConfigurer().getInputSchemas())
    .setErrorSchema(stageConfigurer.getErrorSchema());
  if (type.equals(SplitterTransform.PLUGIN_TYPE)) {
    specBuilder.setPortSchemas(stageConfigurer.getOutputPortSchemas());
  } else {
    specBuilder.setOutputSchema(stageConfigurer.getOutputSchema());
  }
  return specBuilder;
}
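Since configureStage() routes plugin errors into the FailureCollector and getOrThrowException() converts them into a single ValidationException, callers generally report the collected failures rather than catching the underlying exception types. A minimal sketch of that pattern, assuming pipelineSpecGenerator, stageName, etlPlugin, and pipelineConfigurer are in scope and that ValidationFailure exposes getMessage(); the logging is illustrative:

try {
  StageSpec spec = pipelineSpecGenerator.configureStage(stageName, etlPlugin, pipelineConfigurer).build();
  System.out.println("Configured stage: " + spec.getName());
} catch (ValidationException e) {
  // Report every collected failure, not just the first one encountered.
  for (ValidationFailure failure : e.getFailures()) {
    System.err.println("Validation failure: " + failure.getMessage());
  }
}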
Use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.
The class PipelineSpecGenerator, method configureStages.
/**
* Performs most of the validation and configuration needed by a pipeline.
* Handles stages, connections, resources, and stage logging settings.
*
* @param config user provided ETL config
* @param specBuilder builder for creating a pipeline spec.
* @throws ValidationException if the pipeline is invalid
*/
protected void configureStages(ETLConfig config, PipelineSpec.Builder specBuilder) throws ValidationException {
  // Validate the config and determine the order in which to configure the stages.
  ValidatedPipeline validatedPipeline = validateConfig(config);
  List<ETLStage> traversalOrder = validatedPipeline.getTraversalOrder();
  Map<String, DefaultPipelineConfigurer> pluginConfigurers = new HashMap<>(traversalOrder.size());
  Map<String, String> pluginTypes = new HashMap<>(traversalOrder.size());
  for (ETLStage stage : traversalOrder) {
    String stageName = stage.getName();
    pluginTypes.put(stageName, stage.getPlugin().getType());
    pluginConfigurers.put(stageName,
                          new DefaultPipelineConfigurer(pluginConfigurer, datasetConfigurer, stageName, engine,
                                                        new DefaultStageConfigurer(stageName), featureFlagsProvider));
  }
  SchemaPropagator schemaPropagator = new SchemaPropagator(pluginConfigurers, validatedPipeline::getOutputs, pluginTypes::get);
  // Anything prefixed by 'system.[engine].' is a pipeline property.
  Map<String, String> pipelineProperties = new HashMap<>();
  String prefix = String.format("system.%s.", engine.name().toLowerCase());
  int prefixLength = prefix.length();
  for (Map.Entry<String, String> property : config.getProperties().entrySet()) {
    if (property.getKey().startsWith(prefix)) {
      String strippedKey = property.getKey().substring(prefixLength);
      pipelineProperties.put(strippedKey, property.getValue());
    }
  }
  // Row = property name, column = property value, cell value = stage that set the property.
  // This is used so that we can error with a helpful message about which stages set conflicting properties.
  Table<String, String, String> propertiesFromStages = HashBasedTable.create();
  // Configure the stages in order and build up the stage specs.
  for (ETLStage stage : traversalOrder) {
    String stageName = stage.getName();
    DefaultPipelineConfigurer pluginConfigurer = pluginConfigurers.get(stageName);
    ConfiguredStage configuredStage = configureStage(stage, validatedPipeline, pluginConfigurer);
    schemaPropagator.propagateSchema(configuredStage.getStageSpec());
    specBuilder.addStage(configuredStage.getStageSpec());
    for (Map.Entry<String, String> propertyEntry : configuredStage.pipelineProperties.entrySet()) {
      propertiesFromStages.put(propertyEntry.getKey(), propertyEntry.getValue(), stageName);
    }
  }
  // Check that multiple stages did not set conflicting properties.
  for (String propertyName : propertiesFromStages.rowKeySet()) {
    // Go through all values set for the property name. If there is more than one, there is a conflict.
    Map<String, String> propertyValues = propertiesFromStages.row(propertyName);
    if (propertyValues.size() > 1) {
      StringBuilder errMsg = new StringBuilder("Pipeline property '").append(propertyName)
        .append("' is being set to different values by stages.");
      for (Map.Entry<String, String> valueEntry : propertyValues.entrySet()) {
        String propertyValue = valueEntry.getKey();
        String fromStage = valueEntry.getValue();
        errMsg.append(" stage '").append(fromStage).append("' = '").append(propertyValue).append("',");
      }
      errMsg.deleteCharAt(errMsg.length() - 1);
      throw new IllegalArgumentException(errMsg.toString());
    }
    pipelineProperties.put(propertyName, propertyValues.keySet().iterator().next());
  }
  specBuilder.addConnections(config.getConnections())
    .setResources(config.getResources())
    .setDriverResources(config.getDriverResources())
    .setClientResources(config.getClientResources())
    .setStageLoggingEnabled(config.isStageLoggingEnabled())
    .setNumOfRecordsPreview(config.getNumOfRecordsPreview())
    .setProperties(pipelineProperties)
    .addConnectionsUsed(connectionEvaluator.getUsedConnections())
    .build();
}
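The 'system.[engine].' prefix handling above is easy to demonstrate in isolation. The following self-contained sketch applies the same stripping logic; the property keys and values are invented for the example:

import java.util.HashMap;
import java.util.Map;

public class PrefixStrippingExample {
  public static void main(String[] args) {
    // Example pipeline config properties; keys and values are made up.
    Map<String, String> configProperties = new HashMap<>();
    configProperties.put("system.spark.spark.executor.memory", "4g");
    configProperties.put("system.mapreduce.task.timeout", "600000");
    configProperties.put("someOtherProperty", "ignored");

    // Same logic as configureStages() above, for engine SPARK.
    String prefix = String.format("system.%s.", "spark");
    Map<String, String> pipelineProperties = new HashMap<>();
    for (Map.Entry<String, String> property : configProperties.entrySet()) {
      if (property.getKey().startsWith(prefix)) {
        pipelineProperties.put(property.getKey().substring(prefix.length()), property.getValue());
      }
    }
    // Prints {spark.executor.memory=4g}: only the matching engine's properties survive.
    System.out.println(pipelineProperties);
  }
}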
Use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.
The class SchemaPropagator, method propagateSchema.
/**
* Propagate the output schema set for this stage as the input schema for all of its outputs.
*
* @param stageSpec the specification for the stage that was just configured
*/
public void propagateSchema(StageSpec stageSpec) {
  String stageName = stageSpec.getName();
  Set<String> nextStages = stageOutputsProvider.apply(stageName);
  for (String nextStageName : nextStages) {
    String nextStageType = stageTypeProvider.apply(nextStageName);
    Schema nextStageInputSchema = getNextStageInputSchema(stageSpec, nextStageName, nextStageType);
    DefaultStageConfigurer outputStageConfigurer = pluginConfigurers.get(nextStageName).getStageConfigurer();
    // Do not allow more than one input schema for stages other than Joiner, Action, and Condition.
    if (!BatchJoiner.PLUGIN_TYPE.equals(nextStageType) && !Action.PLUGIN_TYPE.equals(nextStageType)
        && !Condition.PLUGIN_TYPE.equals(nextStageType)
        && !hasSameSchema(outputStageConfigurer.getInputSchemas(), nextStageInputSchema)) {
      throw new IllegalArgumentException("Two different input schema were set for the stage " + nextStageName);
    }
    outputStageConfigurer.addInputSchema(stageName, nextStageInputSchema);
    outputStageConfigurer.addInputStage(stageName);
  }
}
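The rule enforced above, that a stage other than a joiner, action, or condition must receive the same schema from all of its inputs, can be illustrated with a standalone sketch. This is a simplification that models schemas as plain strings rather than CDAP Schema objects and collapses hasSameSchema() into a containsValue() check:

import java.util.HashMap;
import java.util.Map;

public class SchemaRuleExample {
  // Input schemas recorded so far for one downstream stage, keyed by upstream stage name.
  private static final Map<String, String> inputSchemas = new HashMap<>();

  // Simplified version of the check in propagateSchema(); schemas are plain strings here.
  static void addInputSchema(String fromStage, String schema) {
    if (!inputSchemas.isEmpty() && !inputSchemas.containsValue(schema)) {
      throw new IllegalArgumentException("Two different input schemas were set for the stage");
    }
    inputSchemas.put(fromStage, schema);
  }

  public static void main(String[] args) {
    addInputSchema("parseA", "{name: string}");
    addInputSchema("parseB", "{name: string}"); // same schema: accepted
    addInputSchema("parseC", "{name: int}");    // different schema: throws
  }
}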
Use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.
The class BatchPipelineSpecGenerator, method configureSqlEngine.
private StageSpec configureSqlEngine(ETLBatchConfig config) throws ValidationException {
  if (!config.isPushdownEnabled() || config.getTransformationPushdown() == null
      || config.getTransformationPushdown().getPlugin() == null) {
    return null;
  }
  // Fixed name for the SQL engine stage.
  String stageName = SQLEngineUtils.buildStageName(config.getTransformationPushdown().getPlugin().getName());
  ETLStage sqlEngineStage = new ETLStage(stageName, config.getTransformationPushdown().getPlugin());
  DefaultPipelineConfigurer pipelineConfigurer =
    new DefaultPipelineConfigurer(pluginConfigurer, datasetConfigurer, stageName, engine,
                                  new DefaultStageConfigurer(stageName), getFeatureFlagsProvider());
  ConfiguredStage configuredStage = configureStage(sqlEngineStage, validateConfig(config), pipelineConfigurer);
  return configuredStage.getStageSpec();
}
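Because of the guard clause, configureSqlEngine() returns null unless pushdown is enabled and a pushdown plugin is fully specified, so a caller records the SQL engine stage only conditionally. A minimal sketch, assuming a setSqlEngineStageSpec() method on the spec builder (an assumption to verify against the CDAP source):

// Hypothetical caller: record the SQL engine stage only when pushdown is configured.
StageSpec sqlEngineStageSpec = configureSqlEngine(config);
if (sqlEngineStageSpec != null) {
  // setSqlEngineStageSpec() is assumed, not confirmed, to exist on the builder.
  specBuilder.setSqlEngineStageSpec(sqlEngineStageSpec);
}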