Usage of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap (by caskdata):
class ValidationUtils, method validate.
/**
 * Validates a single pipeline stage described by a {@link StageValidationRequest}.
 *
 * @param namespace namespace in which the plugin artifact is resolved
 * @param validationRequest {@link StageValidationRequest} carrying the stage and its input schemas
 * @param pluginConfigurer {@link PluginConfigurer} used to instantiate the plugin
 * @param macroFn {@link Function} that evaluates macros in the plugin properties
 * @param featureFlagsProvider provider of feature flags for the configurers
 * @return {@link StageValidationResponse} holding either the configured stage spec or the
 *     validation failures
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest,
    PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn,
    FeatureFlagsProvider featureFlagsProvider) {
  ETLStage stage = validationRequest.getStage();
  String stageName = stage.getName();
  ValidatingConfigurer validatingConfigurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);

  // Batch or Streaming doesn't matter for a single stage, so a batch generator with SPARK is used.
  PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> specGenerator =
      new BatchPipelineSpecGenerator(namespace, validatingConfigurer, null, Collections.emptySet(),
          Collections.emptySet(), Engine.SPARK, featureFlagsProvider);

  // Register the declared input schemas and their originating stages before configuring.
  DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageName);
  for (StageSchema inputSchema : validationRequest.getInputSchemas()) {
    stageConfigurer.addInputSchema(inputSchema.getStage(), inputSchema.getSchema());
    stageConfigurer.addInputStage(inputSchema.getStage());
  }
  DefaultPipelineConfigurer stagePipelineConfigurer =
      new DefaultPipelineConfigurer(validatingConfigurer, stageName, Engine.SPARK, stageConfigurer,
          featureFlagsProvider);

  // Evaluate macros in the plugin properties and rebuild the plugin with the evaluated values.
  ETLPlugin plugin = stage.getPlugin();
  Map<String, String> evaluatedProperties = macroFn.apply(plugin.getProperties());
  ETLPlugin evaluatedPlugin =
      new ETLPlugin(plugin.getName(), plugin.getType(), evaluatedProperties, plugin.getArtifactConfig());

  try {
    StageSpec spec = specGenerator.configureStage(stageName, evaluatedPlugin, stagePipelineConfigurer).build();
    return new StageValidationResponse(spec);
  } catch (ValidationException e) {
    // Validation failures are reported back to the caller rather than propagated.
    return new StageValidationResponse(e.getFailures());
  }
}
Usage of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap (by caskdata):
class PipelineSpecGenerator, method configureStages.
/**
 * Performs most of the validation and configuration needed by a pipeline.
 * Handles stages, connections, resources, and stage logging settings.
 *
 * <p>Stages are configured in the traversal order produced by {@code validateConfig}, so each
 * stage can see schemas propagated from its upstream stages. Pipeline-level properties set by
 * individual stages are collected and checked for conflicts before being merged.
 *
 * @param config user provided ETL config
 * @param specBuilder builder for creating a pipeline spec.
 * @throws ValidationException if the pipeline is invalid
 */
protected void configureStages(ETLConfig config, PipelineSpec.Builder specBuilder) throws ValidationException {
// validate the config and determine the order we should configure the stages in.
ValidatedPipeline validatedPipeline = validateConfig(config);
List<ETLStage> traversalOrder = validatedPipeline.getTraversalOrder();
// One per-stage configurer keyed by stage name; plugin types are tracked for schema propagation.
Map<String, DefaultPipelineConfigurer> pluginConfigurers = new HashMap<>(traversalOrder.size());
Map<String, String> pluginTypes = new HashMap<>(traversalOrder.size());
for (ETLStage stage : traversalOrder) {
String stageName = stage.getName();
pluginTypes.put(stageName, stage.getPlugin().getType());
pluginConfigurers.put(stageName, new DefaultPipelineConfigurer(pluginConfigurer, datasetConfigurer, stageName, engine, new DefaultStageConfigurer(stageName), featureFlagsProvider));
}
SchemaPropagator schemaPropagator = new SchemaPropagator(pluginConfigurers, validatedPipeline::getOutputs, pluginTypes::get);
// anything prefixed by 'system.[engine].' is a pipeline property.
Map<String, String> pipelineProperties = new HashMap<>();
String prefix = String.format("system.%s.", engine.name().toLowerCase());
int prefixLength = prefix.length();
for (Map.Entry<String, String> property : config.getProperties().entrySet()) {
if (property.getKey().startsWith(prefix)) {
String strippedKey = property.getKey().substring(prefixLength);
pipelineProperties.put(strippedKey, property.getValue());
}
}
// row = property name, column = property value, val = stage that set the property
// this is used so that we can error with a nice message about which stages are setting conflicting properties
Table<String, String, String> propertiesFromStages = HashBasedTable.create();
// configure the stages in order and build up the stage specs
for (ETLStage stage : traversalOrder) {
String stageName = stage.getName();
DefaultPipelineConfigurer pluginConfigurer = pluginConfigurers.get(stageName);
ConfiguredStage configuredStage = configureStage(stage, validatedPipeline, pluginConfigurer);
// propagate this stage's output schema to downstream stages before they are configured
schemaPropagator.propagateSchema(configuredStage.getStageSpec());
specBuilder.addStage(configuredStage.getStageSpec());
// record which stage set each pipeline property so conflicts can be reported by name
for (Map.Entry<String, String> propertyEntry : configuredStage.pipelineProperties.entrySet()) {
propertiesFromStages.put(propertyEntry.getKey(), propertyEntry.getValue(), stageName);
}
}
// check that multiple stages did not set conflicting properties
for (String propertyName : propertiesFromStages.rowKeySet()) {
// go through all values set for the property name. If there is more than one, we have a conflict.
Map<String, String> propertyValues = propertiesFromStages.row(propertyName);
if (propertyValues.size() > 1) {
StringBuilder errMsg = new StringBuilder("Pipeline property '").append(propertyName).append("' is being set to different values by stages.");
for (Map.Entry<String, String> valueEntry : propertyValues.entrySet()) {
// key is the property value, value is the stage that set it (see Table comment above)
String propertyValue = valueEntry.getKey();
String fromStage = valueEntry.getValue();
errMsg.append(" stage '").append(fromStage).append("' = '").append(propertyValue).append("',");
}
// drop the trailing comma from the message
errMsg.deleteCharAt(errMsg.length() - 1);
throw new IllegalArgumentException(errMsg.toString());
}
// exactly one value for this property; stage-set properties override the 'system.[engine].' ones
pipelineProperties.put(propertyName, propertyValues.keySet().iterator().next());
}
specBuilder.addConnections(config.getConnections()).setResources(config.getResources()).setDriverResources(config.getDriverResources()).setClientResources(config.getClientResources()).setStageLoggingEnabled(config.isStageLoggingEnabled()).setNumOfRecordsPreview(config.getNumOfRecordsPreview()).setProperties(pipelineProperties).addConnectionsUsed(connectionEvaluator.getUsedConnections()).build();
}
Usage of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap (by caskdata):
class BatchPipelineSpecGenerator, method configureSqlEngine.
/**
 * Configures the SQL engine stage when transformation pushdown is enabled.
 *
 * @param config the batch pipeline config
 * @return the configured SQL engine {@link StageSpec}, or {@code null} when pushdown is
 *     disabled or no SQL engine plugin is configured
 * @throws ValidationException if the SQL engine stage is invalid
 */
private StageSpec configureSqlEngine(ETLBatchConfig config) throws ValidationException {
  // Nothing to configure unless pushdown is enabled and an engine plugin is present.
  if (!config.isPushdownEnabled()
      || config.getTransformationPushdown() == null
      || config.getTransformationPushdown().getPlugin() == null) {
    return null;
  }
  // Fixed name for SQL Engine config.
  String stageName = SQLEngineUtils.buildStageName(config.getTransformationPushdown().getPlugin().getName());
  ETLStage sqlEngineStage = new ETLStage(stageName, config.getTransformationPushdown().getPlugin());
  DefaultPipelineConfigurer stageConfigurer = new DefaultPipelineConfigurer(pluginConfigurer,
      datasetConfigurer, stageName, engine, new DefaultStageConfigurer(stageName), getFeatureFlagsProvider());
  return configureStage(sqlEngineStage, validateConfig(config), stageConfigurer).getStageSpec();
}
Usage of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap (by caskdata):
class BatchPipelineSpecGenerator, method generateSpec.
/**
 * Generates the spec for a batch pipeline: configures the post-run actions, all pipeline
 * stages, and the optional SQL engine stage for transformation pushdown.
 *
 * @param config the batch pipeline config
 * @return the generated {@link BatchPipelineSpec}
 * @throws ValidationException if the pipeline is invalid
 */
@Override
public BatchPipelineSpec generateSpec(ETLBatchConfig config) throws ValidationException {
  BatchPipelineSpec.Builder builder = BatchPipelineSpec.builder();

  // Each post-run action is configured independently with its own per-stage configurer.
  for (ETLStage postAction : config.getPostActions()) {
    String actionName = postAction.getName();
    DefaultPipelineConfigurer actionConfigurer = new DefaultPipelineConfigurer(pluginConfigurer,
        datasetConfigurer, actionName, engine, new DefaultStageConfigurer(actionName), getFeatureFlagsProvider());
    StageSpec actionSpec = configureStage(postAction.getName(), postAction.getPlugin(), actionConfigurer).build();
    builder.addAction(new ActionSpec(actionName, actionSpec.getPlugin()));
  }

  configureStages(config, builder);

  // Configure SQL Engine; attach its spec only when transformation pushdown is configured.
  StageSpec sqlEngineStageSpec = configureSqlEngine(config);
  if (sqlEngineStageSpec != null) {
    builder.setSqlEngineStageSpec(sqlEngineStageSpec);
  }
  return builder.build();
}
End of aggregated usage examples.