Search in sources :

Example 1 with DefaultPipelineConfigurer

use of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap by caskdata.

In the class ValidationUtils, the method validate:

/**
 * Validates a single pipeline stage based on the {@link StageValidationRequest}.
 * The stage is configured in isolation (macros evaluated first), and any
 * validation failures are captured rather than thrown.
 *
 * @param namespace            namespace in which the stage is validated
 * @param validationRequest    {@link StageValidationRequest} with plugin properties
 * @param pluginConfigurer     {@link PluginConfigurer} for using the plugin
 * @param macroFn              {@link Function} for evaluating macros
 * @param featureFlagsProvider {@link FeatureFlagsProvider} used to check feature flags during configuration
 * @return {@link StageValidationResponse} in json format
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest, PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn, FeatureFlagsProvider featureFlagsProvider) {
    ETLStage stageConfig = validationRequest.getStage();
    ValidatingConfigurer validatingConfigurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);
    // Batch or Streaming doesn't matter for a single stage.
    PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> pipelineSpecGenerator = new BatchPipelineSpecGenerator(namespace, validatingConfigurer, null, Collections.emptySet(), Collections.emptySet(), Engine.SPARK, featureFlagsProvider);
    DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageConfig.getName());
    // Register the caller-supplied input schemas/stages so the plugin sees the
    // same inputs it would inside a full pipeline.
    for (StageSchema stageSchema : validationRequest.getInputSchemas()) {
        stageConfigurer.addInputSchema(stageSchema.getStage(), stageSchema.getSchema());
        stageConfigurer.addInputStage(stageSchema.getStage());
    }
    DefaultPipelineConfigurer pipelineConfigurer = new DefaultPipelineConfigurer(validatingConfigurer, stageConfig.getName(), Engine.SPARK, stageConfigurer, featureFlagsProvider);
    // evaluate macros before configuring, so the plugin is validated with the
    // resolved property values rather than raw macro expressions
    Map<String, String> evaluatedProperties = macroFn.apply(stageConfig.getPlugin().getProperties());
    ETLPlugin originalConfig = stageConfig.getPlugin();
    // Rebuild the plugin with evaluated properties; name/type/artifact are unchanged.
    ETLPlugin evaluatedConfig = new ETLPlugin(originalConfig.getName(), originalConfig.getType(), evaluatedProperties, originalConfig.getArtifactConfig());
    try {
        StageSpec spec = pipelineSpecGenerator.configureStage(stageConfig.getName(), evaluatedConfig, pipelineConfigurer).build();
        return new StageValidationResponse(spec);
    } catch (ValidationException e) {
        // Validation failures are part of the response contract, not an error path.
        return new StageValidationResponse(e.getFailures());
    }
}
Also used : ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) BatchPipelineSpecGenerator(io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ValidatingConfigurer(io.cdap.cdap.etl.validation.ValidatingConfigurer) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)

Example 2 with DefaultPipelineConfigurer

use of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap by caskdata.

In the class PipelineSpecGenerator, the method configureStages:

/**
 * Performs most of the validation and configuration needed by a pipeline.
 * Handles stages, connections, resources, and stage logging settings.
 *
 * @param config user provided ETL config
 * @param specBuilder builder for creating a pipeline spec.
 * @throws ValidationException if the pipeline is invalid
 */
protected void configureStages(ETLConfig config, PipelineSpec.Builder specBuilder) throws ValidationException {
    // validate the config and determine the order we should configure the stages in.
    ValidatedPipeline validatedPipeline = validateConfig(config);
    List<ETLStage> traversalOrder = validatedPipeline.getTraversalOrder();
    // One pipeline configurer per stage, keyed by stage name; built up-front so
    // the schema propagator can look any of them up.
    Map<String, DefaultPipelineConfigurer> pluginConfigurers = new HashMap<>(traversalOrder.size());
    Map<String, String> pluginTypes = new HashMap<>(traversalOrder.size());
    for (ETLStage stage : traversalOrder) {
        String stageName = stage.getName();
        pluginTypes.put(stageName, stage.getPlugin().getType());
        pluginConfigurers.put(stageName, new DefaultPipelineConfigurer(pluginConfigurer, datasetConfigurer, stageName, engine, new DefaultStageConfigurer(stageName), featureFlagsProvider));
    }
    SchemaPropagator schemaPropagator = new SchemaPropagator(pluginConfigurers, validatedPipeline::getOutputs, pluginTypes::get);
    // anything prefixed by 'system.[engine].' is a pipeline property.
    Map<String, String> pipelineProperties = new HashMap<>();
    String prefix = String.format("system.%s.", engine.name().toLowerCase());
    int prefixLength = prefix.length();
    for (Map.Entry<String, String> property : config.getProperties().entrySet()) {
        if (property.getKey().startsWith(prefix)) {
            // strip the 'system.[engine].' prefix; the remainder is the real property key
            String strippedKey = property.getKey().substring(prefixLength);
            pipelineProperties.put(strippedKey, property.getValue());
        }
    }
    // row = property name, column = property value, val = stage that set the property
    // this is used so that we can error with a nice message about which stages are setting conflicting properties
    Table<String, String, String> propertiesFromStages = HashBasedTable.create();
    // configure the stages in order and build up the stage specs
    for (ETLStage stage : traversalOrder) {
        String stageName = stage.getName();
        DefaultPipelineConfigurer pluginConfigurer = pluginConfigurers.get(stageName);
        ConfiguredStage configuredStage = configureStage(stage, validatedPipeline, pluginConfigurer);
        // propagate this stage's output schema to downstream stages before they are configured
        schemaPropagator.propagateSchema(configuredStage.getStageSpec());
        specBuilder.addStage(configuredStage.getStageSpec());
        for (Map.Entry<String, String> propertyEntry : configuredStage.pipelineProperties.entrySet()) {
            propertiesFromStages.put(propertyEntry.getKey(), propertyEntry.getValue(), stageName);
        }
    }
    // check that multiple stages did not set conflicting properties
    for (String propertyName : propertiesFromStages.rowKeySet()) {
        // go through all values set for the property name. If there is more than one, we have a conflict.
        Map<String, String> propertyValues = propertiesFromStages.row(propertyName);
        if (propertyValues.size() > 1) {
            StringBuilder errMsg = new StringBuilder("Pipeline property '").append(propertyName).append("' is being set to different values by stages.");
            for (Map.Entry<String, String> valueEntry : propertyValues.entrySet()) {
                String propertyValue = valueEntry.getKey();
                String fromStage = valueEntry.getValue();
                errMsg.append(" stage '").append(fromStage).append("' = '").append(propertyValue).append("',");
            }
            // drop the trailing comma left by the loop above
            errMsg.deleteCharAt(errMsg.length() - 1);
            // NOTE(review): throws IllegalArgumentException although the method declares
            // ValidationException — confirm callers expect a runtime exception here.
            throw new IllegalArgumentException(errMsg.toString());
        }
        // exactly one value for this property; stage-set properties override the
        // 'system.[engine].' properties collected earlier
        pipelineProperties.put(propertyName, propertyValues.keySet().iterator().next());
    }
    // NOTE(review): the value returned by the terminal build() is discarded — the caller
    // builds the spec from specBuilder separately; verify the build() call here is intended.
    specBuilder.addConnections(config.getConnections()).setResources(config.getResources()).setDriverResources(config.getDriverResources()).setClientResources(config.getClientResources()).setStageLoggingEnabled(config.isStageLoggingEnabled()).setNumOfRecordsPreview(config.getNumOfRecordsPreview()).setProperties(pipelineProperties).addConnectionsUsed(connectionEvaluator.getUsedConnections()).build();
}
Also used : HashMap(java.util.HashMap) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) Map(java.util.Map) HashMap(java.util.HashMap)

Example 3 with DefaultPipelineConfigurer

use of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap by caskdata.

In the class BatchPipelineSpecGenerator, the method configureSqlEngine:

/**
 * Configures the SQL engine stage used for transformation pushdown.
 *
 * @param config the batch pipeline configuration
 * @return the configured SQL engine {@link StageSpec}, or {@code null} when pushdown
 *         is disabled or no pushdown plugin is configured
 * @throws ValidationException if the SQL engine stage fails validation
 */
private StageSpec configureSqlEngine(ETLBatchConfig config) throws ValidationException {
    // Nothing to configure unless pushdown is enabled and a plugin is present.
    if (!config.isPushdownEnabled()) {
        return null;
    }
    if (config.getTransformationPushdown() == null || config.getTransformationPushdown().getPlugin() == null) {
        return null;
    }
    // Fixed name for SQL Engine config.
    String sqlEngineStageName = SQLEngineUtils.buildStageName(config.getTransformationPushdown().getPlugin().getName());
    ETLStage sqlEngineStage = new ETLStage(sqlEngineStageName, config.getTransformationPushdown().getPlugin());
    DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(sqlEngineStageName);
    DefaultPipelineConfigurer configurer = new DefaultPipelineConfigurer(pluginConfigurer, datasetConfigurer, sqlEngineStageName, engine, stageConfigurer, getFeatureFlagsProvider());
    return configureStage(sqlEngineStage, validateConfig(config), configurer).getStageSpec();
}
Also used : ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer)

Example 4 with DefaultPipelineConfigurer

use of io.cdap.cdap.etl.common.DefaultPipelineConfigurer in project cdap by caskdata.

In the class BatchPipelineSpecGenerator, the method generateSpec:

/**
 * Generates the full {@link BatchPipelineSpec}: post-actions first, then the
 * regular stages, then the optional SQL engine stage.
 *
 * @param config user provided batch ETL config
 * @return the fully built pipeline spec
 * @throws ValidationException if any stage in the pipeline is invalid
 */
@Override
public BatchPipelineSpec generateSpec(ETLBatchConfig config) throws ValidationException {
    BatchPipelineSpec.Builder builder = BatchPipelineSpec.builder();
    // Each post-action is configured in isolation with its own configurer.
    for (ETLStage postAction : config.getPostActions()) {
        String actionName = postAction.getName();
        DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(actionName);
        DefaultPipelineConfigurer configurer = new DefaultPipelineConfigurer(pluginConfigurer, datasetConfigurer, actionName, engine, stageConfigurer, getFeatureFlagsProvider());
        StageSpec actionSpec = configureStage(actionName, postAction.getPlugin(), configurer).build();
        builder.addAction(new ActionSpec(actionName, actionSpec.getPlugin()));
    }
    configureStages(config, builder);
    // Configure SQL Engine; null means pushdown is disabled.
    StageSpec sqlEngineStageSpec = configureSqlEngine(config);
    if (sqlEngineStageSpec != null) {
        builder.setSqlEngineStageSpec(sqlEngineStageSpec);
    }
    return builder.build();
}
Also used : ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer)

Aggregations

DefaultPipelineConfigurer (io.cdap.cdap.etl.common.DefaultPipelineConfigurer)4 DefaultStageConfigurer (io.cdap.cdap.etl.common.DefaultStageConfigurer)4 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)4 StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)2 ValidationException (io.cdap.cdap.etl.api.validation.ValidationException)1 BatchPipelineSpec (io.cdap.cdap.etl.batch.BatchPipelineSpec)1 BatchPipelineSpecGenerator (io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator)1 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)1 ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)1 StageSchema (io.cdap.cdap.etl.proto.v2.validation.StageSchema)1 StageValidationResponse (io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)1 ValidatingConfigurer (io.cdap.cdap.etl.validation.ValidatingConfigurer)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1