Search in sources :

Example 1 with DefaultStageConfigurer

use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.

the class ValidationUtils method validate.

/**
 * Validates a single pipeline stage described by a {@link StageValidationRequest}.
 *
 * <p>The input schemas carried by the request are registered on a fresh stage configurer, the plugin
 * properties are passed through {@code macroFn}, and the stage is then configured with the evaluated
 * properties.
 *
 * @param namespace            namespace handed to the pipeline spec generator
 * @param validationRequest    {@link StageValidationRequest} carrying the stage and its input schemas
 * @param pluginConfigurer     {@link PluginConfigurer} wrapped by the validating configurer
 * @param macroFn              {@link Function} applied to the raw plugin properties to evaluate macros
 * @param featureFlagsProvider {@link FeatureFlagsProvider} handed to the configurers and the spec generator
 * @return a {@link StageValidationResponse} built from the configured stage spec, or from the collected
 *         failures when configuration raises a {@link ValidationException}
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest, PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn, FeatureFlagsProvider featureFlagsProvider) {
    ETLStage stage = validationRequest.getStage();
    ValidatingConfigurer validatingConfigurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);
    // Batch or Streaming doesn't matter for a single stage.
    PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> generator = new BatchPipelineSpecGenerator(
        namespace, validatingConfigurer, null, Collections.emptySet(), Collections.emptySet(), Engine.SPARK,
        featureFlagsProvider);

    String stageName = stage.getName();
    DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageName);
    // register every input schema from the request together with the stage it comes from
    for (StageSchema input : validationRequest.getInputSchemas()) {
        String inputStage = input.getStage();
        stageConfigurer.addInputSchema(inputStage, input.getSchema());
        stageConfigurer.addInputStage(inputStage);
    }
    DefaultPipelineConfigurer pipelineConfigurer = new DefaultPipelineConfigurer(
        validatingConfigurer, stageName, Engine.SPARK, stageConfigurer, featureFlagsProvider);

    // evaluate macros, then rebuild the plugin config around the evaluated properties
    ETLPlugin rawPlugin = stage.getPlugin();
    Map<String, String> evaluatedProperties = macroFn.apply(rawPlugin.getProperties());
    ETLPlugin evaluatedPlugin = new ETLPlugin(
        rawPlugin.getName(), rawPlugin.getType(), evaluatedProperties, rawPlugin.getArtifactConfig());

    try {
        return new StageValidationResponse(
            generator.configureStage(stageName, evaluatedPlugin, pipelineConfigurer).build());
    } catch (ValidationException e) {
        // configuration failures are reported in the response instead of being thrown
        return new StageValidationResponse(e.getFailures());
    }
}
Also used : ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) BatchPipelineSpecGenerator(io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ValidatingConfigurer(io.cdap.cdap.etl.validation.ValidatingConfigurer) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)

Example 2 with DefaultStageConfigurer

use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.

the class PipelineSpecGenerator method configureStage.

/**
 * Instantiates the plugin for a stage, runs its pipeline configuration and assembles the stage spec builder.
 *
 * <p>Most exceptions raised while the plugin configures itself are recorded on the stage's
 * {@link FailureCollector} instead of being propagated; a {@link ValidationException} is rethrown unchanged.
 * After configuration, {@code collector.getOrThrowException()} is called so that recorded failures are
 * surfaced before the spec is built.
 *
 * @param stageName the unique plugin id
 * @param etlPlugin user provided configuration for the plugin
 * @param pipelineConfigurer default pipeline configurer to configure the plugin
 * @return builder for the spec of the plugin
 * @throws IllegalArgumentException if the plugin with same id is already deployed
 * @throws ValidationException if the plugin threw an exception during configuration
 */
public StageSpec.Builder configureStage(String stageName, ETLPlugin etlPlugin, DefaultPipelineConfigurer pipelineConfigurer) throws ValidationException {
    TrackedPluginSelector pluginSelector = new TrackedPluginSelector(
        new ArtifactSelectorProvider().getPluginSelector(etlPlugin.getArtifactConfig()));
    String type = etlPlugin.getType();
    String pluginName = etlPlugin.getName();
    DefaultStageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer();
    FailureCollector collector = stageConfigurer.getFailureCollector();
    Object plugin = getPlugin(stageName, etlPlugin, pluginSelector, type, pluginName, collector);
    // corrective action shared by every invalid-config-property failure
    final String propertyFixFormat = "Provide valid value for config property '%s'.";

    try {
        if (type.equals(BatchJoiner.PLUGIN_TYPE)) {
            ((MultiInputPipelineConfigurable) plugin).configurePipeline(pipelineConfigurer);
            // a joiner plugin that is also an AutoJoiner gets the additional auto-join configuration
            if (plugin instanceof AutoJoiner) {
                configureAutoJoiner(stageName, (AutoJoiner) plugin, stageConfigurer, collector);
            }
        } else if (type.equals(SplitterTransform.PLUGIN_TYPE)) {
            ((MultiOutputPipelineConfigurable) plugin).configurePipeline(pipelineConfigurer);
        } else if (!type.equals(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
            ((PipelineConfigurable) plugin).configurePipeline(pipelineConfigurer);
            // evaluate macros and find out if there is connection used
            boolean sourceOrSink = sourcePluginTypes.contains(type) || BatchSink.PLUGIN_TYPE.equals(type);
            if (sourceOrSink && runtimeEvaluator == null) {
                pluginConfigurer.evaluateMacros(etlPlugin.getProperties(), connectionEvaluator, options);
            }
        }
    } catch (InvalidConfigPropertyException e) {
        String property = e.getProperty();
        collector.addFailure(e.getMessage(), String.format(propertyFixFormat, property))
            .withConfigProperty(property);
    } catch (InvalidStageException e) {
        // without nested reasons the exception itself is the failure; otherwise each reason is reported
        if (e.getReasons().isEmpty()) {
            collector.addFailure(e.getMessage(), null);
        }
        for (InvalidStageException reason : e.getReasons()) {
            if (!(reason instanceof InvalidConfigPropertyException)) {
                collector.addFailure(reason.getMessage(), null);
                continue;
            }
            InvalidConfigPropertyException propertyReason = (InvalidConfigPropertyException) reason;
            String property = propertyReason.getProperty();
            collector.addFailure(propertyReason.getMessage(), String.format(propertyFixFormat, property))
                .withConfigProperty(property);
        }
    } catch (ValidationException e) {
        // must be listed before the generic Exception handler so it propagates untouched
        throw e;
    } catch (NullPointerException e) {
        // handle the case where plugin throws null pointer exception, this is to avoid having 'null' as error message
        String message = String.format("Null error occurred while configuring the stage %s.", stageName);
        collector.addFailure(message, null).withStacktrace(e.getStackTrace());
    } catch (ArrayIndexOutOfBoundsException e) {
        // handle the case where plugin throws index out of bounds exception,
        // this is to avoid having a number like '2', '8' etc as error message
        String message = String.format("Index out of bounds error occurred while configuring the stage %s.", stageName);
        collector.addFailure(message, null).withStacktrace(e.getStackTrace());
    } catch (ConnectionBadRequestException e) {
        collector.addFailure(e.getMessage(), "Provide a valid connection name.");
    } catch (Exception e) {
        String message = String.format("Error encountered while configuring the stage: '%s'", e.getMessage());
        collector.addFailure(message, null).withStacktrace(e.getStackTrace());
    }

    // throw validation exception if there are any errors being carried by failure collector
    collector.getOrThrowException();

    PluginSpec pluginSpec = new PluginSpec(
        type, pluginName, etlPlugin.getProperties(), pluginSelector.getSelectedArtifact());
    StageSpec.Builder specBuilder = StageSpec.builder(stageName, pluginSpec)
        .addInputSchemas(pipelineConfigurer.getStageConfigurer().getInputSchemas())
        .setErrorSchema(stageConfigurer.getErrorSchema());
    // splitter stages expose one schema per output port; every other stage has a single output schema
    if (type.equals(SplitterTransform.PLUGIN_TYPE)) {
        specBuilder.setPortSchemas(stageConfigurer.getOutputPortSchemas());
    } else {
        specBuilder.setOutputSchema(stageConfigurer.getOutputSchema());
    }
    return specBuilder;
}
Also used : ArtifactSelectorProvider(io.cdap.cdap.etl.common.ArtifactSelectorProvider) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) InvalidStageException(io.cdap.cdap.etl.api.validation.InvalidStageException) MultiOutputPipelineConfigurable(io.cdap.cdap.etl.api.MultiOutputPipelineConfigurable) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) InvalidConfigPropertyException(io.cdap.cdap.etl.api.validation.InvalidConfigPropertyException) InvalidPluginConfigException(io.cdap.cdap.api.plugin.InvalidPluginConfigException) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) ConnectionBadRequestException(io.cdap.cdap.etl.proto.connection.ConnectionBadRequestException) InvalidStageException(io.cdap.cdap.etl.api.validation.InvalidStageException) ConnectionBadRequestException(io.cdap.cdap.etl.proto.connection.ConnectionBadRequestException) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) MultiInputPipelineConfigurable(io.cdap.cdap.etl.api.MultiInputPipelineConfigurable) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) InvalidConfigPropertyException(io.cdap.cdap.etl.api.validation.InvalidConfigPropertyException) AutoJoiner(io.cdap.cdap.etl.api.join.AutoJoiner) MultiInputPipelineConfigurable(io.cdap.cdap.etl.api.MultiInputPipelineConfigurable) MultiOutputPipelineConfigurable(io.cdap.cdap.etl.api.MultiOutputPipelineConfigurable) PipelineConfigurable(io.cdap.cdap.etl.api.PipelineConfigurable) FailureCollector(io.cdap.cdap.etl.api.FailureCollector)

Example 3 with DefaultStageConfigurer

use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.

the class PipelineSpecGenerator method configureStages.

/**
 * Performs most of the validation and configuration needed by a pipeline.
 * Handles stages, connections, resources, and stage logging settings.
 *
 * <p>Stages are configured in the traversal order returned by config validation; after each stage is
 * configured its spec is passed to the schema propagator and added to {@code specBuilder}. Pipeline
 * properties come from two places: config properties prefixed with {@code system.[engine].} (prefix
 * stripped) and properties reported by the configured stages.
 *
 * @param config user provided ETL config
 * @param specBuilder builder for creating a pipeline spec.
 * @throws ValidationException if the pipeline is invalid
 * @throws IllegalArgumentException if stages set the same pipeline property to different values
 */
protected void configureStages(ETLConfig config, PipelineSpec.Builder specBuilder) throws ValidationException {
    // validate the config and determine the order we should configure the stages in.
    ValidatedPipeline validatedPipeline = validateConfig(config);
    List<ETLStage> traversalOrder = validatedPipeline.getTraversalOrder();
    Map<String, DefaultPipelineConfigurer> pluginConfigurers = new HashMap<>(traversalOrder.size());
    Map<String, String> pluginTypes = new HashMap<>(traversalOrder.size());
    for (ETLStage stage : traversalOrder) {
        String stageName = stage.getName();
        pluginTypes.put(stageName, stage.getPlugin().getType());
        pluginConfigurers.put(stageName, new DefaultPipelineConfigurer(pluginConfigurer, datasetConfigurer, stageName, engine, new DefaultStageConfigurer(stageName), featureFlagsProvider));
    }
    SchemaPropagator schemaPropagator = new SchemaPropagator(pluginConfigurers, validatedPipeline::getOutputs, pluginTypes::get);
    // anything prefixed by 'system.[engine].' is a pipeline property.
    Map<String, String> pipelineProperties = new HashMap<>();
    // Lower-case with Locale.ROOT so the property prefix does not depend on the JVM's default locale.
    // The fully qualified name is used so this method does not need an extra import.
    String prefix = String.format("system.%s.", engine.name().toLowerCase(java.util.Locale.ROOT));
    int prefixLength = prefix.length();
    for (Map.Entry<String, String> property : config.getProperties().entrySet()) {
        if (property.getKey().startsWith(prefix)) {
            String strippedKey = property.getKey().substring(prefixLength);
            pipelineProperties.put(strippedKey, property.getValue());
        }
    }
    // row = property name, column = property value, val = stage that set the property
    // this is used so that we can error with a nice message about which stages are setting conflicting properties
    Table<String, String, String> propertiesFromStages = HashBasedTable.create();
    // configure the stages in order and build up the stage specs
    for (ETLStage stage : traversalOrder) {
        String stageName = stage.getName();
        // named differently from the 'pluginConfigurer' field used above so the field is not shadowed
        DefaultPipelineConfigurer stagePipelineConfigurer = pluginConfigurers.get(stageName);
        ConfiguredStage configuredStage = configureStage(stage, validatedPipeline, stagePipelineConfigurer);
        schemaPropagator.propagateSchema(configuredStage.getStageSpec());
        specBuilder.addStage(configuredStage.getStageSpec());
        for (Map.Entry<String, String> propertyEntry : configuredStage.pipelineProperties.entrySet()) {
            propertiesFromStages.put(propertyEntry.getKey(), propertyEntry.getValue(), stageName);
        }
    }
    // check that multiple stages did not set conflicting properties
    for (String propertyName : propertiesFromStages.rowKeySet()) {
        // go through all values set for the property name. If there is more than one, we have a conflict.
        Map<String, String> propertyValues = propertiesFromStages.row(propertyName);
        if (propertyValues.size() > 1) {
            StringBuilder errMsg = new StringBuilder("Pipeline property '").append(propertyName).append("' is being set to different values by stages.");
            for (Map.Entry<String, String> valueEntry : propertyValues.entrySet()) {
                String propertyValue = valueEntry.getKey();
                String fromStage = valueEntry.getValue();
                errMsg.append(" stage '").append(fromStage).append("' = '").append(propertyValue).append("',");
            }
            // drop the trailing comma left by the last appended entry
            errMsg.deleteCharAt(errMsg.length() - 1);
            throw new IllegalArgumentException(errMsg.toString());
        }
        // exactly one value was set for this property, so take it
        pipelineProperties.put(propertyName, propertyValues.keySet().iterator().next());
    }
    // NOTE(review): the value returned by build() is discarded here; confirm whether the call is needed
    // (e.g. for validation done inside build()) before removing it.
    specBuilder.addConnections(config.getConnections()).setResources(config.getResources()).setDriverResources(config.getDriverResources()).setClientResources(config.getClientResources()).setStageLoggingEnabled(config.isStageLoggingEnabled()).setNumOfRecordsPreview(config.getNumOfRecordsPreview()).setProperties(pipelineProperties).addConnectionsUsed(connectionEvaluator.getUsedConnections()).build();
}
Also used : HashMap(java.util.HashMap) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with DefaultStageConfigurer

use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.

the class SchemaPropagator method propagateSchema.

/**
 * Registers the given stage as an input, together with the schema it provides, on every stage it outputs to.
 *
 * @param stageSpec the specification for the stage that was just configured
 * @throws IllegalArgumentException if a downstream stage that is not a joiner, action or condition fails the
 *         {@code hasSameSchema} check between its already registered input schemas and the new one
 */
public void propagateSchema(StageSpec stageSpec) {
    String upstreamName = stageSpec.getName();
    for (String downstreamName : stageOutputsProvider.apply(upstreamName)) {
        String downstreamType = stageTypeProvider.apply(downstreamName);
        Schema schemaForDownstream = getNextStageInputSchema(stageSpec, downstreamName, downstreamType);
        DefaultStageConfigurer downstreamConfigurer = pluginConfigurers.get(downstreamName).getStageConfigurer();

        // Joiner, Action and Condition stages are exempt from the single-input-schema restriction
        boolean exemptFromSchemaCheck = BatchJoiner.PLUGIN_TYPE.equals(downstreamType)
            || Action.PLUGIN_TYPE.equals(downstreamType)
            || Condition.PLUGIN_TYPE.equals(downstreamType);
        if (!exemptFromSchemaCheck
            && !hasSameSchema(downstreamConfigurer.getInputSchemas(), schemaForDownstream)) {
            throw new IllegalArgumentException("Two different input schema were set for the stage " + downstreamName);
        }

        downstreamConfigurer.addInputSchema(upstreamName, schemaForDownstream);
        downstreamConfigurer.addInputStage(upstreamName);
    }
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer)

Example 5 with DefaultStageConfigurer

use of io.cdap.cdap.etl.common.DefaultStageConfigurer in project cdap by caskdata.

the class BatchPipelineSpecGenerator method configureSqlEngine.

/**
 * Configures the SQL engine stage described by the transformation pushdown section of the config.
 *
 * @param config the batch pipeline config
 * @return the spec of the configured SQL engine stage, or {@code null} when pushdown is not enabled or
 *         the config has no transformation pushdown plugin
 * @throws ValidationException declared by this method; presumably raised when configuring the stage
 *         fails (confirm against configureStage)
 */
private StageSpec configureSqlEngine(ETLBatchConfig config) throws ValidationException {
    // nothing to configure unless pushdown is enabled and a pushdown plugin is present
    if (!config.isPushdownEnabled()) {
        return null;
    }
    if (config.getTransformationPushdown() == null) {
        return null;
    }
    if (config.getTransformationPushdown().getPlugin() == null) {
        return null;
    }

    // Fixed name for SQL Engine config.
    String stageName = SQLEngineUtils.buildStageName(config.getTransformationPushdown().getPlugin().getName());
    ETLStage sqlEngineStage = new ETLStage(stageName, config.getTransformationPushdown().getPlugin());
    DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageName);
    DefaultPipelineConfigurer pipelineConfigurer = new DefaultPipelineConfigurer(
        pluginConfigurer, datasetConfigurer, stageName, engine, stageConfigurer, getFeatureFlagsProvider());
    return configureStage(sqlEngineStage, validateConfig(config), pipelineConfigurer).getStageSpec();
}
Also used : ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer)

Aggregations

DefaultStageConfigurer (io.cdap.cdap.etl.common.DefaultStageConfigurer): 7, DefaultPipelineConfigurer (io.cdap.cdap.etl.common.DefaultPipelineConfigurer): 4, ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 4, StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec): 4, Schema (io.cdap.cdap.api.data.schema.Schema): 2, ValidationException (io.cdap.cdap.etl.api.validation.ValidationException): 2, ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin): 2, HashMap (java.util.HashMap): 2, Map (java.util.Map): 2, InvalidPluginConfigException (io.cdap.cdap.api.plugin.InvalidPluginConfigException): 1, FailureCollector (io.cdap.cdap.etl.api.FailureCollector): 1, MultiInputPipelineConfigurable (io.cdap.cdap.etl.api.MultiInputPipelineConfigurable): 1, MultiOutputPipelineConfigurable (io.cdap.cdap.etl.api.MultiOutputPipelineConfigurable): 1, PipelineConfigurable (io.cdap.cdap.etl.api.PipelineConfigurable): 1, AutoJoiner (io.cdap.cdap.etl.api.join.AutoJoiner): 1, InvalidConfigPropertyException (io.cdap.cdap.etl.api.validation.InvalidConfigPropertyException): 1, InvalidStageException (io.cdap.cdap.etl.api.validation.InvalidStageException): 1, BatchPipelineSpec (io.cdap.cdap.etl.batch.BatchPipelineSpec): 1, BatchPipelineSpecGenerator (io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator): 1, ArtifactSelectorProvider (io.cdap.cdap.etl.common.ArtifactSelectorProvider): 1