Search in sources :

Example 6 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testSingleAction.

/**
 * A pipeline made of a single action stage should produce a spec holding just that stage,
 * with the resource and logging settings carried over from the config.
 */
@Test
public void testSingleAction() throws ValidationException {
    ETLBatchConfig config = ETLBatchConfig.builder()
            .setTimeSchedule("* * * * *")
            .addStage(new ETLStage("action", MOCK_ACTION))
            .build();

    PipelineSpec actual = specGenerator.generateSpec(config);

    // the mock action is declared without any plugin properties
    Map<String, String> noProperties = ImmutableMap.of();
    PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);
    StageSpec actionStage = StageSpec.builder("action", actionPlugin).build();

    PipelineSpec expected = BatchPipelineSpec.builder()
            .addStage(actionStage)
            .setResources(config.getResources())
            .setDriverResources(config.getDriverResources())
            .setClientResources(config.getClientResources())
            .setStageLoggingEnabled(config.isStageLoggingEnabled())
            .build();

    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 7 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testDifferentInputSchemasForAction.

/**
 * An action fed by two sinks whose upstream transforms emit different schemas should still
 * produce a valid spec; the action's input schema from each sink is null.
 *
 * <pre>
 *           ---- tA ---- sinkA ----
 *           |                     |
 * source ---                      |--- action
 *           |                     |
 *           ---- tB ---- sinkB ----
 * </pre>
 */
@Test
public void testDifferentInputSchemasForAction() throws ValidationException {
    ETLBatchConfig config = ETLBatchConfig.builder()
            .setTimeSchedule("* * * * *")
            .addStage(new ETLStage("source", MOCK_SOURCE))
            .addStage(new ETLStage("tA", MOCK_TRANSFORM_A))
            .addStage(new ETLStage("tB", MOCK_TRANSFORM_B))
            .addStage(new ETLStage("sinkA", MOCK_SINK))
            .addStage(new ETLStage("sinkB", MOCK_SINK))
            .addStage(new ETLStage("action", MOCK_ACTION))
            .addConnection("source", "tA")
            .addConnection("source", "tB")
            .addConnection("tA", "sinkA")
            .addConnection("tB", "sinkB")
            .addConnection("sinkA", "action")
            .addConnection("sinkB", "action")
            .setNumOfRecordsPreview(100)
            .build();

    PipelineSpec actual = specGenerator.generateSpec(config);

    // none of the mock plugins carry properties
    Map<String, String> noProperties = ImmutableMap.of();

    StageSpec sourceStage = StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", noProperties, ARTIFACT_ID))
            .addOutput(SCHEMA_A, "tA", "tB")
            .build();
    StageSpec sinkAStage = StageSpec.builder("sinkA", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
            .addInputSchema("tA", SCHEMA_A)
            .addOutput(null, "action")
            .setErrorSchema(SCHEMA_A)
            .build();
    StageSpec sinkBStage = StageSpec.builder("sinkB", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
            .addInputSchema("tB", SCHEMA_B)
            .addOutput(null, "action")
            .setErrorSchema(SCHEMA_B)
            .build();
    StageSpec transformAStage = StageSpec.builder("tA", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", noProperties, ARTIFACT_ID))
            .addInputSchema("source", SCHEMA_A)
            .addOutput(SCHEMA_A, "sinkA")
            .setErrorSchema(SCHEMA_B)
            .build();
    StageSpec transformBStage = StageSpec.builder("tB", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", noProperties, ARTIFACT_ID))
            .addInputSchema("source", SCHEMA_A)
            .addOutput(SCHEMA_B, "sinkB")
            .setErrorSchema(SCHEMA_A)
            .build();
    StageSpec actionStage = StageSpec.builder("action", new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID))
            .addInputSchema("sinkA", null)
            .addInputSchema("sinkB", null)
            .build();

    PipelineSpec expected = BatchPipelineSpec.builder()
            .addStage(sourceStage)
            .addStage(sinkAStage)
            .addStage(sinkBStage)
            .addStage(transformAStage)
            .addStage(transformBStage)
            .addStage(actionStage)
            .addConnections(config.getConnections())
            .setResources(config.getResources())
            .setDriverResources(config.getDriverResources())
            .setClientResources(config.getClientResources())
            .setStageLoggingEnabled(config.isStageLoggingEnabled())
            .setNumOfRecordsPreview(config.getNumOfRecordsPreview())
            .build();

    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 8 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGenerator method configureStage.

/**
 * Configures a plugin and returns the spec for it.
 *
 * <p>The plugin object is obtained through {@code getPlugin} and its {@code configurePipeline} method is invoked
 * through the interface that matches the plugin type (multi-input for batch joiners, multi-output for splitter
 * transforms, single-input for everything else except the Spark program type). Exceptions raised while configuring
 * are converted into failures on the stage's {@link FailureCollector}; a {@link ValidationException} is rethrown
 * unchanged. After configuration, {@code collector.getOrThrowException()} is called so that any failures carried
 * by the collector are raised before the spec is built.
 *
 * @param stageName the unique plugin id
 * @param etlPlugin user provided configuration for the plugin
 * @param pipelineConfigurer default pipeline configurer to configure the plugin
 * @return a builder for the stage spec, pre-populated with the plugin spec, the input schemas, the error schema and
 *   either the output port schemas (splitter transforms) or the single output schema (all other types)
 * @throws IllegalArgumentException if the plugin with same id is already deployed
 * @throws ValidationException if the plugin threw an exception during configuration
 */
public StageSpec.Builder configureStage(String stageName, ETLPlugin etlPlugin, DefaultPipelineConfigurer pipelineConfigurer) throws ValidationException {
    // the tracked selector is queried after configuration for the artifact that was selected for this plugin
    TrackedPluginSelector pluginSelector = new TrackedPluginSelector(new ArtifactSelectorProvider().getPluginSelector(etlPlugin.getArtifactConfig()));
    String type = etlPlugin.getType();
    String pluginName = etlPlugin.getName();
    DefaultStageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer();
    FailureCollector collector = stageConfigurer.getFailureCollector();
    Object plugin = getPlugin(stageName, etlPlugin, pluginSelector, type, pluginName, collector);
    try {
        if (type.equals(BatchJoiner.PLUGIN_TYPE)) {
            MultiInputPipelineConfigurable multiPlugin = (MultiInputPipelineConfigurable) plugin;
            multiPlugin.configurePipeline(pipelineConfigurer);
            // A plugin of the BatchJoiner type may also be an AutoJoiner; if so it gets the additional
            // auto-join configuration below.
            // NOTE(review): the beginning of the original comment is missing from this excerpt; the surviving
            // fragment reads "to the BatchAutoJoiner while preserving backwards compatibility in the pipeline
            // config." -- confirm the full wording against the upstream source.
            if (plugin instanceof AutoJoiner) {
                configureAutoJoiner(stageName, (AutoJoiner) plugin, stageConfigurer, collector);
            }
        } else if (type.equals(SplitterTransform.PLUGIN_TYPE)) {
            MultiOutputPipelineConfigurable multiOutputPlugin = (MultiOutputPipelineConfigurable) plugin;
            multiOutputPlugin.configurePipeline(pipelineConfigurer);
        } else if (!type.equals(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
            // every remaining type except the Spark program type is configured as a single-input plugin;
            // plugins of the Spark program type are not configured here at all
            PipelineConfigurable singlePlugin = (PipelineConfigurable) plugin;
            singlePlugin.configurePipeline(pipelineConfigurer);
            // evaluate macros and find out if there is connection used
            // (only for source plugin types and batch sinks, and only when no runtime evaluator is set)
            if ((sourcePluginTypes.contains(type) || BatchSink.PLUGIN_TYPE.equals(type)) && runtimeEvaluator == null) {
                pluginConfigurer.evaluateMacros(etlPlugin.getProperties(), connectionEvaluator, options);
            }
        }
    } catch (InvalidConfigPropertyException e) {
        // a single bad property: record it against that property with a corrective action
        collector.addFailure(e.getMessage(), String.format("Provide valid value for config property '%s'.", e.getProperty())).withConfigProperty(e.getProperty());
    } catch (InvalidStageException e) {
        // no nested reasons: report the exception's own message
        if (e.getReasons().isEmpty()) {
            collector.addFailure(e.getMessage(), null);
        }
        // otherwise report each nested reason individually, attaching the property when one is known
        for (InvalidStageException reason : e.getReasons()) {
            if (reason instanceof InvalidConfigPropertyException) {
                InvalidConfigPropertyException configException = (InvalidConfigPropertyException) reason;
                collector.addFailure(configException.getMessage(), String.format("Provide valid value for config property '%s'.", configException.getProperty())).withConfigProperty(configException.getProperty());
            } else {
                collector.addFailure(reason.getMessage(), null);
            }
        }
    } catch (ValidationException e) {
        // propagate unchanged so the generic Exception handler below does not wrap it into a new failure
        throw e;
    } catch (NullPointerException e) {
        // handle the case where plugin throws null pointer exception, this is to avoid having 'null' as error message
        collector.addFailure(String.format("Null error occurred while configuring the stage %s.", stageName), null).withStacktrace(e.getStackTrace());
    } catch (ArrayIndexOutOfBoundsException e) {
        // handle the case where plugin throws index out of bounds exception,
        // this is to avoid having a number like '2', '8' etc as error message
        collector.addFailure(String.format("Index out of bounds error occurred while configuring the stage %s.", stageName), null).withStacktrace(e.getStackTrace());
    } catch (ConnectionBadRequestException e) {
        collector.addFailure(e.getMessage(), "Provide a valid connection name.");
    } catch (Exception e) {
        // catch-all: any other exception raised during configuration becomes a failure with its stack trace
        collector.addFailure(String.format("Error encountered while configuring the stage: '%s'", e.getMessage()), null).withStacktrace(e.getStackTrace());
    }
    // throw validation exception if there are any errors being carried by failure collector
    collector.getOrThrowException();
    PluginSpec pluginSpec = new PluginSpec(type, pluginName, etlPlugin.getProperties(), pluginSelector.getSelectedArtifact());
    StageSpec.Builder specBuilder = StageSpec.builder(stageName, pluginSpec).addInputSchemas(pipelineConfigurer.getStageConfigurer().getInputSchemas()).setErrorSchema(stageConfigurer.getErrorSchema());
    // splitter transforms expose one schema per output port; all other types have a single output schema
    if (type.equals(SplitterTransform.PLUGIN_TYPE)) {
        specBuilder.setPortSchemas(stageConfigurer.getOutputPortSchemas());
    } else {
        specBuilder.setOutputSchema(stageConfigurer.getOutputSchema());
    }
    return specBuilder;
}
Also used : ArtifactSelectorProvider(io.cdap.cdap.etl.common.ArtifactSelectorProvider) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) InvalidStageException(io.cdap.cdap.etl.api.validation.InvalidStageException) MultiOutputPipelineConfigurable(io.cdap.cdap.etl.api.MultiOutputPipelineConfigurable) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) InvalidConfigPropertyException(io.cdap.cdap.etl.api.validation.InvalidConfigPropertyException) InvalidPluginConfigException(io.cdap.cdap.api.plugin.InvalidPluginConfigException) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) ConnectionBadRequestException(io.cdap.cdap.etl.proto.connection.ConnectionBadRequestException) InvalidStageException(io.cdap.cdap.etl.api.validation.InvalidStageException) ConnectionBadRequestException(io.cdap.cdap.etl.proto.connection.ConnectionBadRequestException) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) MultiInputPipelineConfigurable(io.cdap.cdap.etl.api.MultiInputPipelineConfigurable) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) InvalidConfigPropertyException(io.cdap.cdap.etl.api.validation.InvalidConfigPropertyException) AutoJoiner(io.cdap.cdap.etl.api.join.AutoJoiner) MultiInputPipelineConfigurable(io.cdap.cdap.etl.api.MultiInputPipelineConfigurable) MultiOutputPipelineConfigurable(io.cdap.cdap.etl.api.MultiOutputPipelineConfigurable) PipelineConfigurable(io.cdap.cdap.etl.api.PipelineConfigurable) FailureCollector(io.cdap.cdap.etl.api.FailureCollector)

Example 9 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelinePlanner method dagToPipeline.

/**
 * Builds a PipelinePhase out of a Dag by walking its nodes in topological order.
 * Each node contributes its outgoing connections (when it has any) and one stage: a synthesized
 * connector stage for connector nodes, or the stage spec looked up by name for every other node.
 *
 * @param dag the dag to convert
 * @param connectors connector nodes across all dags; the value looked up for a stage name is recorded
 *   as the connector's original name
 * @param specs specifications for every stage, keyed by stage name
 * @param conditionConnectors a stage whose name appears among this map's values is also treated as a connector
 * @return the converted dag
 */
private PipelinePhase dagToPipeline(Dag dag, Map<String, String> connectors, Map<String, StageSpec> specs, Map<String, String> conditionConnectors) {
    PipelinePhase.Builder builder = PipelinePhase.builder(supportedPluginTypes);
    for (String node : dag.getTopologicalOrder()) {
        Set<String> nodeOutputs = dag.getNodeOutputs(node);
        if (!nodeOutputs.isEmpty()) {
            builder.addConnections(node, nodeOutputs);
        }

        String connectorOrigin = connectors.get(node);
        boolean isConnector = connectorOrigin != null || conditionConnectors.values().contains(node);
        if (!isConnector) {
            // regular plugin: reuse the spec that was generated for this stage
            builder.addStage(specs.get(node));
            continue;
        }

        // connector node: it acts as a source when it is a source of this dag, otherwise as a sink
        String connectorType;
        if (dag.getSources().contains(node)) {
            connectorType = Constants.Connector.SOURCE_TYPE;
        } else {
            connectorType = Constants.Connector.SINK_TYPE;
        }
        // fall back to the node's own name when the connectors map has no entry for it
        String originalName = connectorOrigin != null ? connectorOrigin : node;
        PluginSpec connectorSpec = new PluginSpec(
                Constants.Connector.PLUGIN_TYPE,
                "connector",
                ImmutableMap.of(Constants.Connector.ORIGINAL_NAME, originalName, Constants.Connector.TYPE, connectorType),
                null);
        builder.addStage(StageSpec.builder(node, connectorSpec).build());
    }
    return builder.build();
}
Also used : PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec)

Example 10 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class ExternalSparkProgram method configure.

/**
 * Configures this Spark program from the phase spec and the plugin of the stage it runs.
 *
 * <p>Sets client, driver and executor resources from the phase spec, registers the phase's plugins, then loads
 * the stage's plugin to decide how the program is launched: a {@link Spark} plugin configures the program itself,
 * a {@link SparkMain} plugin is run through {@code ScalaSparkMainWrapper}, and anything else through
 * {@code JavaSparkMainWrapper}. Finally the program name and the properties (stage name and serialized phase
 * spec) are set.
 *
 * @throws IllegalStateException if no plugin of the stage's plugin type and name can be found
 */
@Override
protected void configure() {
    setClientResources(phaseSpec.getClientResources());
    setDriverResources(phaseSpec.getDriverResources());
    setExecutorResources(phaseSpec.getResources());
    // register the plugins at program level so that the program can be failed by the platform early in case of
    // plugin requirements not being met
    phaseSpec.getPhase().registerPlugins(getConfigurer(), runtimeConfigurer, deployedNamespace);
    PluginSpec pluginSpec = stageSpec.getPlugin();
    PluginProperties pluginProperties = PluginProperties.builder().addAll(pluginSpec.getProperties()).build();
    // use a UUID as plugin ID so that it doesn't clash with anything. Only using the class here to
    // check which main class is needed
    // TODO: clean this up so that we only get the class once and store it in the PluginSpec instead of getting
    // it in the pipeline spec generator and here
    Object sparkPlugin = usePlugin(pluginSpec.getType(), pluginSpec.getName(), UUID.randomUUID().toString(), pluginProperties);
    if (sparkPlugin == null) {
        // should never happen, should have been checked before by the pipeline spec generator.
        // Report the actual stage name; STAGE_NAME is only the property key under which that name is stored below.
        throw new IllegalStateException(String.format("No plugin found of type %s and name %s for stage %s", pluginSpec.getType(), pluginSpec.getName(), stageSpec.getName()));
    }
    if (sparkPlugin instanceof Spark) {
        // TODO: Pass in a forwarding configurer so that we can capture the properties set by the plugin
        // However the usage is very limited as the plugin can always use plugin config to preserve properties
        ((Spark) sparkPlugin).configure(getConfigurer());
    } else if (sparkPlugin instanceof SparkMain) {
        setMainClass(ScalaSparkMainWrapper.class);
    } else {
        setMainClass(JavaSparkMainWrapper.class);
    }
    setName(phaseSpec.getPhaseName());
    Map<String, String> properties = new HashMap<>();
    properties.put(STAGE_NAME, stageSpec.getName());
    properties.put(Constants.PIPELINEID, GSON.toJson(phaseSpec, BatchPhaseSpec.class));
    setProperties(properties);
}
Also used : PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) HashMap(java.util.HashMap) SparkMain(io.cdap.cdap.api.spark.SparkMain) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) Spark(io.cdap.cdap.api.spark.Spark) AbstractSpark(io.cdap.cdap.api.spark.AbstractSpark) PluginProperties(io.cdap.cdap.api.plugin.PluginProperties)

Aggregations

PluginSpec (io.cdap.cdap.etl.proto.v2.spec.PluginSpec)16 Test (org.junit.Test)12 BatchPipelineSpec (io.cdap.cdap.etl.batch.BatchPipelineSpec)11 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)11 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)11 PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec)11 Resources (io.cdap.cdap.api.Resources)4 StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)3 HashMap (java.util.HashMap)3 ArtifactId (io.cdap.cdap.api.artifact.ArtifactId)2 ArtifactVersion (io.cdap.cdap.api.artifact.ArtifactVersion)2 PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase)2 ETLTransformationPushdown (io.cdap.cdap.etl.proto.v2.ETLTransformationPushdown)2 ArtifactVersionRange (io.cdap.cdap.api.artifact.ArtifactVersionRange)1 Schema (io.cdap.cdap.api.data.schema.Schema)1 MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator)1 MacroParserOptions (io.cdap.cdap.api.macro.MacroParserOptions)1 InvalidPluginConfigException (io.cdap.cdap.api.plugin.InvalidPluginConfigException)1 PluginProperties (io.cdap.cdap.api.plugin.PluginProperties)1 AbstractSpark (io.cdap.cdap.api.spark.AbstractSpark)1