
Example 1 with PluginSpec

Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.

The class ExternalSparkProgram, method configure:

@Override
protected void configure() {
    PluginSpec pluginSpec = stageSpec.getPlugin();
    PluginProperties pluginProperties = PluginProperties.builder().addAll(pluginSpec.getProperties()).build();
    // use a UUID as plugin ID so that it doesn't clash with anything. Only using the class here to
    // check which main class is needed
    // TODO: clean this up so that we only get the class once and store it in the PluginSpec instead of getting
    // it in the pipeline spec generator and here
    Object sparkPlugin = usePlugin(pluginSpec.getType(), pluginSpec.getName(),
                                   UUID.randomUUID().toString(), pluginProperties);
    if (sparkPlugin == null) {
        // should never happen, should have been checked before by the pipeline spec generator
        throw new IllegalStateException(String.format("No plugin found of type %s and name %s for stage %s",
                                                      pluginSpec.getType(), pluginSpec.getName(), STAGE_NAME));
    }
    if (Spark.class.isAssignableFrom(sparkPlugin.getClass())) {
        // TODO: Pass in a forwarding configurer so that we can capture the properties set by the plugin
        // However the usage is very limited as the plugin can always use plugin config to preserve properties
        ((Spark) sparkPlugin).configure(getConfigurer());
    } else if (SparkMain.class.isAssignableFrom(sparkPlugin.getClass())) {
        setMainClass(ScalaSparkMainWrapper.class);
    } else {
        setMainClass(JavaSparkMainWrapper.class);
    }
    setName(phaseSpec.getPhaseName());
    Map<String, String> properties = new HashMap<>();
    properties.put(STAGE_NAME, stageSpec.getName());
    properties.put(Constants.PIPELINEID, GSON.toJson(phaseSpec, BatchPhaseSpec.class));
    setProperties(properties);
}
Also used: PluginSpec (co.cask.cdap.etl.spec.PluginSpec), HashMap (java.util.HashMap), SparkMain (co.cask.cdap.api.spark.SparkMain), BatchPhaseSpec (co.cask.cdap.etl.batch.BatchPhaseSpec), AbstractSpark (co.cask.cdap.api.spark.AbstractSpark), Spark (co.cask.cdap.api.spark.Spark), PluginProperties (co.cask.cdap.api.plugin.PluginProperties)
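The first two lines of configure() show the round trip this page is really about: a PluginSpec carries the plugin's type, name, and properties, and PluginProperties is rebuilt from getProperties() before usePlugin() instantiates the plugin. A minimal standalone sketch of that round trip, using only the constructor and getters visible in these examples (the demo class, plugin type, and property values are hypothetical):

import java.util.Map;

import com.google.common.collect.ImmutableMap;

import co.cask.cdap.api.plugin.PluginProperties;
import co.cask.cdap.etl.spec.PluginSpec;

// Hypothetical demo class; it exercises only the PluginSpec members used above.
public class PluginSpecDemo {

    public static void main(String[] args) {
        Map<String, String> props = ImmutableMap.of("mainClass", "com.example.MySparkJob");
        // A null artifact id is legal here, as the connector specs in Examples 2 and 3 show.
        PluginSpec spec = new PluginSpec("sparkprogram", "wordcount", props, null);
        // Rebuild PluginProperties from the spec, mirroring the first lines of configure().
        PluginProperties pluginProperties = PluginProperties.builder()
            .addAll(spec.getProperties())
            .build();
        System.out.println(spec.getType() + "/" + spec.getName() + " -> " + pluginProperties.getProperties());
    }
}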

Example 2 with PluginSpec

Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.

The class BatchPhaseSpecTest, method testDescription:

@Test
public void testDescription() throws Exception {
    /*
     * source1 --|
     *           |--> sink.connector
     * source2 --|
     */
    Map<String, String> props = new HashMap<>();
    PluginSpec connectorSpec = new PluginSpec(Constants.Connector.PLUGIN_TYPE, "connector", ImmutableMap.<String, String>of(), null);
    ArtifactId artifactId = new ArtifactId("art", new ArtifactVersion("1.0.0"), ArtifactScope.USER);
    PipelinePhase.Builder builder =
        PipelinePhase.builder(ImmutableSet.of(BatchSource.PLUGIN_TYPE, Constants.Connector.PLUGIN_TYPE))
            .addStage(StageSpec.builder("source1",
                                        new PluginSpec(BatchSource.PLUGIN_TYPE, "src", props, artifactId)).build())
            .addStage(StageSpec.builder("source2",
                                        new PluginSpec(BatchSource.PLUGIN_TYPE, "src", props, artifactId))
                        .addInputSchema("a", Schema.recordOf("stuff",
                                                             Schema.Field.of("x", Schema.of(Schema.Type.INT))))
                        .build())
            .addStage(StageSpec.builder("sink.connector", connectorSpec).build())
            .addConnection("source1", "sink.connector")
            .addConnection("source2", "sink.connector");
    BatchPhaseSpec phaseSpec = new BatchPhaseSpec("phase-1", builder.build(),
                                                  new Resources(), new Resources(), new Resources(),
                                                  false, false, Collections.<String, String>emptyMap(),
                                                  0, Collections.<String, String>emptyMap(), false);
    Assert.assertEquals("Sources 'source1', 'source2' to sinks 'sink.connector'.", phaseSpec.getDescription());
}
Also used: PluginSpec (co.cask.cdap.etl.spec.PluginSpec), ArtifactVersion (co.cask.cdap.api.artifact.ArtifactVersion), ArtifactId (co.cask.cdap.api.artifact.ArtifactId), HashMap (java.util.HashMap), PipelinePhase (co.cask.cdap.etl.common.PipelinePhase), Resources (co.cask.cdap.api.Resources), Test (org.junit.Test)
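The interesting part of the test is the builder chain; reduced to a single stage, the same calls look like this (a sketch reusing the test's own names, with props and artifactId as defined above):

// One stage from the test in isolation: a PluginSpec plus an input schema.
PluginSpec srcSpec = new PluginSpec(BatchSource.PLUGIN_TYPE, "src",
                                    ImmutableMap.<String, String>of(), artifactId);
StageSpec sourceStage = StageSpec.builder("source2", srcSpec)
    .addInputSchema("a", Schema.recordOf("stuff", Schema.Field.of("x", Schema.of(Schema.Type.INT))))
    .build();

The assertion then checks that BatchPhaseSpec derives its human-readable description from the phase's sources and sinks.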

Example 3 with PluginSpec

Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.

The class PipelinePlanner, method dagToPipeline:

/**
 * Converts a Dag into a PipelinePhase, using what we know about the plugin type of each node in the dag.
 * The PipelinePhase is what programs will take as input; it keeps track of sources, transforms, sinks, etc.
 *
 * @param dag the dag to convert
 * @param connectors connector nodes across all dags
 * @param specs specifications for every stage
 * @param conditionConnectors connector nodes associated with conditions
 * @return the PipelinePhase corresponding to the dag
 */
private PipelinePhase dagToPipeline(Dag dag, Map<String, String> connectors, Map<String, StageSpec> specs, Map<String, String> conditionConnectors) {
    PipelinePhase.Builder phaseBuilder = PipelinePhase.builder(supportedPluginTypes);
    for (String stageName : dag.getTopologicalOrder()) {
        Set<String> outputs = dag.getNodeOutputs(stageName);
        if (!outputs.isEmpty()) {
            phaseBuilder.addConnections(stageName, outputs);
        }
        // add connectors
        String originalName = connectors.get(stageName);
        if (originalName != null || conditionConnectors.values().contains(stageName)) {
            String connectorType = dag.getSources().contains(stageName)
                ? Constants.Connector.SOURCE_TYPE : Constants.Connector.SINK_TYPE;
            PluginSpec connectorSpec = new PluginSpec(
                Constants.Connector.PLUGIN_TYPE, "connector",
                ImmutableMap.of(Constants.Connector.ORIGINAL_NAME, originalName != null ? originalName : stageName,
                                Constants.Connector.TYPE, connectorType),
                null);
            phaseBuilder.addStage(StageSpec.builder(stageName, connectorSpec).build());
            continue;
        }
        // add other plugin types
        StageSpec spec = specs.get(stageName);
        phaseBuilder.addStage(spec);
    }
    return phaseBuilder.build();
}
Also used: PluginSpec (co.cask.cdap.etl.spec.PluginSpec), PipelinePhase (co.cask.cdap.etl.common.PipelinePhase), StageSpec (co.cask.cdap.etl.spec.StageSpec)
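Stripped of the connector special case, the conversion is a single pass over the dag in topological order: copy each node's outgoing edges, then copy its StageSpec. A reduced sketch under that reading, using only the builder calls shown above (dag, specs, and supportedPluginTypes as in the method signature):

// Core of dagToPipeline without the connector handling.
PipelinePhase.Builder phaseBuilder = PipelinePhase.builder(supportedPluginTypes);
for (String stageName : dag.getTopologicalOrder()) {
    Set<String> outputs = dag.getNodeOutputs(stageName);
    if (!outputs.isEmpty()) {
        phaseBuilder.addConnections(stageName, outputs);
    }
    phaseBuilder.addStage(specs.get(stageName));
}
PipelinePhase phase = phaseBuilder.build();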

Example 4 with PluginSpec

Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.

The class BatchPipelineSpecGenerator, method generateSpec:

@Override
public BatchPipelineSpec generateSpec(ETLBatchConfig config) {
    BatchPipelineSpec.Builder specBuilder = BatchPipelineSpec.builder();
    for (ETLStage endingAction : config.getPostActions()) {
        String name = endingAction.getName();
        DefaultPipelineConfigurer<T> pipelineConfigurer = new DefaultPipelineConfigurer<>(configurer, name, engine);
        PluginSpec pluginSpec = configurePlugin(endingAction.getName(), endingAction.getPlugin(), pipelineConfigurer);
        specBuilder.addAction(new ActionSpec(name, pluginSpec));
    }
    configureStages(config, specBuilder);
    return specBuilder.build();
}
Also used: PluginSpec (co.cask.cdap.etl.spec.PluginSpec), ETLStage (co.cask.cdap.etl.proto.v2.ETLStage), DefaultPipelineConfigurer (co.cask.cdap.etl.common.DefaultPipelineConfigurer)
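Each post-action ends up as an ActionSpec pairing the stage name with its configured PluginSpec. A hand-built equivalent of one loop iteration, assuming the two-argument ActionSpec constructor used above (the plugin type, name, and properties are illustrative; generateSpec normally gets the spec from configurePlugin):

// Illustrative only: in generateSpec the PluginSpec comes from configurePlugin.
PluginSpec emailSpec = new PluginSpec("postaction", "email",
                                      ImmutableMap.of("recipients", "ops@example.com"), null);
specBuilder.addAction(new ActionSpec("notify", emailSpec));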

Aggregations

PluginSpec (co.cask.cdap.etl.spec.PluginSpec): 4
PipelinePhase (co.cask.cdap.etl.common.PipelinePhase): 2
HashMap (java.util.HashMap): 2
Resources (co.cask.cdap.api.Resources): 1
ArtifactId (co.cask.cdap.api.artifact.ArtifactId): 1
ArtifactVersion (co.cask.cdap.api.artifact.ArtifactVersion): 1
PluginProperties (co.cask.cdap.api.plugin.PluginProperties): 1
AbstractSpark (co.cask.cdap.api.spark.AbstractSpark): 1
Spark (co.cask.cdap.api.spark.Spark): 1
SparkMain (co.cask.cdap.api.spark.SparkMain): 1
BatchPhaseSpec (co.cask.cdap.etl.batch.BatchPhaseSpec): 1
DefaultPipelineConfigurer (co.cask.cdap.etl.common.DefaultPipelineConfigurer): 1
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 1
StageSpec (co.cask.cdap.etl.spec.StageSpec): 1
Test (org.junit.Test): 1