Search in sources:

Example 11 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

From the class PipelinePlannerTest, method testMultipleActionConditions.

/**
 * Verifies that the planner splits a pipeline containing multiple chained action stages
 * and nested condition stages into the expected phases and phase connections. The
 * expected plan is constructed by hand below and compared against the planner output.
 */
@Test
public void testMultipleActionConditions() {
    /*
                                                   |-- n2 -- a3
            |-- a1 --|        |-- n0 -- n1 -- c1 --|                          |-- a5 --|
        a0--|        |-- c0 --|                    |-- n3 -- c2 -- n8 -- a4 --|        |-- a7
            |-- a2 --|        |                                               |-- a6 --|
                              |        |-- n4 -- n5 -- c4 -- c5 -- n9
                              |-- c3 --|
                                       |              |-- a8
                                       |-- n6 -- n7 --|
                                                      |-- a9
     */
    // Ten actions (a0-a9), six conditions (c0-c5) and ten regular nodes (n0-n9).
    Set<StageSpec> stageSpecs = ImmutableSet.of(
        StageSpec.builder("a0", ACTION).build(), StageSpec.builder("a1", ACTION).build(),
        StageSpec.builder("a2", ACTION).build(), StageSpec.builder("a3", ACTION).build(),
        StageSpec.builder("a4", ACTION).build(), StageSpec.builder("a5", ACTION).build(),
        StageSpec.builder("a6", ACTION).build(), StageSpec.builder("a7", ACTION).build(),
        StageSpec.builder("a8", ACTION).build(), StageSpec.builder("a9", ACTION).build(),
        StageSpec.builder("c0", CONDITION).build(), StageSpec.builder("c1", CONDITION).build(),
        StageSpec.builder("c2", CONDITION).build(), StageSpec.builder("c3", CONDITION).build(),
        StageSpec.builder("c4", CONDITION).build(), StageSpec.builder("c5", CONDITION).build(),
        StageSpec.builder("n0", NODE).build(), StageSpec.builder("n1", NODE).build(),
        StageSpec.builder("n2", NODE).build(), StageSpec.builder("n3", NODE).build(),
        StageSpec.builder("n4", NODE).build(), StageSpec.builder("n5", NODE).build(),
        StageSpec.builder("n6", NODE).build(), StageSpec.builder("n7", NODE).build(),
        StageSpec.builder("n8", NODE).build(), StageSpec.builder("n9", NODE).build());
    // Edges of the diagram above; the boolean marks the true/false branch of a condition.
    Set<Connection> connections = ImmutableSet.of(
        new Connection("a0", "a1"), new Connection("a0", "a2"),
        new Connection("a1", "c0"), new Connection("a2", "c0"),
        new Connection("c0", "n0", true), new Connection("c0", "c3", false),
        new Connection("n0", "n1"), new Connection("n1", "c1"),
        new Connection("c1", "n2", true), new Connection("c1", "n3", false),
        new Connection("n2", "a3"), new Connection("n3", "c2"),
        new Connection("c2", "n8", true), new Connection("n8", "a4"),
        new Connection("a4", "a5"), new Connection("a4", "a6"),
        new Connection("a5", "a7"), new Connection("a6", "a7"),
        new Connection("c3", "n4", true), new Connection("c3", "n6", false),
        new Connection("n4", "n5"), new Connection("n5", "c4"),
        new Connection("c4", "c5", true), new Connection("c5", "n9", true),
        new Connection("n6", "n7"), new Connection("n7", "a8"), new Connection("n7", "a9"));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), ACTION.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
    Set<String> reduceTypes = ImmutableSet.of();
    Set<String> emptySet = ImmutableSet.of();
    Set<String> actionTypes = ImmutableSet.of(ACTION.getType());
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, actionTypes, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    Map<String, PipelinePhase> phases = new HashMap<>();
    // Expected phase-level connections. Action and condition stages become their own
    // single-stage phases, so connections between them carry over directly.
    Set<Connection> phaseConnections = new HashSet<>();
    phaseConnections.add(new Connection("a0", "a1"));
    phaseConnections.add(new Connection("a0", "a2"));
    phaseConnections.add(new Connection("a1", "c0"));
    phaseConnections.add(new Connection("a2", "c0"));
    phaseConnections.add(new Connection("a4", "a5"));
    phaseConnections.add(new Connection("a4", "a6"));
    phaseConnections.add(new Connection("a5", "a7"));
    phaseConnections.add(new Connection("a6", "a7"));
    phaseConnections.add(new Connection("c0", "c3", false));
    phaseConnections.add(new Connection("c4", "c5", true));
    // Each action and each condition maps to a phase containing just that stage.
    for (String action : ImmutableList.of("a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9")) {
        phases.put(action, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(action, ACTION).build()).build());
    }
    for (String condition : ImmutableList.of("c0", "c1", "c2", "c3", "c4", "c5")) {
        phases.put(condition, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(condition, CONDITION).build()).build());
    }
    // [c0] --true-->  [c0 -- n0 -- n1 -- c1]
    PipelinePhase phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n0", NODE).build())
        .addStage(StageSpec.builder("n1", NODE).build())
        .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n0", "n1").addConnection("n1", "c1.connector").build();
    Dag controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c0", "n0"), new Connection("n0", "n1"), new Connection("n1", "c1")));
    String phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c0", phaseName, true));
    // [c0 -- n0 -- n1 -- c1] --> [c1]
    phaseConnections.add(new Connection(phaseName, "c1"));
    // [c1] --true--> [c1 -- n2 -- a3]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n2", NODE).build())
        .addConnection("c1.connector", "n2").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c1", "n2"), new Connection("n2", "a3")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, true));
    // [c1 -- n2 -- a3] -- [a3]
    phaseConnections.add(new Connection(phaseName, "a3"));
    // [c1] --false--> [c1 -- n3 -- c2]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n3", NODE).build())
        .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("c1.connector", "n3").addConnection("n3", "c2.connector").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c1", "n3"), new Connection("n3", "c2")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, false));
    // [c1.connector -- n3 -- c2.connector] --> [c2]
    phaseConnections.add(new Connection(phaseName, "c2"));
    // [c2] --true--> [c2 -- n8 -- a4]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n8", NODE).build())
        .addConnection("c2.connector", "n8").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c2", "n8"), new Connection("n8", "a4")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c2", phaseName, true));
    // [c2 -- n8 -- a4] --> [a4]
    phaseConnections.add(new Connection(phaseName, "a4"));
    // [c3] --true--> [c3 -- n4 -- n5 -- c4]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n4", NODE).build())
        .addStage(StageSpec.builder("n5", NODE).build())
        .addStage(StageSpec.builder("c4.connector", connectorSpec("c4.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n4", "n5").addConnection("n5", "c4.connector").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c3", "n4"), new Connection("n4", "n5"), new Connection("n5", "c4")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c3", phaseName, true));
    // [c3 -- n4 -- n5 -- c4] --> c4
    phaseConnections.add(new Connection(phaseName, "c4"));
    // [c5] --true--> [c5 (via c4.connector) -- n9]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c4.connector", connectorSpec("c4.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n9", NODE).build())
        .addConnection("c4.connector", "n9").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c5", "n9")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c5", phaseName, true));
    // [c3] --false--> [c3 -- n6 -- n7 -- a8, a9]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n6", NODE).build())
        .addStage(StageSpec.builder("n7", NODE).build())
        .addConnection("n6", "n7").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c3", "n6"), new Connection("n6", "n7"), new Connection("n7", "a8"), new Connection("n7", "a9")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c3", phaseName, false));
    // [c3 -- n6 -- n7 -- a8, a9] --> [a8]
    // [c3 -- n6 -- n7 -- a8, a9] --> [a9]
    phaseConnections.add(new Connection(phaseName, "a8"));
    phaseConnections.add(new Connection(phaseName, "a9"));
    PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

From the class PipelinePlannerTest, method testGeneratePlan.

/**
 * Verifies that the planner splits a branching pipeline with reduce stages (marked (r))
 * into the expected MapReduce-style phases, with connector stages inserted at phase
 * boundaries. Expected phases and phase connections are built by hand and compared
 * against the planner output.
 */
@Test
public void testGeneratePlan() {
    /*
             |--- n2(r) ----------|
             |                    |                                    |-- n10
        n1 --|--- n3(r) --- n5 ---|--- n6 --- n7(r) --- n8 --- n9(r) --|
             |                    |                                    |-- n11
             |--- n4(r) ----------|
     */
    // create the spec for this pipeline
    Schema schema = Schema.recordOf("stuff", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
    Set<StageSpec> stageSpecs = ImmutableSet.of(StageSpec.builder("n1", NODE).addOutput(schema, "n2", "n3", "n4").build(), StageSpec.builder("n2", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build(), StageSpec.builder("n3", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n5").build(), StageSpec.builder("n4", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build(), StageSpec.builder("n5", NODE).addInputSchema("n3", schema).addOutput(schema, "n6").build(), StageSpec.builder("n6", NODE).addInputSchemas(ImmutableMap.of("n2", schema, "n5", schema, "n4", schema)).addOutput(schema, "n7").build(), StageSpec.builder("n7", REDUCE).addInputSchema("n6", schema).addOutput(schema, "n8").build(), StageSpec.builder("n8", NODE).addInputSchema("n7", schema).addOutput(schema, "n9").build(), StageSpec.builder("n9", REDUCE).addInputSchema("n8", schema).addOutput(schema, "n10", "n11").build(), StageSpec.builder("n10", NODE).addInputSchema("n9", schema).build(), StageSpec.builder("n11", NODE).addInputSchema("n9", schema).build());
    Set<Connection> connections = ImmutableSet.of(new Connection("n1", "n2"), new Connection("n1", "n3"), new Connection("n1", "n4"), new Connection("n2", "n6"), new Connection("n3", "n5"), new Connection("n4", "n6"), new Connection("n5", "n6"), new Connection("n6", "n7"), new Connection("n7", "n8"), new Connection("n8", "n9"), new Connection("n9", "n10"), new Connection("n9", "n11"));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE);
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> emptySet = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, emptySet, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    // Build the expected phases. Each phase ends at a connector stage (x.connector),
    // which acts as a sink in the producing phase and a source in the consuming phase.
    Map<String, PipelinePhase> phases = new HashMap<>();
    /*
        n1 --> n1.out.connector
     */
    PipelinePhase phase1 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n1", NODE).addOutput(schema, "n2", "n3", "n4").build()).addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SINK_TYPE)).build()).addConnections("n1", ImmutableSet.of("n1.out.connector")).build();
    String phase1Name = PipelinePlanner.getPhaseName(phase1.getDag());
    phases.put(phase1Name, phase1);
    /*
        phase2:
        n1.out.connector --- n2(r) --- n6 --- n7.connector
     */
    PipelinePhase phase2 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n2", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build()).addStage(StageSpec.builder("n6", NODE).addInputSchema("n2", schema).addInputSchema("n4", schema).addInputSchema("n5", schema).addOutput(schema, "n7").build()).addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SINK_TYPE)).build()).addConnection("n1.out.connector", "n2").addConnection("n2", "n6").addConnection("n6", "n7.connector").build();
    String phase2Name = PipelinePlanner.getPhaseName(phase2.getDag());
    phases.put(phase2Name, phase2);
    /*
        phase3:
        n1.out.connector --- n3(r) --- n5 --- n6 --- n7.connector
     */
    PipelinePhase phase3 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n5", NODE).addInputSchema("n3", schema).addOutput(schema, "n6").build()).addStage(StageSpec.builder("n6", NODE).addInputSchema("n2", schema).addInputSchema("n4", schema).addInputSchema("n5", schema).addOutput(schema, "n7").build()).addStage(StageSpec.builder("n3", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n5").build()).addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SINK_TYPE)).build()).addConnection("n1.out.connector", "n3").addConnection("n3", "n5").addConnection("n5", "n6").addConnection("n6", "n7.connector").build();
    String phase3Name = PipelinePlanner.getPhaseName(phase3.getDag());
    phases.put(phase3Name, phase3);
    /*
        phase4:
        n1.out.connector --- n4(r) --- n6 --- n7.connector
     */
    PipelinePhase phase4 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n4", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build()).addStage(StageSpec.builder("n6", NODE).addInputSchema("n2", schema).addInputSchema("n4", schema).addInputSchema("n5", schema).addOutput(schema, "n7").build()).addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SINK_TYPE)).build()).addConnection("n1.out.connector", "n4").addConnection("n4", "n6").addConnection("n6", "n7.connector").build();
    String phase4Name = PipelinePlanner.getPhaseName(phase4.getDag());
    phases.put(phase4Name, phase4);
    /*
        phase5:
        n7.connector --- n7(r) --- n8 --- n9.connector
     */
    PipelinePhase phase5 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n8", NODE).addInputSchema("n7", schema).addOutput(schema, "n9").build()).addStage(StageSpec.builder("n7", REDUCE).addInputSchema("n6", schema).addOutput(schema, "n8").build()).addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n9.connector", connectorSpec("n9", Constants.Connector.SINK_TYPE)).build()).addConnection("n7.connector", "n7").addConnection("n7", "n8").addConnection("n8", "n9.connector").build();
    String phase5Name = PipelinePlanner.getPhaseName(phase5.getDag());
    phases.put(phase5Name, phase5);
    /*
        phase6:
                                 |-- n10
        n9.connector --- n9(r) --|
                                 |-- n11
     */
    PipelinePhase phase6 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n10", NODE).addInputSchema("n9", schema).build()).addStage(StageSpec.builder("n11", NODE).addInputSchema("n9", schema).build()).addStage(StageSpec.builder("n9", REDUCE).addInputSchema("n8", schema).addOutput(schema, "n10", "n11").build()).addStage(StageSpec.builder("n9.connector", connectorSpec("n9", Constants.Connector.SOURCE_TYPE)).build()).addConnection("n9.connector", "n9").addConnection("n9", "n10").addConnection("n9", "n11").build();
    String phase6Name = PipelinePlanner.getPhaseName(phase6.getDag());
    phases.put(phase6Name, phase6);
    // Expected phase-level dag: phase1 fans out to phases 2-4, which all feed phase5,
    // which feeds phase6.
    Set<Connection> phaseConnections = new HashSet<>();
    phaseConnections.add(new Connection(phase1Name, phase2Name));
    phaseConnections.add(new Connection(phase1Name, phase3Name));
    phaseConnections.add(new Connection(phase1Name, phase4Name));
    phaseConnections.add(new Connection(phase2Name, phase5Name));
    phaseConnections.add(new Connection(phase3Name, phase5Name));
    phaseConnections.add(new Connection(phase4Name, phase5Name));
    phaseConnections.add(new Connection(phase5Name, phase6Name));
    PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 13 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

From the class PipelineSpecGeneratorTest, method testSQLEngine.

/**
 * Verifies that enabling transformation pushdown with a SQL engine produces a spec
 * containing the corresponding SQL engine stage spec.
 */
@Test
public void testSQLEngine() throws ValidationException {
    Map<String, String> emptyMap = ImmutableMap.of();
    // A single-action pipeline with pushdown enabled and a mock SQL engine configured.
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .setPushdownEnabled(true)
        .setTransformationPushdown(new ETLTransformationPushdown(MOCK_SQL_ENGINE))
        .build();
    PipelineSpec generatedSpec = specGenerator.generateSpec(etlConfig);
    // The expected spec mirrors the config, including the sqlengine stage spec.
    PipelineSpec expectedSpec = BatchPipelineSpec.builder()
        .addStage(StageSpec.builder("action",
            new PluginSpec(Action.PLUGIN_TYPE, "mockaction", emptyMap, ARTIFACT_ID)).build())
        .setResources(etlConfig.getResources())
        .setDriverResources(etlConfig.getDriverResources())
        .setClientResources(etlConfig.getClientResources())
        .setStageLoggingEnabled(etlConfig.isStageLoggingEnabled())
        .setSqlEngineStageSpec(StageSpec.builder("sqlengine_mocksqlengine",
            new PluginSpec(BatchSQLEngine.PLUGIN_TYPE, "mocksqlengine", emptyMap, ARTIFACT_ID)).build())
        .build();
    Assert.assertEquals(expectedSpec, generatedSpec);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLTransformationPushdown(io.cdap.cdap.etl.proto.v2.ETLTransformationPushdown) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 14 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

From the class SparkStreamingPipelineDriver, method run.

/**
 * Entry point for running a streaming pipeline. Deserializes the pipeline spec from the
 * program specification, prepares a Spark Streaming checkpoint directory (unless
 * checkpoints are disabled or preview is enabled), starts the streaming context and
 * blocks until the program is stopped.
 *
 * @param sec the Spark execution context provided by the runtime
 * @throws Exception if checkpoint setup fails or the streaming job fails
 */
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
    // The pipeline spec is serialized as JSON into a program property at deployment time.
    DataStreamsPipelineSpec pipelineSpec = GSON.fromJson(sec.getSpecification().getProperty(Constants.PIPELINEID), DataStreamsPipelineSpec.class);
    Set<StageSpec> stageSpecs = pipelineSpec.getStages();
    PipelinePhase pipelinePhase = PipelinePhase.builder(SUPPORTED_PLUGIN_TYPES).addConnections(pipelineSpec.getConnections()).addStages(stageSpecs).build();
    boolean checkpointsDisabled = pipelineSpec.isCheckpointsDisabled();
    // Preview is detected via the data tracer of any stage; an empty pipeline is treated
    // as preview so that no checkpoint directory is created for it.
    boolean isPreviewEnabled = stageSpecs.isEmpty() || sec.getDataTracer(stageSpecs.iterator().next().getName()).isEnabled();
    String checkpointDir = null;
    JavaSparkContext context = null;
    if (!checkpointsDisabled && !isPreviewEnabled) {
        String pipelineName = sec.getApplicationSpecification().getName();
        String configCheckpointDir = pipelineSpec.getCheckpointDirectory();
        if (Strings.isNullOrEmpty(configCheckpointDir)) {
            // Use the directory of a fileset dataset if the checkpoint directory is not set.
            Admin admin = sec.getAdmin();
            // TODO: CDAP-16329 figure out a way to filter out this fileset in dataset lineage
            if (!admin.datasetExists(DEFAULT_CHECKPOINT_DATASET_NAME)) {
                admin.createDataset(DEFAULT_CHECKPOINT_DATASET_NAME, FileSet.class.getName(), FileSetProperties.builder().build());
            }
            // there isn't any way to instantiate the fileset except in a TxRunnable, so need to use a reference.
            AtomicReference<Location> checkpointBaseRef = new AtomicReference<>();
            Transactionals.execute(sec, new TxRunnable() {

                @Override
                public void run(DatasetContext context) throws Exception {
                    FileSet checkpointFileSet = context.getDataset(DEFAULT_CHECKPOINT_DATASET_NAME);
                    checkpointBaseRef.set(checkpointFileSet.getBaseLocation());
                }
            });
            configCheckpointDir = checkpointBaseRef.get().toURI().toString();
        }
        // Layout: <configCheckpointDir>/<pipelineName>/<pipelineId>
        Path baseCheckpointDir = new Path(new Path(configCheckpointDir), pipelineName);
        Path checkpointDirPath = new Path(baseCheckpointDir, pipelineSpec.getPipelineId());
        checkpointDir = checkpointDirPath.toString();
        context = new JavaSparkContext();
        Configuration configuration = context.hadoopConfiguration();
        // Set the filesystem to whatever the checkpoint directory uses. This is necessary since spark will override
        // the URI schema with what is set in this config. This needs to happen before StreamingCompat.getOrCreate
        // is called, since StreamingCompat.getOrCreate will attempt to parse the checkpointDir before calling
        // context function.
        URI checkpointUri = checkpointDirPath.toUri();
        if (checkpointUri.getScheme() != null) {
            configuration.set("fs.defaultFS", checkpointDir);
        }
        FileSystem fileSystem = FileSystem.get(checkpointUri, configuration);
        // On start, we check for any other pipeline ids for that pipeline name, and delete them if they exist.
        if (!ensureDirExists(fileSystem, baseCheckpointDir)) {
            throw new IOException(String.format("Unable to create checkpoint base directory '%s' for the pipeline.", baseCheckpointDir));
        }
        try {
            // Stale checkpoints from previous runs of this pipeline (other pipeline ids)
            // are deleted best-effort; failure to clean up is only logged.
            for (FileStatus child : fileSystem.listStatus(baseCheckpointDir)) {
                if (child.isDirectory()) {
                    if (!child.getPath().equals(checkpointDirPath) && !fileSystem.delete(child.getPath(), true)) {
                        LOG.warn("Unable to delete checkpoint directory {} from an old pipeline.", child);
                    }
                }
            }
        } catch (Exception e) {
            LOG.warn("Unable to clean up old checkpoint directories from old pipelines.", e);
        }
        if (!ensureDirExists(fileSystem, checkpointDirPath)) {
            throw new IOException(String.format("Unable to create checkpoint directory '%s' for the pipeline.", checkpointDir));
        }
    }
    // Delegate to the overload that builds the actual streaming context; checkpointDir
    // and context are null when checkpointing is skipped.
    JavaStreamingContext jssc = run(pipelineSpec, pipelinePhase, sec, checkpointDir, context);
    jssc.start();
    boolean stopped = false;
    try {
        // most programs will just keep running forever.
        // however, when CDAP stops the program, we get an interrupted exception.
        // at that point, we need to call stop on jssc, otherwise the program will hang and never stop.
        stopped = jssc.awaitTerminationOrTimeout(Long.MAX_VALUE);
    } finally {
        if (!stopped) {
            jssc.stop(true, pipelineSpec.isStopGracefully());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) Configuration(org.apache.hadoop.conf.Configuration) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) Admin(io.cdap.cdap.api.Admin) URI(java.net.URI) IOException(java.io.IOException) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) TxRunnable(io.cdap.cdap.api.TxRunnable) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) FileSystem(org.apache.hadoop.fs.FileSystem) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) DatasetContext(io.cdap.cdap.api.data.DatasetContext) Location(org.apache.twill.filesystem.Location)

Example 15 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

From the class PipelineSpecGeneratorTest, method testGenerateSpec.

/**
 * Verifies that the spec generator produces the correct stage specs for a branching
 * pipeline: propagated input/output schemas, error schemas, artifact information,
 * default driver/client resources and preview record count.
 */
@Test
public void testGenerateSpec() throws ValidationException {
    /*
     *           ---- t1 ------------
     *           |            |      |
     * source ---             |      |--- t3 --- sink1
     *           |            |      |
     *           ------------ t2 --------------- sink2
     *           |                        |
     *           |                        |
     *           -------------------------
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("sink1", MOCK_SINK)).addStage(new ETLStage("sink2", MOCK_SINK)).addStage(new ETLStage("t1", MOCK_TRANSFORM_A)).addStage(new ETLStage("t2", MOCK_TRANSFORM_A)).addStage(new ETLStage("t3", MOCK_TRANSFORM_B)).addConnection("source", "t1").addConnection("source", "t2").addConnection("source", "sink2").addConnection("t1", "t2").addConnection("t1", "t3").addConnection("t1", "sink2").addConnection("t2", "sink2").addConnection("t2", "t3").addConnection("t3", "sink1").setNumOfRecordsPreview(100).build();
    // test the spec generated is correct, with the right input and output schemas and artifact information.
    BatchPipelineSpec actual = specGenerator.generateSpec(etlConfig);
    Map<String, String> emptyMap = ImmutableMap.of();
    // Expected spec: each stage's input schemas come from its upstream stages and its
    // output schema is declared for all downstream stages shown in the diagram above.
    PipelineSpec expected = BatchPipelineSpec.builder().addStage(StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID)).addOutput(SCHEMA_A, "t1", "t2", "sink2").build()).addStage(StageSpec.builder("sink1", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID)).addInputSchema("t3", SCHEMA_B).setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("sink2", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A, "source", SCHEMA_A)).setErrorSchema(SCHEMA_A).build()).addStage(StageSpec.builder("t1", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID)).addInputSchema("source", SCHEMA_A).addOutput(SCHEMA_A, "t2", "t3", "sink2").setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("t2", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("source", SCHEMA_A, "t1", SCHEMA_A)).addOutput(SCHEMA_A, "t3", "sink2").setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A)).addOutput(SCHEMA_B, "sink1").setErrorSchema(SCHEMA_A).build()).addConnections(etlConfig.getConnections()).setResources(etlConfig.getResources()).setDriverResources(new Resources(1024, 1)).setClientResources(new Resources(1024, 1)).setStageLoggingEnabled(etlConfig.isStageLoggingEnabled()).setNumOfRecordsPreview(etlConfig.getNumOfRecordsPreview()).build();
    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Resources(io.cdap.cdap.api.Resources) Test(org.junit.Test)

Aggregations

PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec)18 Test (org.junit.Test)18 BatchPipelineSpec (io.cdap.cdap.etl.batch.BatchPipelineSpec)11 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)11 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)11 PluginSpec (io.cdap.cdap.etl.proto.v2.spec.PluginSpec)11 StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)9 PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase)8 Connection (io.cdap.cdap.etl.proto.Connection)8 HashMap (java.util.HashMap)8 HashSet (java.util.HashSet)7 Resources (io.cdap.cdap.api.Resources)3 Schema (io.cdap.cdap.api.data.schema.Schema)3 ETLTransformationPushdown (io.cdap.cdap.etl.proto.v2.ETLTransformationPushdown)2 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Admin (io.cdap.cdap.api.Admin)1 TxRunnable (io.cdap.cdap.api.TxRunnable)1 ArtifactId (io.cdap.cdap.api.artifact.ArtifactId)1