Search in sources :

Example 46 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testSecureStorePipeline.

/**
 * Tests the secure storage macro function in a pipelines by creating datasets from the secure store data.
 */
private void testSecureStorePipeline(Engine engine, String prefix) throws Exception {
    /*
     * Trivial pipeline from batch source to batch sink.
     *
     * source --------- sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("source", MockRuntimeDatasetSource.getPlugin("input", "${secure(" + prefix + "source)}"))).addStage(new ETLStage("sink", MockRuntimeDatasetSink.getPlugin("output", "${secure(" + prefix + "sink)}"))).addConnection("source", "sink").setEngine(engine).build();
    // place dataset names into secure storage
    getSecureStoreManager().put("default", prefix + "source", prefix + "MockSecureSourceDataset", "secure source dataset name", new HashMap<>());
    getSecureStoreManager().put("default", prefix + "sink", prefix + "MockSecureSinkDataset", "secure dataset name", new HashMap<>());
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("App-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    // make sure the datasets don't exist beforehand
    Assert.assertNull(getDataset(prefix + "MockSecureSourceDataset").get());
    Assert.assertNull(getDataset(prefix + "MockSecureSinkDataset").get());
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // now the datasets should exist
    Assert.assertNotNull(getDataset(prefix + "MockSecureSourceDataset").get());
    Assert.assertNotNull(getDataset(prefix + "MockSecureSinkDataset").get());
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) AppRequest(io.cdap.cdap.proto.artifact.AppRequest)

Example 47 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testAlertPublisher.

private void testAlertPublisher(Engine engine) throws Exception {
    String sourceName = "alertSource" + engine.name();
    String sinkName = "alertSink" + engine.name();
    String topic = "alertTopic" + engine.name();
    /*
     * source --> nullAlert --> sink
     *               |
     *               |--> TMS publisher
     */
    ETLBatchConfig config = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addStage(new ETLStage("tms alert", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace()))).addConnection("source", "nullAlert").addConnection("nullAlert", "sink").addConnection("nullAlert", "tms alert").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
    StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
    StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
    StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
    DataSetManager<Table> sourceTable = getDataset(sourceName);
    MockSource.writeInput(sourceTable, ImmutableList.of(record1, record2, alertRecord));
    WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    manager.start();
    manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
    DataSetManager<Table> sinkTable = getDataset(sinkName);
    Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(sinkTable));
    Set<StructuredRecord> expected = ImmutableSet.of(record1, record2);
    Assert.assertEquals(expected, actual);
    MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
    Set<Alert> actualMessages = new HashSet<>();
    try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
        while (iter.hasNext()) {
            Message message = iter.next();
            Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
            actualMessages.add(alert);
        }
    }
    Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<>()));
    Assert.assertEquals(expectedMessages, actualMessages);
    validateMetric(3, appId, "source.records.out");
    validateMetric(3, appId, "nullAlert.records.in");
    validateMetric(2, appId, "nullAlert.records.out");
    validateMetric(1, appId, "nullAlert.records.alert");
    validateMetric(2, appId, "sink.records.in");
    validateMetric(1, appId, "tms alert.records.in");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Message(io.cdap.cdap.api.messaging.Message) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Alert(io.cdap.cdap.etl.api.Alert) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 48 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testSimpleConditionWithActions.

@Test
public void testSimpleConditionWithActions() throws Exception {
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     * action --> condition --> file ---> trueSink
     *              |
     *              |---file-->----> falseSink
     *
     */
    String appName = "SimpleConditionWithActions";
    String trueSource = "true" + appName + "Source";
    String falseSource = "false" + appName + "Source";
    String trueSink = "true" + appName + "Sink";
    String falseSink = "false" + appName + "Sink";
    String actionTable = "actionTable" + appName;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("trueSource", MockSource.getPlugin(trueSource, schema))).addStage(new ETLStage("falseSource", MockSource.getPlugin(falseSource, schema))).addStage(new ETLStage("trueSink", MockSink.getPlugin(trueSink))).addStage(new ETLStage("falseSink", MockSink.getPlugin(falseSink))).addStage(new ETLStage("condition", MockCondition.getPlugin("condition"))).addStage(new ETLStage("action", MockAction.getPlugin(actionTable, "row1", "key1", "val1"))).addConnection("action", "condition").addConnection("condition", "trueSource", true).addConnection("condition", "falseSource", false).addConnection("trueSource", "trueSink").addConnection("falseSource", "falseSink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app(appName);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    for (String branch : Arrays.asList("true", "false")) {
        String source = branch.equals("true") ? trueSource : falseSource;
        String sink = branch.equals("true") ? trueSink : falseSink;
        // write records to source
        DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
        MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
        WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
        workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
        if (branch.equals("true")) {
            workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
        } else {
            workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
        }
        // check sink
        DataSetManager<Table> sinkManager = getDataset(sink);
        Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
        Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
        Assert.assertEquals(expected, actual);
        validateMetric(2, appId, branch + "Source.records.out");
        validateMetric(2, appId, branch + "Sink.records.in");
        // check Action is executed correctly
        DataSetManager<Table> actionTableDS = getDataset(actionTable);
        Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 49 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testSplitterToJoiner.

private void testSplitterToJoiner(Engine engine) throws Exception {
    Schema schema = Schema.recordOf("user", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    Schema infoSchema = Schema.recordOf("userInfo", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("fname", Schema.of(Schema.Type.STRING)));
    Schema joinedSchema = Schema.recordOf("join.output", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("fname", Schema.of(Schema.Type.STRING)));
    StructuredRecord user0 = StructuredRecord.builder(schema).set("id", 0L).build();
    StructuredRecord user1 = StructuredRecord.builder(schema).set("id", 1L).set("name", "one").build();
    StructuredRecord user0Info = StructuredRecord.builder(infoSchema).set("id", 0L).set("fname", "zero").build();
    StructuredRecord user0Joined = StructuredRecord.builder(joinedSchema).set("id", 0L).set("fname", "zero").build();
    String signupsName = "splitjoinSignups" + engine.name();
    String userInfoName = "splitjoinUserInfo" + engine.name();
    String sink1Name = "splitjoinSink1" + engine.name();
    String sink2Name = "splitjoinSink2" + engine.name();
    /*
     * userInfo --------------------------|
     *                                    |--> joiner --> sink1
     *                            |null --|
     * signups --> namesplitter --|
     *                            |non-null --> sink2
     */
    ETLBatchConfig config = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("signups", MockSource.getPlugin(signupsName, schema))).addStage(new ETLStage("userInfo", MockSource.getPlugin(userInfoName, infoSchema))).addStage(new ETLStage("namesplitter", NullFieldSplitterTransform.getPlugin("name"))).addStage(new ETLStage("joiner", MockJoiner.getPlugin("namesplitter.id=userInfo.id", "namesplitter,userInfo", ""))).addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Name))).addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Name))).addConnection("signups", "namesplitter").addConnection("namesplitter", "sink2", "non-null").addConnection("namesplitter", "joiner", "null").addConnection("userInfo", "joiner").addConnection("joiner", "sink1").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("SplitJoinTest-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write signups data
    DataSetManager<Table> signupsManager = getDataset(signupsName);
    MockSource.writeInput(signupsManager, ImmutableList.of(user0, user1));
    // write to userInfo the name for user0 to join against
    DataSetManager<Table> userInfoManager = getDataset(userInfoName);
    MockSource.writeInput(userInfoManager, ImmutableList.of(user0Info));
    // run pipeline
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // check output
    DataSetManager<Table> sinkManager = getDataset(sink2Name);
    Set<StructuredRecord> expected = ImmutableSet.of(user1);
    Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expected, actual);
    sinkManager = getDataset(sink1Name);
    expected = ImmutableSet.of(user0Joined);
    actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expected, actual);
    validateMetric(2, appId, "signups.records.out");
    validateMetric(1, appId, "userInfo.records.out");
    validateMetric(2, appId, "namesplitter.records.in");
    validateMetric(1, appId, "namesplitter.records.out.null");
    validateMetric(1, appId, "namesplitter.records.out.non-null");
    validateMetric(2, appId, "joiner.records.in");
    validateMetric(1, appId, "joiner.records.out");
    validateMetric(1, appId, "sink1.records.in");
    validateMetric(1, appId, "sink2.records.in");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)

Example 50 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testMacrosSparkPipeline.

/**
 * Tests that if macros are provided
 */
@Test
public void testMacrosSparkPipeline() throws Exception {
    /*
     * Trivial Spark pipeline from batch source to batch sink.
     *
     * source --------- sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(Engine.SPARK).addStage(new ETLStage("source", MockRuntimeDatasetSource.getPlugin("sparkinput", "${runtime${source}}"))).addStage(new ETLStage("sink", MockRuntimeDatasetSink.getPlugin("sparkoutput", "${runtime}${sink}"))).addConnection("source", "sink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("SparkApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // set runtime arguments for macro substitution
    Map<String, String> runtimeArguments = ImmutableMap.of("runtime", "mockRuntime", "sink", "SparkSinkDataset", "source", "Source", "runtimeSource", "mockRuntimeSparkSourceDataset");
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.setRuntimeArgs(runtimeArguments);
    // make sure the datasets don't exist beforehand
    Assert.assertNull(getDataset("mockRuntimeSparkSourceDataset").get());
    Assert.assertNull(getDataset("mockRuntimeSparkSinkDataset").get());
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // now the datasets should exist
    Assert.assertNotNull(getDataset("mockRuntimeSparkSourceDataset").get());
    Assert.assertNotNull(getDataset("mockRuntimeSparkSinkDataset").get());
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Aggregations

ApplicationId (io.cdap.cdap.proto.id.ApplicationId)789 Test (org.junit.Test)410 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)279 ProgramId (io.cdap.cdap.proto.id.ProgramId)263 ApplicationManager (io.cdap.cdap.test.ApplicationManager)225 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)223 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)196 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)180 Table (io.cdap.cdap.api.dataset.table.Table)178 WorkflowManager (io.cdap.cdap.test.WorkflowManager)169 NamespaceId (io.cdap.cdap.proto.id.NamespaceId)154 Schema (io.cdap.cdap.api.data.schema.Schema)147 ArrayList (java.util.ArrayList)129 HashSet (java.util.HashSet)126 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)124 HashMap (java.util.HashMap)109 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)107 ArtifactSummary (io.cdap.cdap.api.artifact.ArtifactSummary)88 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)88 Path (javax.ws.rs.Path)75