Search in sources :

Example 21 with SparkManager

use of io.cdap.cdap.test.SparkManager in project cdap by cdapio.

the class DataStreamsTest method testAlertPublisher.

@Test
public void testAlertPublisher() throws Exception {
    String sinkName = "alertSink";
    String topic = "alertTopic";
    Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
    StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
    StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
    StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
    /*
     * source --> nullAlert --> sink
     *               |
     *               |--> TMS publisher
     */
    DataStreamsConfig config = DataStreamsConfig.builder().setBatchInterval("5s").addStage(new ETLStage("source", MockSource.getPlugin(schema, ImmutableList.of(record1, record2, alertRecord)))).addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addStage(new ETLStage("tms", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace()))).addConnection("source", "nullAlert").addConnection("nullAlert", "sink").addConnection("nullAlert", "tms").setCheckpointDir(checkpointDir).build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    final Set<StructuredRecord> expectedRecords = ImmutableSet.of(record1, record2);
    final Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<String, String>()));
    final DataSetManager<Table> sinkTable = getDataset(sinkName);
    Tasks.waitFor(true, () -> {
        // get alerts from TMS
        try {
            getMessagingAdmin(NamespaceId.DEFAULT.getNamespace()).getTopicProperties(topic);
        } catch (TopicNotFoundException e) {
            return false;
        }
        MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
        Set<Alert> actualMessages = new HashSet<>();
        try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
            while (iter.hasNext()) {
                Message message = iter.next();
                Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
                actualMessages.add(alert);
            }
        }
        // get records from sink
        sinkTable.flush();
        Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(sinkTable));
        return expectedRecords.equals(outputRecords) && expectedMessages.equals(actualMessages);
    }, 4, TimeUnit.MINUTES);
    sparkManager.stop();
    sparkManager.waitForStopped(10, TimeUnit.SECONDS);
    validateMetric(appId, "source.records.out", 3);
    validateMetric(appId, "nullAlert.records.in", 3);
    validateMetric(appId, "nullAlert.records.out", 2);
    validateMetric(appId, "nullAlert.records.alert", 1);
    validateMetric(appId, "sink.records.in", 2);
    validateMetric(appId, "tms.records.in", 1);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) Message(io.cdap.cdap.api.messaging.Message) HashMap(java.util.HashMap) TopicNotFoundException(io.cdap.cdap.api.messaging.TopicNotFoundException) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Alert(io.cdap.cdap.etl.api.Alert) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 22 with SparkManager

use of io.cdap.cdap.test.SparkManager in project cdap by cdapio.

the class DataStreamsTest method testAutoJoinNullEquality.

private void testAutoJoinNullEquality(boolean nullSafe) throws Exception {
    /*
     * customers ----------|
     *                     |
     *                     |---> join ---> sink
     *                     |
     * transactions -------|
     */
    Schema inputSchema1 = Schema.recordOf("customer", Schema.Field.of("customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    Schema inputSchema2 = Schema.recordOf("transaction", Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)));
    Schema outSchema = Schema.recordOf("customers.transactions", Schema.Field.of("customers_customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("customers_customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("transactions_t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("transactions_customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("transactions_item_id", Schema.of(Schema.Type.STRING)));
    StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_name", "bob").build();
    StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
    StructuredRecord trans1 = StructuredRecord.builder(inputSchema2).set("t_id", "1").set("customer_id", "1").set("item_id", "11").build();
    StructuredRecord trans2 = StructuredRecord.builder(inputSchema2).set("t_id", "2").set("customer_id", "3").set("item_id", "22").build();
    StructuredRecord trans3 = StructuredRecord.builder(inputSchema2).set("t_id", "3").set("item_id", "33").build();
    List<StructuredRecord> input1 = ImmutableList.of(recordSamuel, recordBob, recordJane);
    List<StructuredRecord> input2 = ImmutableList.of(trans1, trans2, trans3);
    String outputName = UUID.randomUUID().toString();
    DataStreamsConfig etlConfig = DataStreamsConfig.builder().addStage(new ETLStage("customers", MockSource.getPlugin(inputSchema1, input1))).addStage(new ETLStage("transactions", MockSource.getPlugin(inputSchema2, input2))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(Arrays.asList("customers", "transactions"), Collections.singletonList("customer_id"), Collections.singletonList("transactions"), Collections.emptyList(), Collections.emptyList(), nullSafe))).addStage(new ETLStage("sink", MockSink.getPlugin(outputName))).addConnection("customers", "join").addConnection("transactions", "join").addConnection("join", "sink").setBatchInterval("5s").setCheckpointDir(checkpointDir).build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    StructuredRecord join1 = StructuredRecord.builder(outSchema).set("customers_customer_id", "1").set("customers_customer_name", "samuel").set("transactions_t_id", "1").set("transactions_customer_id", "1").set("transactions_item_id", "11").build();
    StructuredRecord join2 = StructuredRecord.builder(outSchema).set("customers_customer_id", "3").set("customers_customer_name", "jane").set("transactions_t_id", "2").set("transactions_customer_id", "3").set("transactions_item_id", "22").build();
    StructuredRecord join3;
    if (nullSafe) {
        // this transaction has a null customer id, which should match with the null id from customers
        join3 = StructuredRecord.builder(outSchema).set("transactions_t_id", "3").set("transactions_item_id", "33").set("customers_customer_name", "bob").build();
    } else {
        // this transaction has a null customer id, which should not match with the null id from customers
        join3 = StructuredRecord.builder(outSchema).set("transactions_t_id", "3").set("transactions_item_id", "33").build();
    }
    Set<StructuredRecord> expected = ImmutableSet.of(join1, join2, join3);
    DataSetManager<Table> outputManager = getDataset(outputName);
    Tasks.waitFor(true, () -> {
        outputManager.flush();
        Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));
        return expected.equals(outputRecords);
    }, 4, TimeUnit.MINUTES);
    sparkManager.stop();
    sparkManager.waitForStopped(10, TimeUnit.SECONDS);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 23 with SparkManager

use of io.cdap.cdap.test.SparkManager in project cdap by cdapio.

the class DataStreamsTest method testWindower.

@Test
public void testWindower() throws Exception {
    /*
     * source --> window(width=10,interval=1) --> aggregator --> filter --> sink
     */
    Schema schema = Schema.recordOf("data", Schema.Field.of("x", Schema.of(Schema.Type.STRING)));
    List<StructuredRecord> input = ImmutableList.of(StructuredRecord.builder(schema).set("x", "abc").build(), StructuredRecord.builder(schema).set("x", "abc").build(), StructuredRecord.builder(schema).set("x", "abc").build());
    String sinkName = "windowOut";
    // source sleeps 1 second between outputs
    DataStreamsConfig etlConfig = DataStreamsConfig.builder().addStage(new ETLStage("source", MockSource.getPlugin(schema, input, 1000L))).addStage(new ETLStage("window", Window.getPlugin(30, 1))).addStage(new ETLStage("agg", FieldCountAggregator.getPlugin("x", "string"))).addStage(new ETLStage("filter", StringValueFilterTransform.getPlugin("x", "all"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source", "window").addConnection("window", "agg").addConnection("agg", "filter").addConnection("filter", "sink").setBatchInterval("1s").setCheckpointDir(checkpointDir).build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("WindowerApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    // the sink should contain at least one record with count of 3, and no records with more than 3.
    // less than 3 if the window doesn't contain all 3 records yet, but there should eventually be a window
    // that contains all 3.
    final DataSetManager<Table> outputManager = getDataset(sinkName);
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            outputManager.flush();
            boolean sawThree = false;
            for (StructuredRecord record : MockSink.readOutput(outputManager)) {
                long count = record.get("ct");
                if (count == 3L) {
                    sawThree = true;
                }
                Assert.assertTrue(count <= 3L);
            }
            return sawThree;
        }
    }, 2, TimeUnit.MINUTES);
    sparkManager.stop();
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) TopicNotFoundException(io.cdap.cdap.api.messaging.TopicNotFoundException) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 24 with SparkManager

use of io.cdap.cdap.test.SparkManager in project cdap by cdapio.

the class DataStreamsTest method testAutoJoin.

@Test
public void testAutoJoin() throws Exception {
    /*
     * customers ----------|
     *                     |
     *                     |---> join ---> sink
     *                     |
     * transactions -------|
     */
    Schema inputSchema1 = Schema.recordOf("customer", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema2 = Schema.recordOf("transaction", Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)));
    Schema outSchema = Schema.recordOf("customers.transactions", Schema.Field.of("customers_customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("customers_customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("transactions_t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("transactions_customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("transactions_item_id", Schema.of(Schema.Type.STRING)));
    StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
    StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
    StructuredRecord tx1 = StructuredRecord.builder(inputSchema2).set("t_id", "1").set("customer_id", "1").set("item_id", "11").build();
    StructuredRecord tx2 = StructuredRecord.builder(inputSchema2).set("t_id", "2").set("customer_id", "3").set("item_id", "22").build();
    StructuredRecord tx3 = StructuredRecord.builder(inputSchema2).set("t_id", "3").set("customer_id", "4").set("item_id", "33").build();
    List<StructuredRecord> input1 = ImmutableList.of(recordSamuel, recordBob, recordJane);
    List<StructuredRecord> input2 = ImmutableList.of(tx1, tx2, tx3);
    String outputName = UUID.randomUUID().toString();
    DataStreamsConfig etlConfig = DataStreamsConfig.builder().addStage(new ETLStage("customers", MockSource.getPlugin(inputSchema1, input1))).addStage(new ETLStage("transactions", MockSource.getPlugin(inputSchema2, input2))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(Arrays.asList("customers", "transactions"), Collections.singletonList("customer_id"), Collections.singletonList("transactions"), Collections.emptyList(), Collections.emptyList(), true))).addStage(new ETLStage("sink", MockSink.getPlugin(outputName))).addConnection("customers", "join").addConnection("transactions", "join").addConnection("join", "sink").setBatchInterval("5s").setCheckpointDir(checkpointDir).build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("AutoJoinerApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    StructuredRecord join1 = StructuredRecord.builder(outSchema).set("customers_customer_id", "1").set("customers_customer_name", "samuel").set("transactions_t_id", "1").set("transactions_customer_id", "1").set("transactions_item_id", "11").build();
    StructuredRecord join2 = StructuredRecord.builder(outSchema).set("customers_customer_id", "3").set("customers_customer_name", "jane").set("transactions_t_id", "2").set("transactions_customer_id", "3").set("transactions_item_id", "22").build();
    StructuredRecord join3 = StructuredRecord.builder(outSchema).set("transactions_t_id", "3").set("transactions_customer_id", "4").set("transactions_item_id", "33").build();
    Set<StructuredRecord> expected = ImmutableSet.of(join1, join2, join3);
    DataSetManager<Table> outputManager = getDataset(outputName);
    Tasks.waitFor(true, () -> {
        outputManager.flush();
        Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));
        return expected.equals(outputRecords);
    }, 4, TimeUnit.MINUTES);
    sparkManager.stop();
    sparkManager.waitForStopped(10, TimeUnit.SECONDS);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 25 with SparkManager

use of io.cdap.cdap.test.SparkManager in project cdap by cdapio.

the class DataStreamsTest method testStageConsolidation.

@Test
public void testStageConsolidation() throws Exception {
    /*
                                   |non-null port --> sink1
        items1 --> null splitter --|
                                   |null port--> sink2
                                                   ^
                 |--> filter out id == 0  ---------|
        items2 --|    |
                 |    |--> error collector --> sink3
                 |
                 |                 |--> filter out id == 1 --> sink4
                 |--> aggregator --|
                                   |--> filter out id == 2 --> sink5
     */
    Schema schema = Schema.recordOf("item", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.INT))));
    StructuredRecord itemNull = StructuredRecord.builder(schema).build();
    StructuredRecord item0 = StructuredRecord.builder(schema).set("id", 0).build();
    StructuredRecord item1 = StructuredRecord.builder(schema).set("id", 1).build();
    StructuredRecord item2 = StructuredRecord.builder(schema).set("id", 2).build();
    StructuredRecord item3 = StructuredRecord.builder(schema).set("id", 3).build();
    List<StructuredRecord> input1Records = Arrays.asList(itemNull, item3);
    List<StructuredRecord> input2Records = Arrays.asList(item0, item1, item2);
    File outputDir = TMP_FOLDER.newFolder();
    String output1 = new File(outputDir, "output1").getAbsolutePath();
    String output2 = new File(outputDir, "output2").getAbsolutePath();
    String output3 = new File(outputDir, "output3").getAbsolutePath();
    String output4 = new File(outputDir, "output4").getAbsolutePath();
    String output5 = new File(outputDir, "output5").getAbsolutePath();
    DataStreamsConfig config = DataStreamsConfig.builder().addStage(new ETLStage("items1", MockSource.getPlugin(schema, input1Records))).addStage(new ETLStage("items2", MockSource.getPlugin(schema, input2Records))).addStage(new ETLStage("nullSplitter", NullFieldSplitterTransform.getPlugin("id"))).addStage(new ETLStage("filter0", IntValueFilterTransform.getPlugin("id", 0))).addStage(new ETLStage("collector", FlattenErrorTransform.getPlugin())).addStage(new ETLStage("filter1", IntValueFilterTransform.getPlugin("id", 1))).addStage(new ETLStage("filter2", IntValueFilterTransform.getPlugin("id", 2))).addStage(new ETLStage("identityAggregator", IdentityAggregator.getPlugin())).addStage(new ETLStage("sink1", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s1", output1))).addStage(new ETLStage("sink2", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s2", output2))).addStage(new ETLStage("sink3", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s3", output3))).addStage(new ETLStage("sink4", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s4", output4))).addStage(new ETLStage("sink5", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s5", output5))).addConnection("items1", "nullSplitter").addConnection("nullSplitter", "sink1", "non-null").addConnection("nullSplitter", "sink2", "null").addConnection("items2", "filter0").addConnection("items2", "identityAggregator").addConnection("filter0", "sink2").addConnection("filter0", "collector").addConnection("identityAggregator", "filter1").addConnection("identityAggregator", "filter2").addConnection("collector", "sink3").addConnection("filter1", "sink4").addConnection("filter2", "sink5").setProperties(Collections.singletonMap(io.cdap.cdap.etl.common.Constants.SPARK_PIPELINE_AUTOCACHE_ENABLE_FLAG, "false")).build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("StageConsolidationTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // run pipeline
    Map<String, String> args = Collections.singletonMap(io.cdap.cdap.etl.common.Constants.CONSOLIDATE_STAGES, "true");
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.startAndWaitForGoodRun(args, ProgramRunStatus.RUNNING, 5, TimeUnit.MINUTES);
    Schema errorSchema = Schema.recordOf("erroritem", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.INT))), Schema.Field.of("errMsg", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("errCode", Schema.nullableOf(Schema.of(Schema.Type.INT))), Schema.Field.of("errStage", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    StructuredRecord expectedError = StructuredRecord.builder(errorSchema).set("id", 0).set("errMsg", IntValueFilterTransform.ERROR_MESSAGE).set("errCode", IntValueFilterTransform.ERROR_CODE).set("errStage", "filter0").build();
    Set<StructuredRecord> sink1Expected = Collections.singleton(item3);
    Set<StructuredRecord> sink2Expected = new HashSet<>(Arrays.asList(itemNull, item1, item2));
    Set<StructuredRecord> sink3Expected = Collections.singleton(expectedError);
    Set<StructuredRecord> sink4Expected = new HashSet<>(Arrays.asList(item0, item2));
    Set<StructuredRecord> sink5Expected = new HashSet<>(Arrays.asList(item0, item1));
    Tasks.waitFor(true, () -> sink1Expected.equals(new HashSet<>(MockExternalSink.readOutput(output1, schema))) && sink2Expected.equals(new HashSet<>(MockExternalSink.readOutput(output2, schema))) && sink3Expected.equals(new HashSet<>(MockExternalSink.readOutput(output3, errorSchema))) && sink4Expected.equals(new HashSet<>(MockExternalSink.readOutput(output4, schema))) && sink5Expected.equals(new HashSet<>(MockExternalSink.readOutput(output5, schema))), 3, TimeUnit.MINUTES);
    sparkManager.stop();
    sparkManager.waitForStopped(1, TimeUnit.MINUTES);
    // check output
    validateMetric(appId, "nullSplitter.records.in", 2);
    validateMetric(appId, "nullSplitter.records.out.null", 1);
    validateMetric(appId, "nullSplitter.records.out.non-null", 1);
    validateMetric(appId, "filter0.records.out", 2);
    validateMetric(appId, "filter0.records.error", 1);
    validateMetric(appId, "identityAggregator.records.in", 3);
    validateMetric(appId, "identityAggregator.records.out", 3);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

SparkManager (io.cdap.cdap.test.SparkManager)84 ApplicationManager (io.cdap.cdap.test.ApplicationManager)66 Test (org.junit.Test)60 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)34 HashMap (java.util.HashMap)31 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)26 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)26 HashSet (java.util.HashSet)25 Table (io.cdap.cdap.api.dataset.table.Table)24 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)24 DataStreamsConfig (io.cdap.cdap.etl.proto.v2.DataStreamsConfig)22 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)22 Schema (io.cdap.cdap.api.data.schema.Schema)21 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)18 Location (org.apache.twill.filesystem.Location)18 File (java.io.File)17 WorkflowManager (io.cdap.cdap.test.WorkflowManager)14 URL (java.net.URL)14 PrintStream (java.io.PrintStream)12 HttpURLConnection (java.net.HttpURLConnection)12