Search in sources :

Example 26 with WorkflowManager

use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

the class AutoJoinerTest method testQuadAutoJoin.

private void testQuadAutoJoin(List<String> required, List<String> broadcast, Set<StructuredRecord> expected, Engine engine, List<String> tablesInOrderToJoin) throws Exception {
    /*
         users ------|
                     |
         purchases --|--> join --> sink
                     |
         interests --|
                     |
         age --------|

         joinOn: users.region = purchases.region = interests.region = age.region and
                 users.user_id = purchases.user_id = interests.user_id = age.user_id
     */
    String userInput = UUID.randomUUID().toString();
    String purchaseInput = UUID.randomUUID().toString();
    String interestInput = UUID.randomUUID().toString();
    String ageInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("users", MockSource.getPlugin(userInput, USER_SCHEMA))).addStage(new ETLStage("purchases", MockSource.getPlugin(purchaseInput, PURCHASE_SCHEMA))).addStage(new ETLStage("interests", MockSource.getPlugin(interestInput, INTEREST_SCHEMA))).addStage(new ETLStage("ages", MockSource.getPlugin(ageInput, AGE_SCHEMA))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(tablesInOrderToJoin, Arrays.asList("region", "user_id"), required, broadcast, Collections.emptyList(), true))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("users", "join").addConnection("purchases", "join").addConnection("interests", "join").addConnection("ages", "join").addConnection("join", "sink").setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    List<StructuredRecord> userData = Arrays.asList(USER_ALICE, USER_ALYCE, USER_BOB);
    DataSetManager<Table> inputManager = getDataset(userInput);
    MockSource.writeInput(inputManager, userData);
    List<StructuredRecord> purchaseData = new ArrayList<>();
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 0).set("purchase_id", 123).build());
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 2).set("purchase_id", 456).build());
    inputManager = getDataset(purchaseInput);
    MockSource.writeInput(inputManager, purchaseData);
    List<StructuredRecord> interestData = new ArrayList<>();
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "food").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "sports").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 1).set("interest", "gardening").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 2).set("interest", "gaming").build());
    inputManager = getDataset(interestInput);
    MockSource.writeInput(inputManager, interestData);
    List<StructuredRecord> ageData = new ArrayList<>();
    ageData.add(StructuredRecord.builder(AGE_SCHEMA).set("region", "us").set("user_id", 10).set("age", 20).build());
    ageData.add(StructuredRecord.builder(AGE_SCHEMA).set("region", "us").set("user_id", 1).set("age", 30).build());
    inputManager = getDataset(ageInput);
    MockSource.writeInput(inputManager, ageData);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Set<StructuredRecord> actual = new HashSet<>();
    Schema expectedSchema = expected.iterator().hasNext() ? expected.iterator().next().getSchema() : null;
    if (expectedSchema == null || expected.iterator().next().getSchema() == outputRecords.get(0).getSchema()) {
        actual = new HashSet<>(outputRecords);
    } else {
        // reorder the output columns of the join result (actual) to match the column order of expected
        for (StructuredRecord sr : outputRecords) {
            actual.add(StructuredRecord.builder(expectedSchema).set("ages_region", sr.get("ages_region")).set("ages_age", sr.get("ages_age")).set("ages_user_id", sr.get("ages_user_id")).set("purchases_region", sr.get("purchases_region")).set("purchases_purchase_id", sr.get("purchases_purchase_id")).set("purchases_user_id", sr.get("purchases_user_id")).set("users_region", sr.get("users_region")).set("users_user_id", sr.get("users_user_id")).set("users_name", sr.get("users_name")).set("interests_region", sr.get("interests_region")).set("interests_user_id", sr.get("interests_user_id")).set("interests_interest", sr.get("interests_interest")).build());
        }
    }
    Assert.assertEquals(expected, actual);
    validateMetric(11, appId, "join.records.in");
    validateMetric(expected.size(), appId, "join.records.out");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) WorkflowManager(io.cdap.cdap.test.WorkflowManager) Schema(io.cdap.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 27 with WorkflowManager

use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

the class AutoJoinerTest method testCaseSensitivity.

@Test
public void testCaseSensitivity() throws Exception {
    Schema weird1 = Schema.recordOf("weird1", Schema.Field.of("id", Schema.of(Schema.Type.INT)), Schema.Field.of("ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("Id", Schema.of(Schema.Type.INT)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    Schema weird2 = Schema.recordOf("weird2", Schema.Field.of("id", Schema.of(Schema.Type.INT)), Schema.Field.of("ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("val", Schema.of(Schema.Type.STRING)));
    String input1 = UUID.randomUUID().toString();
    String input2 = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("i1", MockSource.getPlugin(input1, weird1))).addStage(new ETLStage("i2", MockSource.getPlugin(input2, weird2))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(Arrays.asList("i1", "i2"), Arrays.asList("id", "ID"), Arrays.asList("i1", "i2"), Collections.emptyList(), Collections.emptyList(), true))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("i1", "join").addConnection("i2", "join").addConnection("join", "sink").setEngine(Engine.SPARK).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    List<StructuredRecord> input1Data = new ArrayList<>();
    input1Data.add(StructuredRecord.builder(weird1).set("id", 0).set("ID", 99L).set("Id", 0).set("name", "zero").build());
    input1Data.add(StructuredRecord.builder(weird1).set("id", 1).set("ID", 0L).set("Id", 0).set("name", "one").build());
    DataSetManager<Table> inputManager = getDataset(input1);
    MockSource.writeInput(inputManager, input1Data);
    List<StructuredRecord> input2Data = new ArrayList<>();
    input2Data.add(StructuredRecord.builder(weird2).set("id", 0).set("ID", 99L).set("val", "0").build());
    input2Data.add(StructuredRecord.builder(weird2).set("id", 1).set("ID", 99L).set("val", "1").build());
    input2Data.add(StructuredRecord.builder(weird2).set("id", 0).set("ID", 0L).set("val", "2").build());
    inputManager = getDataset(input2);
    MockSource.writeInput(inputManager, input2Data);
    Schema expectedSchema = Schema.recordOf("i1.i2", Schema.Field.of("i1_id", Schema.of(Schema.Type.INT)), Schema.Field.of("i1_ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("i1_Id", Schema.of(Schema.Type.INT)), Schema.Field.of("i1_name", Schema.of(Schema.Type.STRING)), Schema.Field.of("i2_id", Schema.of(Schema.Type.INT)), Schema.Field.of("i2_ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("i2_val", Schema.of(Schema.Type.STRING)));
    StructuredRecord expected = StructuredRecord.builder(expectedSchema).set("i1_id", 0).set("i1_ID", 99L).set("i1_Id", 0).set("i1_name", "zero").set("i2_id", 0).set("i2_ID", 99L).set("i2_val", "0").build();
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    Map<String, String> args = Collections.singletonMap(MockAutoJoiner.PARTITIONS_ARGUMENT, "1");
    workflowManager.startAndWaitForGoodRun(args, ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> actual = MockSink.readOutput(outputManager);
    Assert.assertEquals(Collections.singletonList(expected), actual);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 28 with WorkflowManager

use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

the class AutoJoinerTest method testAutoJoinWithMacros.

private void testAutoJoinWithMacros(Engine engine, List<String> required, Schema expectedSchema, Set<StructuredRecord> expectedRecords, boolean excludeUsers, boolean excludePurchases) throws Exception {
    /*
         users ------|
                     |--> join --> sink
         purchases --|

         joinOn: users.region = purchases.region and users.user_id = purchases.user_id
     */
    String userInput = UUID.randomUUID().toString();
    String purchaseInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    Map<String, String> joinerProps = new HashMap<>();
    joinerProps.put(MockAutoJoiner.Conf.STAGES, "${stages}");
    joinerProps.put(MockAutoJoiner.Conf.KEY, "${key}");
    joinerProps.put(MockAutoJoiner.Conf.REQUIRED, "${required}");
    joinerProps.put(MockAutoJoiner.Conf.SELECT, "${select}");
    if (engine == Engine.SPARK || (required.size() < 2 && engine == Engine.MAPREDUCE)) {
        joinerProps.put(MockAutoJoiner.Conf.SCHEMA, "${schema}");
    }
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("users", MockSource.getPlugin(userInput))).addStage(new ETLStage("purchases", MockSource.getPlugin(purchaseInput))).addStage(new ETLStage("join", new ETLPlugin(MockAutoJoiner.NAME, BatchJoiner.PLUGIN_TYPE, joinerProps))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("users", "join").addConnection("purchases", "join").addConnection("join", "sink").setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    if (!excludeUsers) {
        List<StructuredRecord> userData = Arrays.asList(USER_ALICE, USER_ALYCE, USER_BOB);
        DataSetManager<Table> inputManager = getDataset(userInput);
        MockSource.writeInput(inputManager, userData);
    }
    if (!excludePurchases) {
        List<StructuredRecord> purchaseData = new ArrayList<>();
        purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 0).set("purchase_id", 123).build());
        purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 2).set("purchase_id", 456).build());
        DataSetManager<Table> inputManager = getDataset(purchaseInput);
        MockSource.writeInput(inputManager, purchaseData);
    }
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    List<JoinField> selectedFields = new ArrayList<>();
    selectedFields.add(new JoinField("purchases", "region"));
    selectedFields.add(new JoinField("purchases", "purchase_id"));
    selectedFields.add(new JoinField("purchases", "user_id"));
    selectedFields.add(new JoinField("users", "name"));
    Map<String, String> joinerProperties = MockAutoJoiner.getProperties(Arrays.asList("purchases", "users"), Arrays.asList("region", "user_id"), required, Collections.emptyList(), selectedFields, true);
    Map<String, String> runtimeArgs = new HashMap<>();
    runtimeArgs.put("stages", joinerProperties.get(MockAutoJoiner.Conf.STAGES));
    runtimeArgs.put("key", joinerProperties.get(MockAutoJoiner.Conf.KEY));
    runtimeArgs.put("required", joinerProperties.get(MockAutoJoiner.Conf.REQUIRED));
    runtimeArgs.put("select", joinerProperties.get(MockAutoJoiner.Conf.SELECT));
    runtimeArgs.put("schema", expectedSchema.toString());
    workflowManager.startAndWaitForGoodRun(runtimeArgs, ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Assert.assertEquals(expectedRecords, new HashSet<>(outputRecords));
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) JoinField(io.cdap.cdap.etl.api.join.JoinField) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)

Example 29 with WorkflowManager

use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

the class AutoJoinerTest method testInnerBetweenCondition.

@Test
public void testInnerBetweenCondition() throws Exception {
    /*
         users ----------|
                         |--> join --> sink
         age_groups -----|

         joinOn: users.age > age_groups.lo and (users.age <= age_groups.hi or age_groups.hi is null)
     */
    Schema userSchema = Schema.recordOf("user", Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("age", Schema.nullableOf(Schema.of(Schema.Type.INT))));
    Schema ageGroupSchema = Schema.recordOf("age_group", Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("lo", Schema.of(Schema.Type.INT)), Schema.Field.of("hi", Schema.nullableOf(Schema.of(Schema.Type.INT))));
    Schema expectedSchema = Schema.recordOf("users.age_groups", Schema.Field.of("username", Schema.of(Schema.Type.STRING)), Schema.Field.of("age_group", Schema.of(Schema.Type.STRING)));
    String userInput = UUID.randomUUID().toString();
    String agesInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    List<JoinField> select = new ArrayList<>();
    select.add(new JoinField("users", "name", "username"));
    select.add(new JoinField("age_groups", "name", "age_group"));
    JoinCondition.OnExpression condition = JoinCondition.onExpression().setExpression("users.age >= age_groups.lo and (users.age < age_groups.hi or age_groups.hi is null)").build();
    Map<String, String> joinerProperties = MockAutoJoiner.getProperties(Arrays.asList("users", "age_groups"), Collections.emptyList(), Arrays.asList("users", "age_groups"), Collections.emptyList(), select, false, null, condition);
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("users", MockSource.getPlugin(userInput, userSchema))).addStage(new ETLStage("age_groups", MockSource.getPlugin(agesInput, ageGroupSchema))).addStage(new ETLStage("join", new ETLPlugin(MockAutoJoiner.NAME, BatchJoiner.PLUGIN_TYPE, joinerProperties))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("users", "join").addConnection("age_groups", "join").addConnection("join", "sink").setEngine(Engine.SPARK).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    List<StructuredRecord> records = new ArrayList<>();
    records.add(StructuredRecord.builder(userSchema).set("name", "Alice").set("age", 35).build());
    records.add(StructuredRecord.builder(userSchema).set("name", "Bob").build());
    records.add(StructuredRecord.builder(userSchema).set("name", "Carl").set("age", 13).build());
    records.add(StructuredRecord.builder(userSchema).set("name", "Dave").set("age", 0).build());
    records.add(StructuredRecord.builder(userSchema).set("name", "Elaine").set("age", 68).build());
    records.add(StructuredRecord.builder(userSchema).set("name", "Fred").set("age", 4).build());
    DataSetManager<Table> inputManager = getDataset(userInput);
    MockSource.writeInput(inputManager, records);
    records.clear();
    records.add(StructuredRecord.builder(ageGroupSchema).set("name", "infant").set("lo", 0).set("hi", 2).build());
    records.add(StructuredRecord.builder(ageGroupSchema).set("name", "toddler").set("lo", 2).set("hi", 5).build());
    records.add(StructuredRecord.builder(ageGroupSchema).set("name", "child").set("lo", 5).set("hi", 13).build());
    records.add(StructuredRecord.builder(ageGroupSchema).set("name", "teen").set("lo", 13).set("hi", 20).build());
    records.add(StructuredRecord.builder(ageGroupSchema).set("name", "adult").set("lo", 20).set("hi", 65).build());
    records.add(StructuredRecord.builder(ageGroupSchema).set("name", "senior").set("lo", 65).build());
    inputManager = getDataset(agesInput);
    MockSource.writeInput(inputManager, records);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Set<StructuredRecord> expected = new HashSet<>();
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Alice").set("age_group", "adult").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Carl").set("age_group", "teen").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Dave").set("age_group", "infant").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Elaine").set("age_group", "senior").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Fred").set("age_group", "toddler").build());
    Assert.assertEquals(expected, new HashSet<>(outputRecords));
    validateMetric(6, appId, "users.records.out");
    validateMetric(6, appId, "age_groups.records.out");
    validateMetric(12, appId, "join.records.in");
    validateMetric(expected.size(), appId, "join.records.out");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) JoinField(io.cdap.cdap.etl.api.join.JoinField) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) JoinCondition(io.cdap.cdap.etl.api.join.JoinCondition) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 30 with WorkflowManager

use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

the class AutoJoinerTest method testSimpleAutoJoin.

private void testSimpleAutoJoin(List<String> required, List<String> broadcast, Set<StructuredRecord> expected, Engine engine) throws Exception {
    /*
         users ------|
                     |--> join --> sink
         purchases --|

         joinOn: users.region = purchases.region and users.user_id = purchases.user_id
     */
    String userInput = UUID.randomUUID().toString();
    String purchaseInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("users", MockSource.getPlugin(userInput, USER_SCHEMA))).addStage(new ETLStage("purchases", MockSource.getPlugin(purchaseInput, PURCHASE_SCHEMA))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(Arrays.asList("purchases", "users"), Arrays.asList("region", "user_id"), required, broadcast, Collections.emptyList(), true))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("users", "join").addConnection("purchases", "join").addConnection("join", "sink").setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    List<StructuredRecord> userData = Arrays.asList(USER_ALICE, USER_ALYCE, USER_BOB);
    DataSetManager<Table> inputManager = getDataset(userInput);
    MockSource.writeInput(inputManager, userData);
    List<StructuredRecord> purchaseData = new ArrayList<>();
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 0).set("purchase_id", 123).build());
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 2).set("purchase_id", 456).build());
    inputManager = getDataset(purchaseInput);
    MockSource.writeInput(inputManager, purchaseData);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    Map<String, String> args = Collections.singletonMap(MockAutoJoiner.PARTITIONS_ARGUMENT, "1");
    workflowManager.startAndWaitForGoodRun(args, ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Assert.assertEquals(expected, new HashSet<>(outputRecords));
    validateMetric(5, appId, "join.records.in");
    validateMetric(expected.size(), appId, "join.records.out");
    if (engine != Engine.SPARK) {
        // In SPARK number of partitions hint is ignored, so additional sinks are created
        validateMetric(1, appId, "sink." + MockSink.INITIALIZED_COUNT_METRIC);
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)

Aggregations

WorkflowManager (io.cdap.cdap.test.WorkflowManager)80 ApplicationManager (io.cdap.cdap.test.ApplicationManager)78 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)69 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)67 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)67 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)63 Table (io.cdap.cdap.api.dataset.table.Table)59 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)54 Schema (io.cdap.cdap.api.data.schema.Schema)46 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)45 Test (org.junit.Test)40 ArrayList (java.util.ArrayList)23 HashMap (java.util.HashMap)16 HashSet (java.util.HashSet)15 ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)12 File (java.io.File)12 RunRecord (io.cdap.cdap.proto.RunRecord)9 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)7 IOException (java.io.IOException)6 ServiceManager (io.cdap.cdap.test.ServiceManager)5