Search in sources :

Example 76 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class DataPipelineTest method testSimpleConditionWithMultipleOutputActions.

@Test
public void testSimpleConditionWithMultipleOutputActions() throws Exception {
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     *
     * condition --Action--> file ---> trueSink
     *       |
     *       |--->Action--->file----> falseSink
     *
     */
    String appName = "SimpleConditionWithMultipleOutputActions";
    String trueSource = "true" + appName + "Source";
    String falseSource = "false" + appName + "Source";
    String trueSink = "true" + appName + "Sink";
    String falseSink = "false" + appName + "Sink";
    String actionTable = "actionTable" + appName;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("trueSource", MockSource.getPlugin(trueSource, schema))).addStage(new ETLStage("falseSource", MockSource.getPlugin(falseSource, schema))).addStage(new ETLStage("trueSink", MockSink.getPlugin(trueSink))).addStage(new ETLStage("falseSink", MockSink.getPlugin(falseSink))).addStage(new ETLStage("condition", MockCondition.getPlugin("condition"))).addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, "row1", "key1", "val1"))).addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, "row2", "key2", "val2"))).addConnection("condition", "action1", true).addConnection("action1", "trueSource").addConnection("trueSource", "trueSink").addConnection("condition", "action2", false).addConnection("action2", "falseSource").addConnection("falseSource", "falseSink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app(appName);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    for (String branch : Arrays.asList("true", "false")) {
        String source = branch.equals("true") ? trueSource : falseSource;
        String sink = branch.equals("true") ? trueSink : falseSink;
        // write records to source
        DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
        MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
        WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
        workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
        if (branch.equals("true")) {
            workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
        } else {
            workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
        }
        // check sink
        DataSetManager<Table> sinkManager = getDataset(sink);
        Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
        Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
        Assert.assertEquals(expected, actual);
        validateMetric(2, appId, branch + "Source.records.out");
        validateMetric(2, appId, branch + "Sink.records.in");
        // check Action1 and Action2 is executed correctly
        DataSetManager<Table> actionTableDS = getDataset(actionTable);
        if (branch.equals("true")) {
            Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
        } else {
            Assert.assertEquals("val2", MockAction.readOutput(actionTableDS, "row2", "key2"));
        }
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 77 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class AutoJoinerTest method testTripleAutoJoin.

private void testTripleAutoJoin(List<String> required, List<String> broadcast, Set<StructuredRecord> expected, Engine engine, List<String> tablesInOrderToJoin) throws Exception {
    /*
         users ------|
                     |
         purchases --|--> join --> sink
                     |
         interests --|

         joinOn: users.region = purchases.region = interests.region and
                 users.user_id = purchases.user_id = interests.user_id
     */
    String userInput = UUID.randomUUID().toString();
    String purchaseInput = UUID.randomUUID().toString();
    String interestInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("users", MockSource.getPlugin(userInput, USER_SCHEMA))).addStage(new ETLStage("purchases", MockSource.getPlugin(purchaseInput, PURCHASE_SCHEMA))).addStage(new ETLStage("interests", MockSource.getPlugin(interestInput, INTEREST_SCHEMA))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(tablesInOrderToJoin, Arrays.asList("region", "user_id"), required, broadcast, Collections.emptyList(), true))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("users", "join").addConnection("purchases", "join").addConnection("interests", "join").addConnection("join", "sink").setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    List<StructuredRecord> userData = Arrays.asList(USER_ALICE, USER_ALYCE, USER_BOB);
    DataSetManager<Table> inputManager = getDataset(userInput);
    MockSource.writeInput(inputManager, userData);
    List<StructuredRecord> purchaseData = new ArrayList<>();
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 0).set("purchase_id", 123).build());
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 2).set("purchase_id", 456).build());
    inputManager = getDataset(purchaseInput);
    MockSource.writeInput(inputManager, purchaseData);
    List<StructuredRecord> interestData = new ArrayList<>();
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "food").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "sports").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 1).set("interest", "gardening").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 2).set("interest", "gaming").build());
    inputManager = getDataset(interestInput);
    MockSource.writeInput(inputManager, interestData);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Set<StructuredRecord> actual = new HashSet<>();
    Schema expectedSchema = expected.iterator().hasNext() ? expected.iterator().next().getSchema() : null;
    if (expectedSchema == null || expected.iterator().next().getSchema() == outputRecords.get(0).getSchema()) {
        actual = new HashSet<>(outputRecords);
    } else {
        // reorder the output columns of the join result (actual) to match the column order of expected
        for (StructuredRecord sr : outputRecords) {
            actual.add(StructuredRecord.builder(expectedSchema).set("purchases_region", sr.get("purchases_region")).set("purchases_purchase_id", sr.get("purchases_purchase_id")).set("purchases_user_id", sr.get("purchases_user_id")).set("users_region", sr.get("users_region")).set("users_user_id", sr.get("users_user_id")).set("users_name", sr.get("users_name")).set("interests_region", sr.get("interests_region")).set("interests_user_id", sr.get("interests_user_id")).set("interests_interest", sr.get("interests_interest")).build());
        }
    }
    Assert.assertEquals(expected, actual);
    validateMetric(9, appId, "join.records.in");
    validateMetric(expected.size(), appId, "join.records.out");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) WorkflowManager(io.cdap.cdap.test.WorkflowManager) Schema(io.cdap.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 78 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class AutoJoinerTest method testQuadAutoJoin.

private void testQuadAutoJoin(List<String> required, List<String> broadcast, Set<StructuredRecord> expected, Engine engine, List<String> tablesInOrderToJoin) throws Exception {
    /*
         users ------|
                     |
         purchases --|--> join --> sink
                     |
         interests --|
                     |
         age --------|

         joinOn: users.region = purchases.region = interests.region = age.region and
                 users.user_id = purchases.user_id = interests.user_id = age.user_id
     */
    String userInput = UUID.randomUUID().toString();
    String purchaseInput = UUID.randomUUID().toString();
    String interestInput = UUID.randomUUID().toString();
    String ageInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("users", MockSource.getPlugin(userInput, USER_SCHEMA))).addStage(new ETLStage("purchases", MockSource.getPlugin(purchaseInput, PURCHASE_SCHEMA))).addStage(new ETLStage("interests", MockSource.getPlugin(interestInput, INTEREST_SCHEMA))).addStage(new ETLStage("ages", MockSource.getPlugin(ageInput, AGE_SCHEMA))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(tablesInOrderToJoin, Arrays.asList("region", "user_id"), required, broadcast, Collections.emptyList(), true))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("users", "join").addConnection("purchases", "join").addConnection("interests", "join").addConnection("ages", "join").addConnection("join", "sink").setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    List<StructuredRecord> userData = Arrays.asList(USER_ALICE, USER_ALYCE, USER_BOB);
    DataSetManager<Table> inputManager = getDataset(userInput);
    MockSource.writeInput(inputManager, userData);
    List<StructuredRecord> purchaseData = new ArrayList<>();
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 0).set("purchase_id", 123).build());
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 2).set("purchase_id", 456).build());
    inputManager = getDataset(purchaseInput);
    MockSource.writeInput(inputManager, purchaseData);
    List<StructuredRecord> interestData = new ArrayList<>();
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "food").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "sports").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 1).set("interest", "gardening").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 2).set("interest", "gaming").build());
    inputManager = getDataset(interestInput);
    MockSource.writeInput(inputManager, interestData);
    List<StructuredRecord> ageData = new ArrayList<>();
    ageData.add(StructuredRecord.builder(AGE_SCHEMA).set("region", "us").set("user_id", 10).set("age", 20).build());
    ageData.add(StructuredRecord.builder(AGE_SCHEMA).set("region", "us").set("user_id", 1).set("age", 30).build());
    inputManager = getDataset(ageInput);
    MockSource.writeInput(inputManager, ageData);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Set<StructuredRecord> actual = new HashSet<>();
    Schema expectedSchema = expected.iterator().hasNext() ? expected.iterator().next().getSchema() : null;
    if (expectedSchema == null || expected.iterator().next().getSchema() == outputRecords.get(0).getSchema()) {
        actual = new HashSet<>(outputRecords);
    } else {
        // reorder the output columns of the join result (actual) to match the column order of expected
        for (StructuredRecord sr : outputRecords) {
            actual.add(StructuredRecord.builder(expectedSchema).set("ages_region", sr.get("ages_region")).set("ages_age", sr.get("ages_age")).set("ages_user_id", sr.get("ages_user_id")).set("purchases_region", sr.get("purchases_region")).set("purchases_purchase_id", sr.get("purchases_purchase_id")).set("purchases_user_id", sr.get("purchases_user_id")).set("users_region", sr.get("users_region")).set("users_user_id", sr.get("users_user_id")).set("users_name", sr.get("users_name")).set("interests_region", sr.get("interests_region")).set("interests_user_id", sr.get("interests_user_id")).set("interests_interest", sr.get("interests_interest")).build());
        }
    }
    Assert.assertEquals(expected, actual);
    validateMetric(11, appId, "join.records.in");
    validateMetric(expected.size(), appId, "join.records.out");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) WorkflowManager(io.cdap.cdap.test.WorkflowManager) Schema(io.cdap.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 79 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class AutoJoinerTest method testCaseSensitivity.

@Test
public void testCaseSensitivity() throws Exception {
    Schema weird1 = Schema.recordOf("weird1", Schema.Field.of("id", Schema.of(Schema.Type.INT)), Schema.Field.of("ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("Id", Schema.of(Schema.Type.INT)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    Schema weird2 = Schema.recordOf("weird2", Schema.Field.of("id", Schema.of(Schema.Type.INT)), Schema.Field.of("ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("val", Schema.of(Schema.Type.STRING)));
    String input1 = UUID.randomUUID().toString();
    String input2 = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("i1", MockSource.getPlugin(input1, weird1))).addStage(new ETLStage("i2", MockSource.getPlugin(input2, weird2))).addStage(new ETLStage("join", MockAutoJoiner.getPlugin(Arrays.asList("i1", "i2"), Arrays.asList("id", "ID"), Arrays.asList("i1", "i2"), Collections.emptyList(), Collections.emptyList(), true))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("i1", "join").addConnection("i2", "join").addConnection("join", "sink").setEngine(Engine.SPARK).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    List<StructuredRecord> input1Data = new ArrayList<>();
    input1Data.add(StructuredRecord.builder(weird1).set("id", 0).set("ID", 99L).set("Id", 0).set("name", "zero").build());
    input1Data.add(StructuredRecord.builder(weird1).set("id", 1).set("ID", 0L).set("Id", 0).set("name", "one").build());
    DataSetManager<Table> inputManager = getDataset(input1);
    MockSource.writeInput(inputManager, input1Data);
    List<StructuredRecord> input2Data = new ArrayList<>();
    input2Data.add(StructuredRecord.builder(weird2).set("id", 0).set("ID", 99L).set("val", "0").build());
    input2Data.add(StructuredRecord.builder(weird2).set("id", 1).set("ID", 99L).set("val", "1").build());
    input2Data.add(StructuredRecord.builder(weird2).set("id", 0).set("ID", 0L).set("val", "2").build());
    inputManager = getDataset(input2);
    MockSource.writeInput(inputManager, input2Data);
    Schema expectedSchema = Schema.recordOf("i1.i2", Schema.Field.of("i1_id", Schema.of(Schema.Type.INT)), Schema.Field.of("i1_ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("i1_Id", Schema.of(Schema.Type.INT)), Schema.Field.of("i1_name", Schema.of(Schema.Type.STRING)), Schema.Field.of("i2_id", Schema.of(Schema.Type.INT)), Schema.Field.of("i2_ID", Schema.of(Schema.Type.LONG)), Schema.Field.of("i2_val", Schema.of(Schema.Type.STRING)));
    StructuredRecord expected = StructuredRecord.builder(expectedSchema).set("i1_id", 0).set("i1_ID", 99L).set("i1_Id", 0).set("i1_name", "zero").set("i2_id", 0).set("i2_ID", 99L).set("i2_val", "0").build();
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    Map<String, String> args = Collections.singletonMap(MockAutoJoiner.PARTITIONS_ARGUMENT, "1");
    workflowManager.startAndWaitForGoodRun(args, ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> actual = MockSink.readOutput(outputManager);
    Assert.assertEquals(Collections.singletonList(expected), actual);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 80 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class AutoJoinerTest method testAutoJoinWithMacros.

private void testAutoJoinWithMacros(Engine engine, List<String> required, Schema expectedSchema, Set<StructuredRecord> expectedRecords, boolean excludeUsers, boolean excludePurchases) throws Exception {
    /*
         users ------|
                     |--> join --> sink
         purchases --|

         joinOn: users.region = purchases.region and users.user_id = purchases.user_id
     */
    String userInput = UUID.randomUUID().toString();
    String purchaseInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    Map<String, String> joinerProps = new HashMap<>();
    joinerProps.put(MockAutoJoiner.Conf.STAGES, "${stages}");
    joinerProps.put(MockAutoJoiner.Conf.KEY, "${key}");
    joinerProps.put(MockAutoJoiner.Conf.REQUIRED, "${required}");
    joinerProps.put(MockAutoJoiner.Conf.SELECT, "${select}");
    if (engine == Engine.SPARK || (required.size() < 2 && engine == Engine.MAPREDUCE)) {
        joinerProps.put(MockAutoJoiner.Conf.SCHEMA, "${schema}");
    }
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("users", MockSource.getPlugin(userInput))).addStage(new ETLStage("purchases", MockSource.getPlugin(purchaseInput))).addStage(new ETLStage("join", new ETLPlugin(MockAutoJoiner.NAME, BatchJoiner.PLUGIN_TYPE, joinerProps))).addStage(new ETLStage("sink", MockSink.getPlugin(output))).addConnection("users", "join").addConnection("purchases", "join").addConnection("join", "sink").setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    if (!excludeUsers) {
        List<StructuredRecord> userData = Arrays.asList(USER_ALICE, USER_ALYCE, USER_BOB);
        DataSetManager<Table> inputManager = getDataset(userInput);
        MockSource.writeInput(inputManager, userData);
    }
    if (!excludePurchases) {
        List<StructuredRecord> purchaseData = new ArrayList<>();
        purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 0).set("purchase_id", 123).build());
        purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 2).set("purchase_id", 456).build());
        DataSetManager<Table> inputManager = getDataset(purchaseInput);
        MockSource.writeInput(inputManager, purchaseData);
    }
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    List<JoinField> selectedFields = new ArrayList<>();
    selectedFields.add(new JoinField("purchases", "region"));
    selectedFields.add(new JoinField("purchases", "purchase_id"));
    selectedFields.add(new JoinField("purchases", "user_id"));
    selectedFields.add(new JoinField("users", "name"));
    Map<String, String> joinerProperties = MockAutoJoiner.getProperties(Arrays.asList("purchases", "users"), Arrays.asList("region", "user_id"), required, Collections.emptyList(), selectedFields, true);
    Map<String, String> runtimeArgs = new HashMap<>();
    runtimeArgs.put("stages", joinerProperties.get(MockAutoJoiner.Conf.STAGES));
    runtimeArgs.put("key", joinerProperties.get(MockAutoJoiner.Conf.KEY));
    runtimeArgs.put("required", joinerProperties.get(MockAutoJoiner.Conf.REQUIRED));
    runtimeArgs.put("select", joinerProperties.get(MockAutoJoiner.Conf.SELECT));
    runtimeArgs.put("schema", expectedSchema.toString());
    workflowManager.startAndWaitForGoodRun(runtimeArgs, ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Assert.assertEquals(expectedRecords, new HashSet<>(outputRecords));
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) JoinField(io.cdap.cdap.etl.api.join.JoinField) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)

Aggregations

ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)121 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)121 Test (org.junit.Test)117 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)79 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)76 ApplicationManager (io.cdap.cdap.test.ApplicationManager)74 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)72 WorkflowManager (io.cdap.cdap.test.WorkflowManager)72 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)70 Table (io.cdap.cdap.api.dataset.table.Table)68 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)64 Schema (io.cdap.cdap.api.data.schema.Schema)58 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)46 AppRequest (co.cask.cdap.proto.artifact.AppRequest)43 ApplicationId (co.cask.cdap.proto.id.ApplicationId)43 ApplicationManager (co.cask.cdap.test.ApplicationManager)40 WorkflowManager (co.cask.cdap.test.WorkflowManager)39 Table (co.cask.cdap.api.dataset.table.Table)36 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)33 ArrayList (java.util.ArrayList)33