Search in sources :

Example 46 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testConflictingPipelineProperties.

@Test(expected = IllegalArgumentException.class)
public void testConflictingPipelineProperties() throws ValidationException {
    // populate some mock plugins.
    MockPluginConfigurer pluginConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifactIds = ImmutableSet.of(ARTIFACT_ID);
    pluginConfigurer.addMockPlugin(Action.PLUGIN_TYPE, "action1", MockPlugin.builder().putPipelineProperty("prop1", "val1").build(), artifactIds);
    pluginConfigurer.addMockPlugin(Action.PLUGIN_TYPE, "action2", MockPlugin.builder().putPipelineProperty("prop1", "val2").build(), artifactIds);
    Map<String, String> empty = ImmutableMap.of();
    ETLBatchConfig config = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, empty))).addStage(new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, empty))).addConnection("a1", "a2").setEngine(Engine.MAPREDUCE).build();
    new BatchPipelineSpecGenerator(NamespaceId.DEFAULT.getNamespace(), pluginConfigurer, null, ImmutableSet.of(BatchSource.PLUGIN_TYPE), ImmutableSet.of(BatchSink.PLUGIN_TYPE), Engine.MAPREDUCE, MOCK_FEATURE_FLAGS_PROVIDER).generateSpec(config);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) BatchPipelineSpecGenerator(io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) MockPluginConfigurer(io.cdap.cdap.etl.common.MockPluginConfigurer) Test(org.junit.Test)

Example 47 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testSimpleConditionConnectionWithMultipleTrueBranches.

@Test(expected = IllegalArgumentException.class)
public void testSimpleConditionConnectionWithMultipleTrueBranches() throws ValidationException {
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("condition", MOCK_CONDITION)).addStage(new ETLStage("t1", MOCK_TRANSFORM_A)).addStage(new ETLStage("t2", MOCK_TRANSFORM_B)).addStage(new ETLStage("sink1", MOCK_SINK)).addStage(new ETLStage("sink2", MOCK_SINK)).addConnection("source", "condition").addConnection("condition", "t1", true).addConnection("condition", "t2", true).addConnection("t1", "sink1").addConnection("t2", "sink2").build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 48 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testActionInPipelineMiddle.

@Test
public void testActionInPipelineMiddle() throws ValidationException {
    /*
     * source1 --> sink1 --> action --> source2 --> sink2
     */
    ETLBatchConfig config = ETLBatchConfig.builder().addStage(new ETLStage("source1", MOCK_SOURCE)).addStage(new ETLStage("source2", MOCK_SOURCE)).addStage(new ETLStage("sink1", MOCK_SINK)).addStage(new ETLStage("sink2", MOCK_SINK)).addStage(new ETLStage("action", MOCK_ACTION)).addConnection("source1", "sink1").addConnection("sink1", "action").addConnection("action", "source2").addConnection("source2", "sink2").build();
    try {
        specGenerator.generateSpec(config);
        Assert.fail("Did not fail a pipeline with an action in the middle");
    } catch (IllegalArgumentException e) {
    // expected
    }
    /*
     * action1 --> source --> action2 --> sink --> action3
     */
    config = ETLBatchConfig.builder().addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("sink", MOCK_SINK)).addStage(new ETLStage("action1", MOCK_ACTION)).addStage(new ETLStage("action2", MOCK_ACTION)).addStage(new ETLStage("action3", MOCK_ACTION)).addConnection("action1", "source").addConnection("source", "action2").addConnection("action2", "sink").addConnection("sink", "action3").build();
    try {
        specGenerator.generateSpec(config);
        Assert.fail("Did not fail a pipeline with an action in the middle");
    } catch (IllegalArgumentException e) {
    // expected
    }
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 49 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class DataPipelineTest method testOuterJoin.

private void testOuterJoin(Engine engine) throws Exception {
    Schema inputSchema1 = Schema.recordOf("customerRecord", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema2 = Schema.recordOf("itemRecord", Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)), Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema3 = Schema.recordOf("transactionRecord", Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_name", Schema.of(Schema.Type.STRING)));
    String input1Name = "source1OuterJoinInput-" + engine;
    String input2Name = "source2OuterJoinInput-" + engine;
    String input3Name = "source3OuterJoinInput-" + engine;
    String outputName = "outerJoinOutput-" + engine;
    String joinerName = "outerJoiner-" + engine;
    String sinkName = "outerJoinSink-" + engine;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("source1", MockSource.getPlugin(input1Name, inputSchema1))).addStage(new ETLStage("source2", MockSource.getPlugin(input2Name, inputSchema2))).addStage(new ETLStage("source3", MockSource.getPlugin(input3Name, inputSchema3))).addStage(new ETLStage("t1", IdentityTransform.getPlugin())).addStage(new ETLStage("t2", IdentityTransform.getPlugin())).addStage(new ETLStage("t3", IdentityTransform.getPlugin())).addStage(new ETLStage(joinerName, MockJoiner.getPlugin("t1.customer_id=t2.cust_id=t3.c_id&" + "t1.customer_name=t2.cust_name=t3.c_name", "t1", ""))).addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName))).addConnection("source1", "t1").addConnection("source2", "t2").addConnection("source3", "t3").addConnection("t1", joinerName).addConnection("t2", joinerName).addConnection("t3", joinerName).addConnection(joinerName, sinkName).setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("OuterJoinApp-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    Schema outSchema = Schema.recordOf("join.output", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))), Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("c_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
    StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
    StructuredRecord recordMartha = StructuredRecord.builder(inputSchema1).set("customer_id", "4").set("customer_name", "martha").build();
    StructuredRecord recordCar = StructuredRecord.builder(inputSchema2).set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").build();
    StructuredRecord recordBike = StructuredRecord.builder(inputSchema2).set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").build();
    StructuredRecord recordTrasCar = StructuredRecord.builder(inputSchema3).set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
    StructuredRecord recordTrasPlane = StructuredRecord.builder(inputSchema3).set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
    StructuredRecord recordTrasBike = StructuredRecord.builder(inputSchema3).set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
    // write one record to each source
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(input1Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane, recordMartha));
    inputManager = getDataset(NamespaceId.DEFAULT.dataset(input2Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordCar, recordBike));
    inputManager = getDataset(NamespaceId.DEFAULT.dataset(input3Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordTrasCar, recordTrasPlane, recordTrasBike));
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    StructuredRecord joinRecordSamuel = StructuredRecord.builder(outSchema).set("customer_id", "1").set("customer_name", "samuel").set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
    StructuredRecord joinRecordBob = StructuredRecord.builder(outSchema).set("customer_id", "2").set("customer_name", "bob").set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
    StructuredRecord joinRecordJane = StructuredRecord.builder(outSchema).set("customer_id", "3").set("customer_name", "jane").set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
    StructuredRecord joinRecordMartha = StructuredRecord.builder(outSchema).set("customer_id", "4").set("customer_name", "martha").build();
    DataSetManager<Table> sinkManager = getDataset(outputName);
    Set<StructuredRecord> expected = ImmutableSet.of(joinRecordSamuel, joinRecordJane, joinRecordBob, joinRecordMartha);
    Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expected, actual);
    validateMetric(4, appId, joinerName + ".records.out");
    validateMetric(4, appId, sinkName + ".records.in");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)

Example 50 with ETLBatchConfig

use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class DataPipelineTest method runHeadTriggeringPipeline.

private void runHeadTriggeringPipeline(Engine engine, String expectedValue1, String expectedValue2) throws Exception {
    // set runtime arguments
    Map<String, String> runtimeArguments = ImmutableMap.of("head-arg", expectedValue1);
    ETLStage action1 = new ETLStage("action1", MockAction.getPlugin("actionTable", "action1.row", "action1.column", expectedValue2));
    ETLBatchConfig etlConfig = io.cdap.cdap.etl.proto.v2.ETLBatchConfig.builder().addStage(action1).setEngine(engine).build();
    AppRequest<io.cdap.cdap.etl.proto.v2.ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("head");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    manager.setRuntimeArgs(runtimeArguments);
    manager.start(ImmutableMap.of("logical.start.time", "0"));
    manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) AppRequest(io.cdap.cdap.proto.artifact.AppRequest)

Aggregations

ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)121 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)121 Test (org.junit.Test)117 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)79 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)76 ApplicationManager (io.cdap.cdap.test.ApplicationManager)74 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)72 WorkflowManager (io.cdap.cdap.test.WorkflowManager)72 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)70 Table (io.cdap.cdap.api.dataset.table.Table)68 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)64 Schema (io.cdap.cdap.api.data.schema.Schema)58 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)46 AppRequest (co.cask.cdap.proto.artifact.AppRequest)43 ApplicationId (co.cask.cdap.proto.id.ApplicationId)43 ApplicationManager (co.cask.cdap.test.ApplicationManager)40 WorkflowManager (co.cask.cdap.test.WorkflowManager)39 Table (co.cask.cdap.api.dataset.table.Table)36 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)33 ArrayList (java.util.ArrayList)33