
Example 21 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

From the class PreviewDataPipelineTest, method testPreviewFailedRun:

private void testPreviewFailedRun(Engine engine) throws Exception {
    PreviewManager previewManager = getPreviewManager();
    String sourceTableName = "singleInput";
    String sinkTableName = "singleOutput";
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     * source --> transform --> sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
        .addStage(new ETLStage("transform", ExceptionTransform.getPlugin("name", "samuel")))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
        .addConnection("source", "transform")
        .addConnection("transform", "sink")
        .setNumOfRecordsPreview(100)
        .setEngine(engine)
        .build();
    // Construct the preview config with the program name and program type.
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the table for the mock source
    addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    MockSource.writeInput(inputManager, "1", recordSamuel);
    MockSource.writeInput(inputManager, "2", recordBob);
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    final PreviewRunner previewRunner = previewManager.getRunner(previewId);
    // Wait for the preview status to go into RUN_FAILED.
    Tasks.waitFor(PreviewStatus.Status.RUN_FAILED, new Callable<PreviewStatus.Status>() {

        @Override
        public PreviewStatus.Status call() throws Exception {
            PreviewStatus status = previewRunner.getStatus();
            return status == null ? null : status.getStatus();
        }
    }, 5, TimeUnit.MINUTES);
    // Get the data for stage "source" in the PreviewStore.
    checkPreviewStore(previewRunner, "source", 2);
    // Get the data for stage "transform" in the PreviewStore; it should contain one fewer record than the source, because ExceptionTransform fails on the record with name "samuel".
    checkPreviewStore(previewRunner, "transform", 1);
    // Get the data for stage "sink" in the PreviewStore; it should likewise contain one fewer record than the source.
    checkPreviewStore(previewRunner, "sink", 1);
    // Validate the metrics for preview
    validateMetric(2, previewId, "source.records.in", previewRunner);
    validateMetric(2, previewId, "source.records.out", previewRunner);
    validateMetric(2, previewId, "transform.records.in", previewRunner);
    validateMetric(1, previewId, "transform.records.out", previewRunner);
    validateMetric(1, previewId, "sink.records.out", previewRunner);
    validateMetric(1, previewId, "sink.records.in", previewRunner);
    // Check that the sink table is not created in the real (non-preview) namespace.
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Assert.assertNull(sinkManager.get());
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
Also used : PreviewStatus(co.cask.cdap.app.preview.PreviewStatus) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(co.cask.cdap.proto.artifact.AppRequest) PreviewManager(co.cask.cdap.app.preview.PreviewManager) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) PreviewRunner(co.cask.cdap.app.preview.PreviewRunner) ApplicationId(co.cask.cdap.proto.id.ApplicationId) PreviewConfig(co.cask.cdap.proto.artifact.preview.PreviewConfig)
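The status poll in this test uses a pre-Java 8 anonymous Callable. Since Tasks.waitFor accepts a Callable, the same check can be written as a lambda on Java 8+; a minimal equivalent sketch:

Tasks.waitFor(PreviewStatus.Status.RUN_FAILED, () -> {
    // Status may still be null before the preview run is registered.
    PreviewStatus status = previewRunner.getStatus();
    return status == null ? null : status.getStatus();
}, 5, TimeUnit.MINUTES);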

Example 22 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

From the class PreviewDataPipelineTest, method testDataPipelinePreviewRun:

private void testDataPipelinePreviewRun(Engine engine) throws Exception {
    PreviewManager previewManager = getPreviewManager();
    String sourceTableName = "singleInput";
    String sinkTableName = "singleOutput";
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     * source --> transform --> sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
        .addStage(new ETLStage("transform", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
        .addConnection("source", "transform")
        .addConnection("transform", "sink")
        .setEngine(engine)
        .setNumOfRecordsPreview(100)
        .build();
    // Construct the preview config with the program name and program type.
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the table for the mock source
    addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    final PreviewRunner previewRunner = previewManager.getRunner(previewId);
    // Wait for the preview status to go into COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {

        @Override
        public PreviewStatus.Status call() throws Exception {
            PreviewStatus status = previewRunner.getStatus();
            return status == null ? null : status.getStatus();
        }
    }, 5, TimeUnit.MINUTES);
    // Get the data for stage "source" in the PreviewStore, should contain two records.
    checkPreviewStore(previewRunner, "source", 2);
    // Get the data for stage "transform" in the PreviewStore, should contain two records.
    checkPreviewStore(previewRunner, "transform", 2);
    // Get the data for stage "sink" in the PreviewStore, should contain two records.
    checkPreviewStore(previewRunner, "sink", 2);
    // Validate the metrics for preview
    validateMetric(2, previewId, "source.records.in", previewRunner);
    validateMetric(2, previewId, "source.records.out", previewRunner);
    validateMetric(2, previewId, "transform.records.in", previewRunner);
    validateMetric(2, previewId, "transform.records.out", previewRunner);
    validateMetric(2, previewId, "sink.records.out", previewRunner);
    validateMetric(2, previewId, "sink.records.in", previewRunner);
    // Check that the sink table is not created in the real (non-preview) namespace.
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Assert.assertNull(sinkManager.get());
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
    Assert.assertNotNull(previewRunner.getRunRecord());
}
Also used : PreviewStatus(co.cask.cdap.app.preview.PreviewStatus) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(co.cask.cdap.proto.artifact.AppRequest) PreviewManager(co.cask.cdap.app.preview.PreviewManager) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) PreviewRunner(co.cask.cdap.app.preview.PreviewRunner) ApplicationId(co.cask.cdap.proto.id.ApplicationId) PreviewConfig(co.cask.cdap.proto.artifact.preview.PreviewConfig)
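Note the two ingestion styles used by these tests: Example 21 writes each record under an explicit row key, while this example writes all records in a single call. Both forms of MockSource.writeInput appear verbatim above:

// Keyed form (Example 21): one record per call, with an explicit row key.
MockSource.writeInput(inputManager, "1", recordSamuel);
// Bulk form (Example 22): all records in one call.
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));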

Example 23 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

From the class PreviewDataPipelineTest, method testMultiplePhase:

private void testMultiplePhase(Engine engine) throws Exception {
    /*
     * source1 ----> t1 ------
     *                        | --> innerjoin ----> t4 ------
     * source2 ----> t2 ------                                 |
     *                                                         | ---> outerjoin --> sink1
     *                                                         |
     * source3 -------------------- t3 ------------------------
     */
    PreviewManager previewManager = getPreviewManager();
    Schema inputSchema1 = Schema.recordOf("customerRecord",
        Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema2 = Schema.recordOf("itemRecord",
        Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema3 = Schema.recordOf("transactionRecord",
        Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("i_id", Schema.of(Schema.Type.STRING)));
    Schema outSchema2 = Schema.recordOf("join.output",
        Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("i_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
        Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    String source1MultiJoinInput = "multiJoinSource1-" + engine;
    String source2MultiJoinInput = "multiJoinSource2-" + engine;
    String source3MultiJoinInput = "multiJoinSource3-" + engine;
    String outputName = "multiJoinOutput-" + engine;
    String sinkName = "multiJoinOutputSink-" + engine;
    String outerJoinName = "multiJoinOuter-" + engine;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source1", MockSource.getPlugin(source1MultiJoinInput, inputSchema1)))
        .addStage(new ETLStage("source2", MockSource.getPlugin(source2MultiJoinInput, inputSchema2)))
        .addStage(new ETLStage("source3", MockSource.getPlugin(source3MultiJoinInput, inputSchema3)))
        .addStage(new ETLStage("t1", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("t2", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("t3", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("t4", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("innerjoin", MockJoiner.getPlugin("t1.customer_id=t2.cust_id", "t1,t2", "")))
        .addStage(new ETLStage(outerJoinName, MockJoiner.getPlugin("t4.item_id=t3.i_id", "", "")))
        .addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName)))
        .addConnection("source1", "t1")
        .addConnection("source2", "t2")
        .addConnection("source3", "t3")
        .addConnection("t1", "innerjoin")
        .addConnection("t2", "innerjoin")
        .addConnection("innerjoin", "t4")
        .addConnection("t3", outerJoinName)
        .addConnection("t4", outerJoinName)
        .addConnection(outerJoinName, sinkName)
        .setEngine(engine)
        .setNumOfRecordsPreview(100)
        .build();
    // Construct the preview config with the program name and program type.
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the table for the mock source
    addDatasetInstance(Table.class.getName(), source1MultiJoinInput, DatasetProperties.of(ImmutableMap.of("schema", inputSchema1.toString())));
    addDatasetInstance(Table.class.getName(), source2MultiJoinInput, DatasetProperties.of(ImmutableMap.of("schema", inputSchema2.toString())));
    addDatasetInstance(Table.class.getName(), source3MultiJoinInput, DatasetProperties.of(ImmutableMap.of("schema", inputSchema3.toString())));
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    final PreviewRunner previewRunner = previewManager.getRunner(previewId);
    ingestData(inputSchema1, inputSchema2, inputSchema3, source1MultiJoinInput, source2MultiJoinInput, source3MultiJoinInput);
    // Wait for the preview status to go into COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {

        @Override
        public PreviewStatus.Status call() throws Exception {
            PreviewStatus status = previewRunner.getStatus();
            return status == null ? null : status.getStatus();
        }
    }, 5, TimeUnit.MINUTES);
    checkPreviewStore(previewRunner, sinkName, 3);
    validateMetric(3L, previewId, sinkName + ".records.in", previewRunner);
}
Also used : PreviewStatus(co.cask.cdap.app.preview.PreviewStatus) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(co.cask.cdap.proto.artifact.AppRequest) PreviewManager(co.cask.cdap.app.preview.PreviewManager) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) PreviewRunner(co.cask.cdap.app.preview.PreviewRunner) ApplicationId(co.cask.cdap.proto.id.ApplicationId) PreviewConfig(co.cask.cdap.proto.artifact.preview.PreviewConfig)
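A hedged reading of the two MockJoiner calls in this test, assuming the first argument is the join condition and the second names the required inputs (an empty list leaving all inputs optional, which is why every field of outSchema2 is nullable):

MockJoiner.getPlugin("t1.customer_id=t2.cust_id", "t1,t2", "");  // t1 and t2 both required: behaves as an inner join
MockJoiner.getPlugin("t4.item_id=t3.i_id", "", "");              // no required inputs: behaves as an outer join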

Example 24 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

From the class PipelineSpecGeneratorTest, method testConflictingInputSchemas:

@Test(expected = IllegalArgumentException.class)
public void testConflictingInputSchemas() {
    /*
     *           ---- transformA ----
     *           |                  |
     * source ---                   |--- sink
     *           |                  |
     *           ---- transformB ----
     *
     * sink gets schema A and schema B as input, so spec generation should fail
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addStage(new ETLStage("tA", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("tB", MOCK_TRANSFORM_B))
        .addConnection("source", "tA")
        .addConnection("source", "tB")
        .addConnection("tA", "sink")
        .addConnection("tB", "sink")
        .build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)
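For contrast, validation is expected to pass when every input edge into a stage carries the same schema. A minimal hedged sketch of a passing variant, reusing MOCK_TRANSFORM_A on both branches so the sink sees a single input schema (stage names tA1 and tA2 are illustrative):

ETLBatchConfig validConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MOCK_SOURCE))
    .addStage(new ETLStage("sink", MOCK_SINK))
    .addStage(new ETLStage("tA1", MOCK_TRANSFORM_A))
    // Same transform on both branches, hence the same output schema.
    .addStage(new ETLStage("tA2", MOCK_TRANSFORM_A))
    .addConnection("source", "tA1")
    .addConnection("source", "tA2")
    .addConnection("tA1", "sink")
    .addConnection("tA2", "sink")
    .build();
// Expected to succeed: both of the sink's inputs share one schema.
specGenerator.generateSpec(validConfig);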

Example 25 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

From the class PipelineSpecGeneratorTest, method testBadErrorTransformInput:

@Test(expected = IllegalArgumentException.class)
public void testBadErrorTransformInput() {
    /*
     * source --> joiner --> error --> sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("joiner", MOCK_JOINER))
        .addStage(new ETLStage("error", MOCK_ERROR))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("source", "joiner")
        .addConnection("joiner", "error")
        .addConnection("error", "sink")
        .build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)
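The expected IllegalArgumentException reflects that an error collector can only consume the error output of a stage that emits error records, which a joiner does not. A hedged sketch of a variant that should validate, attaching the error stage to a transform instead (assuming MOCK_TRANSFORM_A, used elsewhere in this test class, emits error records):

ETLBatchConfig validConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MOCK_SOURCE))
    .addStage(new ETLStage("transform", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("error", MOCK_ERROR))
    .addStage(new ETLStage("sink", MOCK_SINK))
    .addConnection("source", "transform")
    // Error collectors hang off error-emitting stages such as transforms.
    .addConnection("transform", "error")
    .addConnection("error", "sink")
    .build();
specGenerator.generateSpec(validConfig);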

Aggregations

ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig): 121
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 121
Test (org.junit.Test): 117
ApplicationId (io.cdap.cdap.proto.id.ApplicationId): 79
AppRequest (io.cdap.cdap.proto.artifact.AppRequest): 76
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 74
ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 72
WorkflowManager (io.cdap.cdap.test.WorkflowManager): 72
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 70
Table (io.cdap.cdap.api.dataset.table.Table): 68
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 64
Schema (io.cdap.cdap.api.data.schema.Schema): 58
KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 46
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 43
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 43
ApplicationManager (co.cask.cdap.test.ApplicationManager): 40
WorkflowManager (co.cask.cdap.test.WorkflowManager): 39
Table (co.cask.cdap.api.dataset.table.Table): 36
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 33
ArrayList (java.util.ArrayList): 33