Example 16 with ETLConfig

Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.

The class DataPipelineTest, method testPipelineWithActions.

private void testPipelineWithActions(Engine engine) throws Exception {
    String actionTable = "actionTable-" + engine;
    String action1RowKey = "action1.row";
    String action1ColumnKey = "action1.column";
    String action1Value = "action1.value";
    String action2RowKey = "action2.row";
    String action2ColumnKey = "action2.column";
    String action2Value = "action2.value";
    String action3RowKey = "action3.row";
    String action3ColumnKey = "action3.column";
    String action3Value = "action3.value";
    String sourceName = "actionSource-" + engine;
    String sinkName = "actionSink-" + engine;
    String sourceTableName = "actionSourceTable-" + engine;
    String sinkTableName = "actionSinkTable-" + engine;
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, action1RowKey, action1ColumnKey, action1Value)))
        .addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, action2RowKey, action2ColumnKey, action2Value)))
        .addStage(new ETLStage("action3", MockAction.getPlugin(actionTable, action3RowKey, action3ColumnKey, action3Value)))
        .addStage(new ETLStage(sourceName, MockSource.getPlugin(sourceTableName, schema)))
        .addStage(new ETLStage(sinkName, MockSink.getPlugin(sinkTableName)))
        .addConnection(sourceName, sinkName)
        .addConnection("action1", "action2")
        .addConnection("action2", sourceName)
        .addConnection(sinkName, "action3")
        .setEngine(engine)
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("MyApp-" + engine);
    ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    // write records to source
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // check sink
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
    Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expected, actual);
    DataSetManager<Table> actionTableDS = getDataset(actionTable);
    Assert.assertEquals(action1Value, MockAction.readOutput(actionTableDS, action1RowKey, action1ColumnKey));
    Assert.assertEquals(action2Value, MockAction.readOutput(actionTableDS, action2RowKey, action2ColumnKey));
    Assert.assertEquals(action3Value, MockAction.readOutput(actionTableDS, action3RowKey, action3ColumnKey));
    validateMetric(2, appId, sourceName + ".records.out");
    validateMetric(2, appId, sinkName + ".records.in");
}
Also used: ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) WorkflowManager(co.cask.cdap.test.WorkflowManager) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ApplicationId(co.cask.cdap.proto.id.ApplicationId)
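
A note on the wiring above: actions take part in the pipeline DAG exactly like data stages, but no records flow along action edges; the connections only impose execution order, giving action1 -> action2 -> (source -> sink) -> action3. A condensed sketch of the same pattern, using only builder calls that appear in the test (the helper name is ours, and ETLStage.getName() is assumed to expose the stage name):

static ETLBatchConfig actionSandwich(ETLStage action1, ETLStage action2, ETLStage action3,
                                     ETLStage source, ETLStage sink, Engine engine) {
    return ETLBatchConfig.builder("* * * * *")
        .addStage(action1).addStage(action2).addStage(action3)
        .addStage(source).addStage(sink)
        // actions run before the data phase...
        .addConnection(action1.getName(), action2.getName())
        .addConnection(action2.getName(), source.getName())
        // ...records flow only on the source -> sink edge...
        .addConnection(source.getName(), sink.getName())
        // ...and action3 runs once the data phase completes.
        .addConnection(sink.getName(), action3.getName())
        .setEngine(engine)
        .build();
}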

Example 17 with ETLConfig

Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.

The class DataPipelineTest, method testPostAction.

@Test
public void testPostAction() throws Exception {
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MockSource.getPlugin("actionInput")))
        .addStage(new ETLStage("sink", MockSink.getPlugin("actionOutput")))
        .addPostAction(new ETLStage("tokenWriter", NodeStatesAction.getPlugin("tokenTable")))
        .addConnection("source", "sink")
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("ActionApp");
    ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    StructuredRecord recordJane = StructuredRecord.builder(schema).set("name", "jane").build();
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("actionInput"));
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane));
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> tokenTableManager = getDataset(NamespaceId.DEFAULT.dataset("tokenTable"));
    Table tokenTable = tokenTableManager.get();
    NodeStatus status = NodeStatus.valueOf(Bytes.toString(tokenTable.get(Bytes.toBytes("phase-1"), Bytes.toBytes("status"))));
    Assert.assertEquals(NodeStatus.COMPLETED, status);
}
Also used: ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Schema(co.cask.cdap.api.data.schema.Schema) WorkflowManager(co.cask.cdap.test.WorkflowManager) ApplicationId(co.cask.cdap.proto.id.ApplicationId) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) NodeStatus(co.cask.cdap.api.workflow.NodeStatus) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Test(org.junit.Test)
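
Note that "tokenWriter" is registered with addPostAction rather than addStage and never appears in an addConnection call: post-actions run once after the pipeline run finishes, and NodeStatesAction records the status of each workflow node in the given table. The "phase-1" row key is evidently the name of this two-stage pipeline's single data phase. A small hedged helper (the method name is ours; the Table, Bytes, and NodeStatus calls are taken verbatim from the test) that generalizes the final assertion to any phase:

private void assertPhaseStatus(Table tokenTable, String phase, NodeStatus expected) {
    byte[] raw = tokenTable.get(Bytes.toBytes(phase), Bytes.toBytes("status"));
    Assert.assertNotNull("no status recorded for phase " + phase, raw);
    Assert.assertEquals(expected, NodeStatus.valueOf(Bytes.toString(raw)));
}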

Example 18 with ETLConfig

Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.

The class PreviewDataPipelineTest, method testDataPipelinePreviewRun.

private void testDataPipelinePreviewRun(Engine engine) throws Exception {
    PreviewManager previewManager = getPreviewManager();
    String sourceTableName = "singleInput";
    String sinkTableName = "singleOutput";
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     * source --> transform -> sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
        .addStage(new ETLStage("transform", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
        .addConnection("source", "transform")
        .addConnection("transform", "sink")
        .setEngine(engine)
        .setNumOfRecordsPreview(100)
        .build();
    // Construct the preview config with the program name and program type
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the table for the mock source
    addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    final PreviewRunner previewRunner = previewManager.getRunner(previewId);
    // Wait for the preview status to reach COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {

        @Override
        public PreviewStatus.Status call() throws Exception {
            PreviewStatus status = previewRunner.getStatus();
            return status == null ? null : status.getStatus();
        }
    }, 5, TimeUnit.MINUTES);
    // The preview store should contain two records for each of the three stages.
    checkPreviewStore(previewRunner, "source", 2);
    checkPreviewStore(previewRunner, "transform", 2);
    checkPreviewStore(previewRunner, "sink", 2);
    // Validate the metrics for preview
    validateMetric(2, previewId, "source.records.in", previewRunner);
    validateMetric(2, previewId, "source.records.out", previewRunner);
    validateMetric(2, previewId, "transform.records.in", previewRunner);
    validateMetric(2, previewId, "transform.records.out", previewRunner);
    validateMetric(2, previewId, "sink.records.out", previewRunner);
    validateMetric(2, previewId, "sink.records.in", previewRunner);
    // Check that the sink table was not created outside the preview space.
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Assert.assertNull(sinkManager.get());
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
Also used: PreviewStatus(co.cask.cdap.app.preview.PreviewStatus) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(co.cask.cdap.proto.artifact.AppRequest) PreviewManager(co.cask.cdap.app.preview.PreviewManager) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) PreviewRunner(co.cask.cdap.app.preview.PreviewRunner) ApplicationId(co.cask.cdap.proto.id.ApplicationId) PreviewConfig(co.cask.cdap.proto.artifact.preview.PreviewConfig)
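
The anonymous Callable above just polls PreviewRunner.getStatus() until Tasks.waitFor observes the expected value. The same polling reappears in the next example, so a hedged extraction into a helper is natural (the method name is ours; every call is taken verbatim from the test):

private void waitForPreviewStatus(final PreviewRunner runner,
                                  PreviewStatus.Status expected) throws Exception {
    Tasks.waitFor(expected, new Callable<PreviewStatus.Status>() {
        @Override
        public PreviewStatus.Status call() throws Exception {
            // getStatus() may return null before the preview run has started
            PreviewStatus status = runner.getStatus();
            return status == null ? null : status.getStatus();
        }
    }, 5, TimeUnit.MINUTES);
}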

Example 19 with ETLConfig

Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.

The class PreviewDataPipelineTest, method testPreviewFailedRun.

private void testPreviewFailedRun(Engine engine) throws Exception {
    PreviewManager previewManager = getPreviewManager();
    String sourceTableName = "singleInput";
    String sinkTableName = "singleOutput";
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     * source --> transform -> sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
        .addStage(new ETLStage("transform", ExceptionTransform.getPlugin("name", "samuel")))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
        .addConnection("source", "transform")
        .addConnection("transform", "sink")
        .setNumOfRecordsPreview(100)
        .setEngine(engine)
        .build();
    // Construct the preview config with the program name and program type.
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the table for the mock source
    addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    MockSource.writeInput(inputManager, "1", recordSamuel);
    MockSource.writeInput(inputManager, "2", recordBob);
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    final PreviewRunner previewRunner = previewManager.getRunner(previewId);
    // Wait for the preview status to reach RUN_FAILED.
    Tasks.waitFor(PreviewStatus.Status.RUN_FAILED, new Callable<PreviewStatus.Status>() {

        @Override
        public PreviewStatus.Status call() throws Exception {
            PreviewStatus status = previewRunner.getStatus();
            return status == null ? null : status.getStatus();
        }
    }, 5, TimeUnit.MINUTES);
    // The preview store keeps both source records, but the failing record never
    // reaches the output of "transform" or "sink", so those stages hold one record.
    checkPreviewStore(previewRunner, "source", 2);
    checkPreviewStore(previewRunner, "transform", 1);
    checkPreviewStore(previewRunner, "sink", 1);
    // Validate the metrics for preview
    validateMetric(2, previewId, "source.records.in", previewRunner);
    validateMetric(2, previewId, "source.records.out", previewRunner);
    validateMetric(2, previewId, "transform.records.in", previewRunner);
    validateMetric(1, previewId, "transform.records.out", previewRunner);
    validateMetric(1, previewId, "sink.records.out", previewRunner);
    validateMetric(1, previewId, "sink.records.in", previewRunner);
    // Check that the sink table was not created outside the preview space.
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Assert.assertNull(sinkManager.get());
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
Also used: PreviewStatus(co.cask.cdap.app.preview.PreviewStatus) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(co.cask.cdap.proto.artifact.AppRequest) PreviewManager(co.cask.cdap.app.preview.PreviewManager) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) PreviewRunner(co.cask.cdap.app.preview.PreviewRunner) ApplicationId(co.cask.cdap.proto.id.ApplicationId) PreviewConfig(co.cask.cdap.proto.artifact.preview.PreviewConfig)
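
The counts follow from what ExceptionTransform evidently does: it throws on the record whose "name" field equals "samuel", so the run ends in RUN_FAILED while the record that passes through is still captured by the preview store (hence 1 at "transform" and "sink" versus 2 at "source"). A hedged sketch of such a transform; the real ExceptionTransform lives in the cdap mock plugins and may differ, and the Transform/Emitter signatures are our assumption about the ETL API:

// Fails on records whose field matches a configured value; passes the rest through.
public class FailOnMatchTransform extends Transform<StructuredRecord, StructuredRecord> {
    private final String field;
    private final String badValue;

    public FailOnMatchTransform(String field, String badValue) {
        this.field = field;
        this.badValue = badValue;
    }

    @Override
    public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
        if (badValue.equals(input.get(field))) {
            throw new IllegalStateException("failing on " + field + "=" + badValue);
        }
        emitter.emit(input);
    }
}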

Example 20 with ETLConfig

Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.

The class DataStreamsSparkSinkTest, method testSparkSink.

@Test
// stream-rate-updater thread in Spark.
public void testSparkSink() throws Exception {
    Schema schema = Schema.recordOf("test", Schema.Field.of("id", Schema.of(Schema.Type.STRING)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    List<StructuredRecord> input = new ArrayList<>();
    StructuredRecord samuelRecord = StructuredRecord.builder(schema).set("id", "0").set("name", "samuel").build();
    StructuredRecord jacksonRecord = StructuredRecord.builder(schema).set("id", "1").set("name", "jackson").build();
    StructuredRecord dwayneRecord = StructuredRecord.builder(schema).set("id", "2").set("name", "dwayne").build();
    StructuredRecord johnsonRecord = StructuredRecord.builder(schema).set("id", "3").set("name", "johnson").build();
    input.add(samuelRecord);
    input.add(jacksonRecord);
    input.add(dwayneRecord);
    input.add(johnsonRecord);
    DataStreamsConfig etlConfig = DataStreamsConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(schema, input)))
        .addStage(new ETLStage("sink", co.cask.cdap.etl.mock.spark.streaming.MockSink.getPlugin("${tablename}")))
        .addConnection("source", "sink")
        .setBatchInterval("1s")
        .build();
    ApplicationId appId = NamespaceId.DEFAULT.app("sparksinkapp");
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
    testSparkSink(appManager, "output1");
    testSparkSink(appManager, "output2");
}
Also used: ApplicationManager(co.cask.cdap.test.ApplicationManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) ApplicationId(co.cask.cdap.proto.id.ApplicationId) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) DataStreamsConfig(co.cask.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Test(org.junit.Test)
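
The sink here is configured with "${tablename}", a CDAP macro that is left unresolved at deployment time and bound from runtime arguments on each start, which is why one deployed app can be exercised against "output1" and then "output2". A hedged guess at the shape of the private testSparkSink(appManager, outputName) helper; the Spark program name and the waiting details are assumptions, while getSparkManager and start-with-arguments are standard cdap-unit-test calls:

// Bind the ${tablename} macro for this run, let the stream flow, then stop.
SparkManager sparkManager = appManager.getSparkManager("DataStreamsSparkLauncher");
sparkManager.start(ImmutableMap.of("tablename", outputName));
// ... wait for records to appear in the dataset named by outputName, assert, then:
sparkManager.stop();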

Aggregations

ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 50 uses
ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 37 uses
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 35 uses
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 35 uses
ApplicationManager (co.cask.cdap.test.ApplicationManager): 32 uses
Test (org.junit.Test): 31 uses
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 27 uses
Schema (co.cask.cdap.api.data.schema.Schema): 26 uses
WorkflowManager (co.cask.cdap.test.WorkflowManager): 24 uses
Table (co.cask.cdap.api.dataset.table.Table): 23 uses
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 19 uses
ArrayList (java.util.ArrayList): 9 uses
HashSet (java.util.HashSet): 6 uses
DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig): 5 uses
ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin): 5 uses
File (java.io.File): 5 uses
TimeoutException (java.util.concurrent.TimeoutException): 5 uses
ETLRealtimeConfig (co.cask.cdap.etl.proto.v2.ETLRealtimeConfig): 4 uses
WorkerManager (co.cask.cdap.test.WorkerManager): 4 uses
PreviewManager (co.cask.cdap.app.preview.PreviewManager): 3 uses