Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From class DataPipelineTest, method testPostAction:
@Test
public void testPostAction() throws Exception {
  // Pipeline: source -> sink, with a post-action that records node states.
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MockSource.getPlugin("actionInput")))
    .addStage(new ETLStage("sink", MockSink.getPlugin("actionOutput")))
    .addPostAction(new ETLStage("tokenWriter", NodeStatesAction.getPlugin("tokenTable")))
    .addConnection("source", "sink")
    .build();
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("ActionApp");
  ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
  StructuredRecord recordJane = StructuredRecord.builder(schema).set("name", "jane").build();
  // Write the input records and run the workflow to completion.
  DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("actionInput"));
  MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane));
  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflowManager.start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  // The post-action should have written the status of phase-1 to the token table.
  DataSetManager<Table> tokenTableManager = getDataset(NamespaceId.DEFAULT.dataset("tokenTable"));
  Table tokenTable = tokenTableManager.get();
  NodeStatus status = NodeStatus.valueOf(
    Bytes.toString(tokenTable.get(Bytes.toBytes("phase-1"), Bytes.toBytes("status"))));
  Assert.assertEquals(NodeStatus.COMPLETED, status);
}
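A note on the Schema pieces used throughout these examples: Schema.recordOf builds an immutable record schema, StructuredRecord.builder creates rows that conform to it, and schema.toString() emits Avro-style JSON that Schema.parseJson can read back. A minimal standalone sketch (the class name SchemaSketch is ours; the rest is the co.cask.cdap.api API):

import java.io.IOException;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;

public class SchemaSketch {
  public static void main(String[] args) throws IOException {
    // The same single-field record schema used in the test above.
    Schema schema = Schema.recordOf("testRecord",
      Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    // Field values are set through the builder; records are immutable.
    StructuredRecord record = StructuredRecord.builder(schema).set("name", "samuel").build();
    System.out.println((String) record.get("name"));  // prints "samuel"
    // toString() emits Avro-style JSON, which is how the preview tests below
    // pass the schema as a dataset property; parseJson reverses it.
    Schema roundTripped = Schema.parseJson(schema.toString());
    System.out.println(schema.equals(roundTripped));  // prints "true"
  }
}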
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From class PreviewDataPipelineTest, method testDataPipelinePreviewRun:
private void testDataPipelinePreviewRun(Engine engine) throws Exception {
  PreviewManager previewManager = getPreviewManager();
  String sourceTableName = "singleInput";
  String sinkTableName = "singleOutput";
  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  /*
   * source --> transform --> sink
   */
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
    .addStage(new ETLStage("transform", IdentityTransform.getPlugin()))
    .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
    .addConnection("source", "transform")
    .addConnection("transform", "sink")
    .setEngine(engine)
    .setNumOfRecordsPreview(100)
    .build();
  // Construct the preview config with the program name and program type.
  PreviewConfig previewConfig =
    new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
  // Create the table for the mock source.
  addDatasetInstance(Table.class.getName(), sourceTableName,
    DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
  DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
  StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
  MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig, previewConfig);
  // Start the preview and get the corresponding PreviewRunner.
  ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
  final PreviewRunner previewRunner = previewManager.getRunner(previewId);
  // Wait for the preview status to go into COMPLETED.
  Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {

    @Override
    public PreviewStatus.Status call() throws Exception {
      PreviewStatus status = previewRunner.getStatus();
      return status == null ? null : status.getStatus();
    }
  }, 5, TimeUnit.MINUTES);
  // Each of the stages "source", "transform", and "sink" in the PreviewStore
  // should contain two records.
  checkPreviewStore(previewRunner, "source", 2);
  checkPreviewStore(previewRunner, "transform", 2);
  checkPreviewStore(previewRunner, "sink", 2);
  // Validate the metrics for the preview.
  validateMetric(2, previewId, "source.records.in", previewRunner);
  validateMetric(2, previewId, "source.records.out", previewRunner);
  validateMetric(2, previewId, "transform.records.in", previewRunner);
  validateMetric(2, previewId, "transform.records.out", previewRunner);
  validateMetric(2, previewId, "sink.records.in", previewRunner);
  validateMetric(2, previewId, "sink.records.out", previewRunner);
  // Check that the sink table was not created in the real space.
  DataSetManager<Table> sinkManager = getDataset(sinkTableName);
  Assert.assertNull(sinkManager.get());
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
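Since Tasks.waitFor just takes an expected value and a Callable plus a timeout, the polling block above can be written more compactly as a lambda on Java 8+; a sketch of the equivalent form:

// Equivalent lambda form of the status poll above; Callable is a functional
// interface, so the anonymous class is only needed on pre-8 JDKs.
Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
  PreviewStatus status = previewRunner.getStatus();
  return status == null ? null : status.getStatus();
}, 5, TimeUnit.MINUTES);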
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From class PreviewDataPipelineTest, method testPreviewFailedRun:
private void testPreviewFailedRun(Engine engine) throws Exception {
  PreviewManager previewManager = getPreviewManager();
  String sourceTableName = "singleInput";
  String sinkTableName = "singleOutput";
  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  /*
   * source --> transform --> sink
   */
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
    .addStage(new ETLStage("transform", ExceptionTransform.getPlugin("name", "samuel")))
    .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
    .addConnection("source", "transform")
    .addConnection("transform", "sink")
    .setNumOfRecordsPreview(100)
    .setEngine(engine)
    .build();
  // Construct the preview config with the program name and program type.
  PreviewConfig previewConfig =
    new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
  // Create the table for the mock source.
  addDatasetInstance(Table.class.getName(), sourceTableName,
    DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
  DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
  StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
  MockSource.writeInput(inputManager, "1", recordSamuel);
  MockSource.writeInput(inputManager, "2", recordBob);
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
  // Start the preview and get the corresponding PreviewRunner.
  ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
  final PreviewRunner previewRunner = previewManager.getRunner(previewId);
  // Wait for the preview status to go into RUN_FAILED.
  Tasks.waitFor(PreviewStatus.Status.RUN_FAILED, new Callable<PreviewStatus.Status>() {

    @Override
    public PreviewStatus.Status call() throws Exception {
      PreviewStatus status = previewRunner.getStatus();
      return status == null ? null : status.getStatus();
    }
  }, 5, TimeUnit.MINUTES);
  // The stage "source" in the PreviewStore should contain both records, while
  // "transform" and "sink" should each contain one record less than the source.
  checkPreviewStore(previewRunner, "source", 2);
  checkPreviewStore(previewRunner, "transform", 1);
  checkPreviewStore(previewRunner, "sink", 1);
  // Validate the metrics for the preview.
  validateMetric(2, previewId, "source.records.in", previewRunner);
  validateMetric(2, previewId, "source.records.out", previewRunner);
  validateMetric(2, previewId, "transform.records.in", previewRunner);
  validateMetric(1, previewId, "transform.records.out", previewRunner);
  validateMetric(1, previewId, "sink.records.in", previewRunner);
  validateMetric(1, previewId, "sink.records.out", previewRunner);
  // Check that the sink table was not created in the real space.
  DataSetManager<Table> sinkManager = getDataset(sinkTableName);
  Assert.assertNull(sinkManager.get());
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
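The drop from two records at the source to one at the transform is the point of the test: ExceptionTransform.getPlugin("name", "samuel") configures a transform that fails on the record whose name field is "samuel". The real class lives in the CDAP ETL test mocks and is not shown in this excerpt; a hypothetical sketch of the idea against the public Transform API (the class name and constructor are ours):

import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.Transform;

// Hypothetical sketch: fail on the matching record, pass everything else through.
public class ThrowOnMatchTransform extends Transform<StructuredRecord, StructuredRecord> {

  private final String fieldName;
  private final String badValue;

  public ThrowOnMatchTransform(String fieldName, String badValue) {
    this.fieldName = fieldName;
    this.badValue = badValue;
  }

  @Override
  public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
    if (badValue.equals(input.get(fieldName))) {
      // Throwing here fails the run (hence RUN_FAILED above), while records
      // already emitted by earlier stages stay visible in the PreviewStore.
      throw new IllegalStateException("Intentional failure on " + fieldName + "=" + badValue);
    }
    emitter.emit(input);
  }
}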
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From class DataStreamsSparkSinkTest, method testSparkSink:
@Test
// stream-rate-updater thread in Spark.
public void testSparkSink() throws Exception {
  Schema schema = Schema.recordOf("test",
    Schema.Field.of("id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  List<StructuredRecord> input = new ArrayList<>();
  StructuredRecord samuelRecord = StructuredRecord.builder(schema).set("id", "0").set("name", "samuel").build();
  StructuredRecord jacksonRecord = StructuredRecord.builder(schema).set("id", "1").set("name", "jackson").build();
  StructuredRecord dwayneRecord = StructuredRecord.builder(schema).set("id", "2").set("name", "dwayne").build();
  StructuredRecord johnsonRecord = StructuredRecord.builder(schema).set("id", "3").set("name", "johnson").build();
  input.add(samuelRecord);
  input.add(jacksonRecord);
  input.add(dwayneRecord);
  input.add(johnsonRecord);
  // The sink table name is the macro ${tablename}, resolved at run time.
  DataStreamsConfig etlConfig = DataStreamsConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPlugin(schema, input)))
    .addStage(new ETLStage("sink", co.cask.cdap.etl.mock.spark.streaming.MockSink.getPlugin("${tablename}")))
    .addConnection("source", "sink")
    .setBatchInterval("1s")
    .build();
  ApplicationId appId = NamespaceId.DEFAULT.app("sparksinkapp");
  AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
  // Run the same deployed pipeline twice, against a different output table each time.
  testSparkSink(appManager, "output1");
  testSparkSink(appManager, "output2");
}
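Because the sink table name is the macro ${tablename}, one deployed app serves both runs. Presumably the testSparkSink(appManager, ...) helper, which is not shown in this excerpt, binds the macro through runtime arguments when starting the generated Spark program; a hedged sketch of that pattern, assuming DataStreamsSparkLauncher.NAME is the program name:

// Hedged sketch: start the streaming program with a runtime argument that
// resolves the ${tablename} macro for this run.
SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
sparkManager.start(ImmutableMap.of("tablename", "output1"));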
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From class DataStreamsTest, method testTransformComputeWithMacros:
@Test
public void testTransformComputeWithMacros() throws Exception {
  Schema schema = Schema.recordOf("test",
    Schema.Field.of("id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  List<StructuredRecord> input = new ArrayList<>();
  StructuredRecord samuelRecord = StructuredRecord.builder(schema).set("id", "123").set("name", "samuel").build();
  StructuredRecord jacksonRecord = StructuredRecord.builder(schema).set("id", "456").set("name", "jackson").build();
  StructuredRecord dwayneRecord = StructuredRecord.builder(schema).set("id", "789").set("name", "dwayne").build();
  StructuredRecord johnsonRecord = StructuredRecord.builder(schema).set("id", "0").set("name", "johnson").build();
  input.add(samuelRecord);
  input.add(jacksonRecord);
  input.add(dwayneRecord);
  input.add(johnsonRecord);
  // Pipeline: source -> sleep -> filter1 -> filter2 -> sink, with the output
  // table, the filtered field, and both filter values supplied as macros.
  DataStreamsConfig etlConfig = DataStreamsConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPlugin(schema, input)))
    .addStage(new ETLStage("sink", MockSink.getPlugin("${output}")))
    .addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("${field}", "${val1}")))
    .addStage(new ETLStage("filter2", StringValueFilterCompute.getPlugin("${field}", "${val2}")))
    .addStage(new ETLStage("sleep", SleepTransform.getPlugin(2L)))
    .addConnection("source", "sleep")
    .addConnection("sleep", "filter1")
    .addConnection("filter1", "filter2")
    .addConnection("filter2", "sink")
    .setBatchInterval("1s")
    .build();
  ApplicationId appId = NamespaceId.DEFAULT.app("simpleApp");
  AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
  // First run: filter out "dwayne" and "johnson", leaving samuel and jackson.
  final Set<StructuredRecord> expected = new HashSet<>();
  expected.add(samuelRecord);
  expected.add(jacksonRecord);
  testTransformComputeRun(appManager, expected, "dwayne", "johnson", "macroOutput1");
  validateMetric(appId, "source.records.out", 4);
  validateMetric(appId, "sleep.records.in", 4);
  validateMetric(appId, "sleep.records.out", 4);
  validateMetric(appId, "filter1.records.in", 4);
  validateMetric(appId, "filter1.records.out", 3);
  validateMetric(appId, "filter2.records.in", 3);
  validateMetric(appId, "filter2.records.out", 2);
  validateMetric(appId, "sink.records.in", 2);
  Assert.assertTrue(getMetric(appId, "sleep." + co.cask.cdap.etl.common.Constants.Metrics.TOTAL_TIME) > 0L);
  // Second run: filter out "samuel" and "jackson" instead, writing to a new table.
  expected.clear();
  expected.add(dwayneRecord);
  expected.add(johnsonRecord);
  testTransformComputeRun(appManager, expected, "samuel", "jackson", "macroOutput2");
}
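All four macros (${output}, ${field}, ${val1}, ${val2}) stay unresolved until run time, which is what lets the two testTransformComputeRun calls reuse one deployed app with different filter values and output tables. A hedged sketch of the runtime arguments the first call presumably translates to (the binding of "field" to "name" is inferred from the expected record sets and metrics):

Map<String, String> runtimeArgs = ImmutableMap.of(
  "field", "name",           // both filters inspect the "name" field
  "val1", "dwayne",          // filter1 drops records where name == "dwayne" (4 in, 3 out)
  "val2", "johnson",         // filter2 drops records where name == "johnson" (3 in, 2 out)
  "output", "macroOutput1"); // MockSink writes the surviving records here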