Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project cdap by cdapio.
The class DataStreamsApp, method configure.
@Override
public void configure() {
  DataStreamsConfig config = getConfig();
  setDescription(Objects.firstNonNull(config.getDescription(), "Data Streams Application"));
  DataStreamsPipelineSpec spec;
  try {
    spec = new DataStreamsPipelineSpecGenerator(
      getConfigurer().getDeployedNamespace(),
      getConfigurer(),
      getConfigurer().getRuntimeConfigurer(),
      ImmutableSet.of(StreamingSource.PLUGIN_TYPE),
      ImmutableSet.of(BatchSink.PLUGIN_TYPE, SparkSink.PLUGIN_TYPE, AlertPublisher.PLUGIN_TYPE),
      getConfigurer()).generateSpec(config);
  } catch (ValidationException e) {
    throw new IllegalArgumentException(
      String.format("Failed to configure pipeline: %s",
                    e.getFailures().isEmpty()
                      ? e.getMessage()
                      : e.getFailures().iterator().next().getFullMessage()),
      e);
  }
  addSpark(new DataStreamsSparkLauncher(spec));
}
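For context, the config that generateSpec consumes above is assembled with DataStreamsConfig.builder(), as the tests below show. A minimal sketch, assuming the test-scope MockSource and MockSink plugins that appear later on this page (schema, records, and the "outputTable" name are placeholders):

// Minimal sketch: a two-stage source -> sink pipeline; any StreamingSource/BatchSink
// plugin pair would serve the same role in a real deployment.
DataStreamsConfig minimalConfig = DataStreamsConfig.builder()
  .setBatchInterval("10s")  // length of each streaming micro-batch
  .addStage(new ETLStage("source", MockSource.getPlugin(schema, records)))
  .addStage(new ETLStage("sink", MockSink.getPlugin("outputTable")))
  .addConnection("source", "sink")  // wire the two-stage DAG
  .build();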
Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project cdap by cdapio.
The class DataStreamsTest, method testErrorTransform.
@Test
public void testErrorTransform() throws Exception {
  Schema inputSchema = Schema.recordOf("user", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  List<StructuredRecord> input = ImmutableList.of(
    StructuredRecord.builder(inputSchema).set("name", "Leo").build(),
    StructuredRecord.builder(inputSchema).set("name", "Ralph").build(),
    StructuredRecord.builder(inputSchema).set("name", "Don").build(),
    StructuredRecord.builder(inputSchema).set("name", "Mike").build(),
    StructuredRecord.builder(inputSchema).set("name", "April").build());
  /*
   * source--> filter1 --> filter2 --> agg1 --> agg2
   *              |           |         |        |
   *              |-----------|---------|--------|--> flatten errors --> sink1
   *              |           |         |        |
   *              |-----------|---------|--------|--> filter errors --> sink2
   *
   * arrows coming out the right represent output records
   * arrows coming out the bottom represent error records
   * this will test multiple stages from multiple phases emitting errors to the same stage
   * as well as errors from one stage going to multiple stages
   */
  File outputDir = TMP_FOLDER.newFolder();
  String output1 = new File(outputDir, "output1").getAbsolutePath();
  String output2 = new File(outputDir, "output2").getAbsolutePath();
  DataStreamsConfig config = DataStreamsConfig.builder()
    .setBatchInterval("5s")
    .addStage(new ETLStage("source", MockSource.getPlugin(inputSchema, input)))
    .addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("name", "Leo")))
    .addStage(new ETLStage("filter2", StringValueFilterTransform.getPlugin("name", "Ralph")))
    .addStage(new ETLStage("agg1", GroupFilterAggregator.getPlugin("name", "Don")))
    .addStage(new ETLStage("agg2", GroupFilterAggregator.getPlugin("name", "Mike")))
    .addStage(new ETLStage("errorflatten", FlattenErrorTransform.getPlugin()))
    .addStage(new ETLStage("errorfilter", FilterErrorTransform.getPlugin(3)))
    .addStage(new ETLStage("sink1", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "sink1", output1)))
    .addStage(new ETLStage("sink2", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "sink2", output2)))
    .addConnection("source", "filter1")
    .addConnection("filter1", "filter2")
    .addConnection("filter2", "agg1")
    .addConnection("agg1", "agg2")
    .addConnection("filter1", "errorflatten")
    .addConnection("filter1", "errorfilter")
    .addConnection("filter2", "errorflatten")
    .addConnection("filter2", "errorfilter")
    .addConnection("agg1", "errorflatten")
    .addConnection("agg1", "errorfilter")
    .addConnection("agg2", "errorflatten")
    .addConnection("agg2", "errorfilter")
    .addConnection("errorflatten", "sink1")
    .addConnection("errorfilter", "sink2")
    .setCheckpointDir(checkpointDir)
    .build();
  AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
  ApplicationId appId = NamespaceId.DEFAULT.app("ErrTransformTest");
  ApplicationManager appManager = deployApplication(appId, appRequest);
  Map<String, String> args = Collections.singletonMap(io.cdap.cdap.etl.common.Constants.CONSOLIDATE_STAGES, "true");
  SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
  sparkManager.startAndWaitForGoodRun(args, ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
  Schema flattenSchema = Schema.recordOf(
    "erroruser",
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("errMsg", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("errCode", Schema.nullableOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("errStage", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
  Set<StructuredRecord> expected = ImmutableSet.of(
    StructuredRecord.builder(flattenSchema)
      .set("name", "Leo").set("errMsg", "bad string value").set("errCode", 1).set("errStage", "filter1").build(),
    StructuredRecord.builder(flattenSchema)
      .set("name", "Ralph").set("errMsg", "bad string value").set("errCode", 1).set("errStage", "filter2").build(),
    StructuredRecord.builder(flattenSchema)
      .set("name", "Don").set("errMsg", "bad val").set("errCode", 3).set("errStage", "agg1").build(),
    StructuredRecord.builder(flattenSchema)
      .set("name", "Mike").set("errMsg", "bad val").set("errCode", 3).set("errStage", "agg2").build());
  Tasks.waitFor(true, () -> {
    Set<StructuredRecord> outputRecords = new HashSet<>(MockExternalSink.readOutput(output1, flattenSchema));
    return expected.equals(outputRecords);
  }, 4, TimeUnit.MINUTES);
  Set<StructuredRecord> expected2 = ImmutableSet.of(
    StructuredRecord.builder(inputSchema).set("name", "Leo").build(),
    StructuredRecord.builder(inputSchema).set("name", "Ralph").build());
  Tasks.waitFor(true, () -> {
    Set<StructuredRecord> outputRecords = new HashSet<>(MockExternalSink.readOutput(output2, inputSchema));
    return expected2.equals(outputRecords);
  }, 4, TimeUnit.MINUTES);
}
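The errMsg/errCode/errStage values asserted above are attached by the failing stage when it routes a record to the error port rather than the output port. A minimal sketch of that pattern, assuming the standard io.cdap.cdap.etl.api interfaces; MyFilterTransform is a hypothetical stand-in, not the actual StringValueFilterTransform mock:

import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.etl.api.Emitter;
import io.cdap.cdap.etl.api.InvalidEntry;
import io.cdap.cdap.etl.api.Transform;

// Hypothetical transform: records with a matching name become error records,
// everything else passes through unchanged.
public class MyFilterTransform extends Transform<StructuredRecord, StructuredRecord> {
  @Override
  public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) {
    if ("Leo".equals(input.get("name"))) {
      // the error code and message travel with the record to any connected error stage
      emitter.emitError(new InvalidEntry<>(1, "bad string value", input));
    } else {
      emitter.emit(input);
    }
  }
}

Stages like errorflatten and errorfilter then consume those records through the ErrorTransform plugin type, which exposes the message, code, and originating stage name seen in flattenSchema.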
Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project cdap by cdapio.
The class PreviewDataStreamsTest, method testDataStreamsPreviewRun.
@Test
public void testDataStreamsPreviewRun() throws Exception {
  PreviewManager previewManager = getPreviewManager();
  String sinkTableName = "singleOutput";
  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  List<StructuredRecord> records = new ArrayList<>();
  StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
  StructuredRecord recordTest = StructuredRecord.builder(schema).set("name", "test").build();
  records.add(recordSamuel);
  records.add(recordBob);
  records.add(recordTest);
  /*
   * source --> transform --> sink
   */
  DataStreamsConfig etlConfig = DataStreamsConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPlugin(schema, records)))
    .addStage(new ETLStage("transform", IdentityTransform.getPlugin()))
    .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
    .addConnection("source", "transform")
    .addConnection("transform", "sink")
    .setNumOfRecordsPreview(100)
    .setBatchInterval("1s")
    .setCheckpointDir("file://" + TMP_FOLDER.getRoot().toPath().toString())
    .build();
  // Construct the preview config with the program name and program type.
  PreviewConfig previewConfig =
    new PreviewConfig(DataStreamsSparkLauncher.NAME, ProgramType.SPARK, Collections.<String, String>emptyMap(), 1);
  AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
  // Start the preview and get the corresponding PreviewRunner.
  ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
  // Wait for the preview to be running, then wait until the records are processed in the sink.
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      Map<String, List<JsonElement>> data = previewManager.getData(previewId, "sink");
      return data != null && data.get(DATA_TRACER_PROPERTY) != null && data.get(DATA_TRACER_PROPERTY).size() == 3;
    }
  }, 1, TimeUnit.MINUTES);
  // Check the data in the source and transform stages.
  checkPreviewStore(previewManager, previewId, "source", 3);
  checkPreviewStore(previewManager, previewId, "transform", 3);
  // Wait for the pipeline to be shut down by the timer.
  TimeUnit.MINUTES.sleep(1);
  Tasks.waitFor(PreviewStatus.Status.KILLED_BY_TIMER, new Callable<PreviewStatus.Status>() {
    @Override
    public PreviewStatus.Status call() throws Exception {
      return previewManager.getStatus(previewId).getStatus();
    }
  }, 1, TimeUnit.MINUTES);
  // Validate the metrics for the preview run.
  validateMetric(3, previewId, "source.records.out", previewManager);
  validateMetric(3, previewId, "transform.records.in", previewManager);
  validateMetric(3, previewId, "transform.records.out", previewManager);
  validateMetric(3, previewId, "sink.records.in", previewManager);
  validateMetric(3, previewId, "sink.records.out", previewManager);
  // Check that the sink table was not created in the real (non-preview) space.
  DataSetManager<Table> sinkManager = getDataset(sinkTableName);
  Assert.assertNull(sinkManager.get());
}
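As an aside, the two anonymous Callable polls above can be written more compactly with lambdas, matching the style of the first test on this page. A sketch under the same assumptions (the same previewManager, previewId, and DATA_TRACER_PROPERTY as in the test):

// Equivalent status poll expressed as a lambda; behavior is unchanged.
Tasks.waitFor(PreviewStatus.Status.KILLED_BY_TIMER,
              () -> previewManager.getStatus(previewId).getStatus(),
              1, TimeUnit.MINUTES);

// Read the records traced at the sink stage once the run has finished.
Map<String, List<JsonElement>> sinkData = previewManager.getData(previewId, "sink");
List<JsonElement> traced = sinkData.get(DATA_TRACER_PROPERTY);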