use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testAlertPublisher.
private void testAlertPublisher(Engine engine) throws Exception {
String sourceName = "alertSource" + engine.name();
String sinkName = "alertSink" + engine.name();
String topic = "alertTopic" + engine.name();
/*
 * source --> nullAlert --> sink
 *               |
 *               |--> TMS publisher
 */
ETLBatchConfig config = ETLBatchConfig.builder()
  .setEngine(engine)
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id")))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addStage(new ETLStage("tms alert", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace())))
  .addConnection("source", "nullAlert")
  .addConnection("nullAlert", "sink")
  .addConnection("nullAlert", "tms alert")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
DataSetManager<Table> sourceTable = getDataset(sourceName);
MockSource.writeInput(sourceTable, ImmutableList.of(record1, record2, alertRecord));
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.start();
manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
DataSetManager<Table> sinkTable = getDataset(sinkName);
Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(sinkTable));
Set<StructuredRecord> expected = ImmutableSet.of(record1, record2);
Assert.assertEquals(expected, actual);
MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
Set<Alert> actualMessages = new HashSet<>();
try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
while (iter.hasNext()) {
Message message = iter.next();
Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
actualMessages.add(alert);
}
}
Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<>()));
Assert.assertEquals(expectedMessages, actualMessages);
validateMetric(3, appId, "source.records.out");
validateMetric(3, appId, "nullAlert.records.in");
validateMetric(2, appId, "nullAlert.records.out");
validateMetric(1, appId, "nullAlert.records.alert");
validateMetric(2, appId, "sink.records.in");
validateMetric(1, appId, "tms alert.records.in");
}
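Every snippet on this page drives a deployed pipeline through the same WorkflowManager lifecycle. A minimal sketch of that shared pattern, assuming an already-deployed ApplicationManager named appManager as in the test above:
// Minimal sketch, not a complete test: get the pipeline's workflow, run it once, wait for completion.
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.start();
manager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);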
use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testSimpleConditionWithActions.
@Test
public void testSimpleConditionWithActions() throws Exception {
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
 * action --> condition --> trueSource --> trueSink
 *               |
 *               |--> falseSource --> falseSink
 */
String appName = "SimpleConditionWithActions";
String trueSource = "true" + appName + "Source";
String falseSource = "false" + appName + "Source";
String trueSink = "true" + appName + "Sink";
String falseSink = "false" + appName + "Sink";
String actionTable = "actionTable" + appName;
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("trueSource", MockSource.getPlugin(trueSource, schema)))
  .addStage(new ETLStage("falseSource", MockSource.getPlugin(falseSource, schema)))
  .addStage(new ETLStage("trueSink", MockSink.getPlugin(trueSink)))
  .addStage(new ETLStage("falseSink", MockSink.getPlugin(falseSink)))
  .addStage(new ETLStage("condition", MockCondition.getPlugin("condition")))
  .addStage(new ETLStage("action", MockAction.getPlugin(actionTable, "row1", "key1", "val1")))
  .addConnection("action", "condition")
  .addConnection("condition", "trueSource", true)
  .addConnection("condition", "falseSource", false)
  .addConnection("trueSource", "trueSink")
  .addConnection("falseSource", "falseSink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(appName);
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
for (String branch : Arrays.asList("true", "false")) {
String source = branch.equals("true") ? trueSource : falseSource;
String sink = branch.equals("true") ? trueSink : falseSink;
// write records to source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
if (branch.equals("true")) {
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
} else {
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
}
// check sink
DataSetManager<Table> sinkManager = getDataset(sink);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(2, appId, branch + "Source.records.out");
validateMetric(2, appId, branch + "Sink.records.in");
// check Action is executed correctly
DataSetManager<Table> actionTableDS = getDataset(actionTable);
Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
}
}
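Note that the loop above runs the same workflow twice, once per branch, so the false-branch iteration waits on the cumulative run count rather than a single run:
// first iteration ("true" branch): wait for the one run just started
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// second iteration ("false" branch): waitForRuns takes the total number of completed
// runs, which is now 2 because the "true" run has already finished
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);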
use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testSplitterToJoiner.
private void testSplitterToJoiner(Engine engine) throws Exception {
Schema schema = Schema.recordOf("user", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
Schema infoSchema = Schema.recordOf("userInfo", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("fname", Schema.of(Schema.Type.STRING)));
Schema joinedSchema = Schema.recordOf("join.output", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("fname", Schema.of(Schema.Type.STRING)));
StructuredRecord user0 = StructuredRecord.builder(schema).set("id", 0L).build();
StructuredRecord user1 = StructuredRecord.builder(schema).set("id", 1L).set("name", "one").build();
StructuredRecord user0Info = StructuredRecord.builder(infoSchema).set("id", 0L).set("fname", "zero").build();
StructuredRecord user0Joined = StructuredRecord.builder(joinedSchema).set("id", 0L).set("fname", "zero").build();
String signupsName = "splitjoinSignups" + engine.name();
String userInfoName = "splitjoinUserInfo" + engine.name();
String sink1Name = "splitjoinSink1" + engine.name();
String sink2Name = "splitjoinSink2" + engine.name();
/*
 * userInfo -------------------------|
 *                                   |--> joiner --> sink1
 *                           |null --|
 * signups --> namesplitter--|
 *                           |non-null --> sink2
 */
ETLBatchConfig config = ETLBatchConfig.builder()
  .setEngine(engine)
  .addStage(new ETLStage("signups", MockSource.getPlugin(signupsName, schema)))
  .addStage(new ETLStage("userInfo", MockSource.getPlugin(userInfoName, infoSchema)))
  .addStage(new ETLStage("namesplitter", NullFieldSplitterTransform.getPlugin("name")))
  .addStage(new ETLStage("joiner", MockJoiner.getPlugin("namesplitter.id=userInfo.id", "namesplitter,userInfo", "")))
  .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Name)))
  .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Name)))
  .addConnection("signups", "namesplitter")
  .addConnection("namesplitter", "sink2", "non-null")
  .addConnection("namesplitter", "joiner", "null")
  .addConnection("userInfo", "joiner")
  .addConnection("joiner", "sink1")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
ApplicationId appId = NamespaceId.DEFAULT.app("SplitJoinTest-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
// write signups data
DataSetManager<Table> signupsManager = getDataset(signupsName);
MockSource.writeInput(signupsManager, ImmutableList.of(user0, user1));
// write to userInfo the name for user0 to join against
DataSetManager<Table> userInfoManager = getDataset(userInfoName);
MockSource.writeInput(userInfoManager, ImmutableList.of(user0Info));
// run pipeline
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check output
DataSetManager<Table> sinkManager = getDataset(sink2Name);
Set<StructuredRecord> expected = ImmutableSet.of(user1);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
sinkManager = getDataset(sink1Name);
expected = ImmutableSet.of(user0Joined);
actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(2, appId, "signups.records.out");
validateMetric(1, appId, "userInfo.records.out");
validateMetric(2, appId, "namesplitter.records.in");
validateMetric(1, appId, "namesplitter.records.out.null");
validateMetric(1, appId, "namesplitter.records.out.non-null");
validateMetric(2, appId, "joiner.records.in");
validateMetric(1, appId, "joiner.records.out");
validateMetric(1, appId, "sink1.records.in");
validateMetric(1, appId, "sink2.records.in");
}
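The sink contents and metrics follow from how the two signup records are routed; a brief trace using the records defined above:
// user1 (name = "one") leaves namesplitter on its non-null port and lands in sink2 unchanged.
// user0 (name is null) leaves on the null port, joins user0Info on id = 0L,
// and reaches sink1 as user0Joined {id: 0, fname: "zero"}.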
use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testMacrosSparkPipeline.
/**
 * Tests that macros provided as runtime arguments are substituted in a Spark pipeline.
 */
@Test
public void testMacrosSparkPipeline() throws Exception {
/*
 * Trivial Spark pipeline from batch source to batch sink.
 *
 * source --------- sink
 */
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .setEngine(Engine.SPARK)
  .addStage(new ETLStage("source", MockRuntimeDatasetSource.getPlugin("sparkinput", "${runtime${source}}")))
  .addStage(new ETLStage("sink", MockRuntimeDatasetSink.getPlugin("sparkoutput", "${runtime}${sink}")))
  .addConnection("source", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("SparkApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
// set runtime arguments for macro substitution
Map<String, String> runtimeArguments = ImmutableMap.of("runtime", "mockRuntime", "sink", "SparkSinkDataset", "source", "Source", "runtimeSource", "mockRuntimeSparkSourceDataset");
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.setRuntimeArgs(runtimeArguments);
// make sure the datasets don't exist beforehand
Assert.assertNull(getDataset("mockRuntimeSparkSourceDataset").get());
Assert.assertNull(getDataset("mockRuntimeSparkSinkDataset").get());
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// now the datasets should exist
Assert.assertNotNull(getDataset("mockRuntimeSparkSourceDataset").get());
Assert.assertNotNull(getDataset("mockRuntimeSparkSinkDataset").get());
}
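The dataset names asserted above follow from macro expansion of the two plugin properties against the runtime arguments; a worked expansion:
// source property "${runtime${source}}": the inner ${source} expands to "Source",
//   giving "${runtimeSource}", which expands to "mockRuntimeSparkSourceDataset"
// sink property "${runtime}${sink}": "${runtime}" expands to "mockRuntime" and "${sink}"
//   to "SparkSinkDataset", concatenating to "mockRuntimeSparkSinkDataset"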
use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testSplitterToConnector.
private void testSplitterToConnector(Engine engine) throws Exception {
Schema schema = Schema.recordOf("user", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("email", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
StructuredRecord user0 = StructuredRecord.builder(schema).set("id", 0L).build();
StructuredRecord user1 = StructuredRecord.builder(schema).set("id", 1L).set("email", "one@example.com").build();
StructuredRecord user2 = StructuredRecord.builder(schema).set("id", 2L).set("name", "two").build();
StructuredRecord user3 = StructuredRecord.builder(schema).set("id", 3L).set("name", "three").set("email", "three@example.com").build();
String sourceName = "splitconSource" + engine.name();
String sink1Name = "splitconSink1" + engine.name();
String sink2Name = "splitconSink2" + engine.name();
/*
 *
 *                                                  |null --> sink1
 *            |null--> identity-agg --> splitter2 --|
 * source --> splitter1--|                          |non-null --|
 *            |                                                 |--> sink2
 *            |non-null-----------------------------------------|
 */
ETLBatchConfig config = ETLBatchConfig.builder()
  .setEngine(engine)
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("splitter1", NullFieldSplitterTransform.getPlugin("name")))
  .addStage(new ETLStage("splitter2", NullFieldSplitterTransform.getPlugin("email")))
  .addStage(new ETLStage("identity", IdentityAggregator.getPlugin()))
  .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Name)))
  .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Name)))
  .addConnection("source", "splitter1")
  .addConnection("splitter1", "identity", "null")
  .addConnection("splitter1", "sink2", "non-null")
  .addConnection("identity", "splitter2")
  .addConnection("splitter2", "sink1", "null")
  .addConnection("splitter2", "sink2", "non-null")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
ApplicationId appId = NamespaceId.DEFAULT.app("SplitConTest-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
// write input data
DataSetManager<Table> inputManager = getDataset(sourceName);
MockSource.writeInput(inputManager, ImmutableList.of(user0, user1, user2, user3));
// run pipeline
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check output
// sink1 should only have records where both name and email are null (user0)
DataSetManager<Table> sinkManager = getDataset(sink1Name);
Set<StructuredRecord> expected = ImmutableSet.of(user0);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
// sink2 should have anything with a non-null name or non-null email
sinkManager = getDataset(sink2Name);
expected = ImmutableSet.of(user1, user2, user3);
actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(4, appId, "source.records.out");
validateMetric(1, appId, "sink1.records.in");
validateMetric(3, appId, "sink2.records.in");
}
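As a cross-check of the two sink assertions, a trace of the four input records through the splitters:
// user0: name and email both null -> splitter1 "null" port -> identity -> splitter2 "null" port -> sink1
// user1: name null, email set     -> splitter1 "null" port -> identity -> splitter2 "non-null" port -> sink2
// user2: name set, email null     -> splitter1 "non-null" port -> sink2
// user3: name and email both set  -> splitter1 "non-null" port -> sink2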