Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
From class DataPipelineTest, method testSimpleCondition.
private void testSimpleCondition(Engine engine) throws Exception {
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
 * source --> condition --> trueSink
 *               |
 *               |--------> falseSink
 */
String appName = "SimpleCondition-" + engine;
String source = appName + "Source-" + engine;
String trueSink = "true" + appName + "Sink-" + engine;
String falseSink = "false" + appName + "Sink-" + engine;
String conditionTableName = "condition-" + engine;
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", MockSource.getPlugin(source, schema)))
  .addStage(new ETLStage("trueSink", MockSink.getPlugin(trueSink)))
  .addStage(new ETLStage("falseSink", MockSink.getPlugin(falseSink)))
  .addStage(new ETLStage("condition", MockCondition.getPlugin("condition", conditionTableName)))
  .addConnection("source", "condition")
  .addConnection("condition", "trueSink", true)
  .addConnection("condition", "falseSink", false)
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(appName);
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
// write records to source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = null;
for (String branch : Arrays.asList("true", "false")) {
String sink = branch.equals("true") ? trueSink : falseSink;
workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
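// the first (true-branch) run yields one COMPLETED run; the false-branch run brings the total to two, hence the different waits below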
if (branch.equals("true")) {
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
} else {
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
}
// check sink
DataSetManager<Table> sinkManager = getDataset(sink);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
// check condition table
DataSetManager<Table> conditionTableDS = getDataset(conditionTableName);
Assert.assertEquals("2", MockCondition.readOutput(conditionTableDS, "stats", "source.input.records"));
Assert.assertEquals("2", MockCondition.readOutput(conditionTableDS, "stats", "source.output.records"));
Assert.assertEquals("0", MockCondition.readOutput(conditionTableDS, "stats", "source.error.records"));
validateMetric(branch.equals("true") ? 2 : 4, appId, "source.records.out");
validateMetric(2, appId, branch + "Sink.records.in");
}
boolean foundStatisticsInToken = false;
if (workflowManager != null) {
List<RunRecord> history = workflowManager.getHistory(ProgramRunStatus.COMPLETED);
// Checking the token of a single run should be fine.
String runId = history.get(0).getPid();
for (WorkflowToken.Scope scope : Arrays.asList(WorkflowToken.Scope.SYSTEM, WorkflowToken.Scope.USER)) {
WorkflowTokenDetail token = workflowManager.getToken(runId, scope, null);
for (Map.Entry<String, List<WorkflowTokenDetail.NodeValueDetail>> tokenData : token.getTokenData().entrySet()) {
if (tokenData.getKey().startsWith(io.cdap.cdap.etl.common.Constants.StageStatistics.PREFIX)) {
foundStatisticsInToken = true;
break;
}
}
}
}
Assert.assertTrue(foundStatisticsInToken);
}
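MockCondition is a mock plugin from the CDAP pipeline test artifacts, so its source is not shown on this page. As a rough illustration only, the sketch below shows what a condition plugin driven by the condition.branch.to.execute runtime argument could look like, assuming the io.cdap.cdap.etl.api.condition.Condition API; the class name and the way the argument is read are assumptions of the sketch, not the actual MockCondition implementation.

// Hypothetical sketch only; not the actual MockCondition source.
import io.cdap.cdap.etl.api.condition.Condition;
import io.cdap.cdap.etl.api.condition.ConditionContext;

public class BranchArgumentCondition extends Condition {
  @Override
  public boolean apply(ConditionContext context) throws Exception {
    // The test supplies "condition.branch.to.execute" as a runtime argument; reading it
    // through context.getArguments() is an assumption of this sketch.
    String branch = context.getArguments().get("condition.branch.to.execute");
    // Returning true routes records along the connection registered with addConnection(..., true),
    // i.e. to trueSink; returning false routes them to falseSink.
    return "true".equals(branch);
  }
}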
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
From class DataPipelineTest, method testServiceUrl.
public void testServiceUrl(Engine engine) throws Exception {
// Deploy the ServiceApp application
ApplicationManager appManager = deployApplication(ServiceApp.class);
// Start the Greeting service and use it
ServiceManager serviceManager = appManager.getServiceManager(ServiceApp.Name.SERVICE_NAME).start();
// Wait for the service to start up
serviceManager.waitForStatus(true);
URL url = new URL(serviceManager.getServiceURL(), "name");
HttpRequest httpRequest = HttpRequest.post(url).withBody("bob").build();
HttpResponse httpResponse = HttpRequests.execute(httpRequest);
Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
url = new URL(serviceManager.getServiceURL(), "name/bob");
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
Assert.assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
String response;
try {
response = new String(ByteStreams.toByteArray(connection.getInputStream()), Charsets.UTF_8);
} finally {
connection.disconnect();
}
Assert.assertEquals("bob", response);
String sourceName = "ServiceUrlInput-" + engine.name();
String sinkName = "ServiceUrlOutput-" + engine.name();
/*
* source --> filter --> sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .setEngine(engine)
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("filter", FilterTransform.getPlugin("name")))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addConnection("source", "filter")
  .addConnection("filter", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ServiceUrl-" + engine);
appManager = deployApplication(appId, appRequest);
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(schema).set("name", "jane").build();
// write records to the source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceName));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check output
DataSetManager<Table> sinkManager = getDataset(sinkName);
Set<StructuredRecord> expected = ImmutableSet.of(recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
serviceManager.stop();
serviceManager.waitForRun(ProgramRunStatus.KILLED, 180, TimeUnit.SECONDS);
}
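FilterTransform is likewise a test-only plugin; what the test demonstrates is that a pipeline stage can discover and call a user service (here the ServiceApp deployed at the top of the method) while processing records. The sketch below is a hypothetical transform following that pattern; whether the transform context exposes ServiceDiscoverer's getServiceURL directly is an assumption, and the class name, endpoint, and filtering rule are illustrative, matching only the observed behaviour of the test (only "bob", the name registered with the service, reaches the sink).

// Hypothetical sketch of a transform that calls a deployed service per record; not the actual FilterTransform.
import java.net.HttpURLConnection;
import java.net.URL;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.etl.api.Emitter;
import io.cdap.cdap.etl.api.Transform;
import io.cdap.cdap.etl.api.TransformContext;

public class ServiceLookupFilter extends Transform<StructuredRecord, StructuredRecord> {
  private TransformContext context;

  @Override
  public void initialize(TransformContext context) throws Exception {
    super.initialize(context);
    this.context = context;
  }

  @Override
  public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
    String name = input.get("name");
    // getServiceURL comes from CDAP's ServiceDiscoverer API; exposing it on the transform context is assumed here.
    URL base = context.getServiceURL(ServiceApp.Name.SERVICE_NAME);
    HttpURLConnection conn = (HttpURLConnection) new URL(base, "name/" + name).openConnection();
    try {
      // Keep only records whose name the service recognizes (returns HTTP 200).
      if (conn.getResponseCode() == HttpURLConnection.HTTP_OK) {
        emitter.emit(input);
      }
    } finally {
      conn.disconnect();
    }
  }
}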
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
From class DataPipelineTest, method testPipelineWithAllActions.
@Test
public void testPipelineWithAllActions() throws Exception {
String actionTable = "actionTable";
String action1RowKey = "action1.row";
String action1ColumnKey = "action1.column";
String action1Value = "action1.value";
String action2RowKey = "action2.row";
String action2ColumnKey = "action2.column";
String action2Value = "action2.value";
String action3RowKey = "action3.row";
String action3ColumnKey = "action3.column";
String action3Value = "action3.value";
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, action1RowKey, action1ColumnKey, action1Value)))
  .addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, action2RowKey, action2ColumnKey, action2Value)))
  .addStage(new ETLStage("action3", MockAction.getPlugin(actionTable, action3RowKey, action3ColumnKey, action3Value)))
  .addConnection("action1", "action2")
  .addConnection("action1", "action3")
  .setEngine(Engine.MAPREDUCE)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("MyActionOnlyApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
DataSetManager<Table> actionTableDS = getDataset(actionTable);
Assert.assertEquals(action1Value, MockAction.readOutput(actionTableDS, action1RowKey, action1ColumnKey));
Assert.assertEquals(action2Value, MockAction.readOutput(actionTableDS, action2RowKey, action2ColumnKey));
Assert.assertEquals(action3Value, MockAction.readOutput(actionTableDS, action3RowKey, action3ColumnKey));
List<RunRecord> history = workflowManager.getHistory(ProgramRunStatus.COMPLETED);
Assert.assertEquals(1, history.size());
String runId = history.get(0).getPid();
WorkflowTokenDetail tokenDetail = workflowManager.getToken(runId, WorkflowToken.Scope.USER, action1RowKey + action1ColumnKey);
validateToken(tokenDetail, action1RowKey + action1ColumnKey, action1Value);
tokenDetail = workflowManager.getToken(runId, WorkflowToken.Scope.USER, action2RowKey + action2ColumnKey);
validateToken(tokenDetail, action2RowKey + action2ColumnKey, action2Value);
tokenDetail = workflowManager.getToken(runId, WorkflowToken.Scope.USER, action3RowKey + action3ColumnKey);
validateToken(tokenDetail, action3RowKey + action3ColumnKey, action3Value);
}
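MockAction is another mock plugin; the assertions above rely on two of its effects: it writes the configured row/column/value into the action table, and the same value ends up in the user-scoped workflow token under the key rowKey + columnKey. The sketch below is a simplified, hypothetical version of that behaviour on top of the io.cdap.cdap.etl.api.action.Action API; the constructor, the transactional dataset access, and the use of getArguments().set(...) are assumptions of the sketch, not the actual MockAction source.

// Hypothetical, simplified sketch of MockAction-style behaviour; not the actual plugin source.
import io.cdap.cdap.api.dataset.table.Put;
import io.cdap.cdap.api.dataset.table.Table;
import io.cdap.cdap.etl.api.action.Action;
import io.cdap.cdap.etl.api.action.ActionContext;

public class TableWriteAction extends Action {
  private final String tableName;
  private final String row;
  private final String column;
  private final String value;

  public TableWriteAction(String tableName, String row, String column, String value) {
    this.tableName = tableName;
    this.row = row;
    this.column = column;
    this.value = value;
  }

  @Override
  public void run(ActionContext context) throws Exception {
    // Write the configured cell inside a transaction (execute/getDataset usage is assumed here).
    context.execute(datasetContext -> {
      Table table = datasetContext.getDataset(tableName);
      table.put(new Put(row).add(column, value));
    });
    // Surface the same value so it appears in the user workflow token under row + column,
    // which is what the getToken(...) assertions above read back.
    context.getArguments().set(row + column, value);
  }
}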
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
From class DataPipelineTest, method testAlertPublisher.
private void testAlertPublisher(Engine engine) throws Exception {
String sourceName = "alertSource" + engine.name();
String sinkName = "alertSink" + engine.name();
String topic = "alertTopic" + engine.name();
/*
 * source --> nullAlert --> sink
 *               |
 *               |--> TMS publisher
 */
ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
  .setEngine(engine)
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id")))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addStage(new ETLStage("tms", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace())))
  .addConnection("source", "nullAlert")
  .addConnection("nullAlert", "sink")
  .addConnection("nullAlert", "tms")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
DataSetManager<Table> sourceTable = getDataset(sourceName);
MockSource.writeInput(sourceTable, ImmutableList.of(record1, record2, alertRecord));
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.start();
manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
DataSetManager<Table> sinkTable = getDataset(sinkName);
Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(sinkTable));
Set<StructuredRecord> expected = ImmutableSet.of(record1, record2);
Assert.assertEquals(expected, actual);
MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
Set<Alert> actualMessages = new HashSet<>();
try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
while (iter.hasNext()) {
Message message = iter.next();
Alert alert = GSON.fromJson(message.getPayloadAsString(), Alert.class);
actualMessages.add(alert);
}
}
Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<String, String>()));
Assert.assertEquals(expectedMessages, actualMessages);
validateMetric(3, appId, "source.records.out");
validateMetric(3, appId, "nullAlert.records.in");
validateMetric(2, appId, "nullAlert.records.out");
validateMetric(1, appId, "nullAlert.records.alert");
validateMetric(2, appId, "sink.records.in");
validateMetric(1, appId, "tms.records.in");
}
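TMSAlertPublisher forwards each Alert emitted by the nullAlert stage to the given Transactional Messaging System topic, which is why the test fetches the topic and expects exactly one alert. A hypothetical sketch of an alert publisher built on the io.cdap.cdap.etl.api.AlertPublisher API follows; obtaining a MessagePublisher from the stage context and the JSON encoding are assumptions of the sketch, not the actual TMSAlertPublisher source.

// Hypothetical sketch of an alert publisher; not the actual TMSAlertPublisher source.
import java.util.Iterator;
import com.google.gson.Gson;
import io.cdap.cdap.api.messaging.MessagePublisher;
import io.cdap.cdap.etl.api.Alert;
import io.cdap.cdap.etl.api.AlertPublisher;
import io.cdap.cdap.etl.api.AlertPublisherContext;

public class TopicAlertPublisher extends AlertPublisher {
  private static final Gson GSON = new Gson();
  private final String namespace;
  private final String topic;
  private MessagePublisher publisher;

  public TopicAlertPublisher(String namespace, String topic) {
    this.namespace = namespace;
    this.topic = topic;
  }

  @Override
  public void initialize(AlertPublisherContext context) throws Exception {
    // Getting a MessagePublisher from the context is an assumption of this sketch.
    this.publisher = context.getMessagePublisher();
  }

  @Override
  public void publish(Iterator<Alert> alerts) throws Exception {
    while (alerts.hasNext()) {
      // Each alert is serialized to JSON and published to the configured TMS topic,
      // matching what the test reads back with MessageFetcher.
      publisher.publish(namespace, topic, GSON.toJson(alerts.next()));
    }
  }
}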
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
From class DataPipelineTest, method testMacrosSparkPipeline.
/**
 * Tests that if macros are provided, the datasets they name are created only at runtime,
 * after the macros have been substituted with runtime arguments.
 */
@Test
public void testMacrosSparkPipeline() throws Exception {
/*
* Trivial Spark pipeline from batch source to batch sink.
*
* source --------- sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .setEngine(Engine.SPARK)
  .addStage(new ETLStage("source", MockRuntimeDatasetSource.getPlugin("sparkinput", "${runtime${source}}")))
  .addStage(new ETLStage("sink", MockRuntimeDatasetSink.getPlugin("sparkoutput", "${runtime}${sink}")))
  .addConnection("source", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("SparkApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
// set runtime arguments for macro substitution
Map<String, String> runtimeArguments = ImmutableMap.of("runtime", "mockRuntime", "sink", "SparkSinkDataset", "source", "Source", "runtimeSource", "mockRuntimeSparkSourceDataset");
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.setRuntimeArgs(runtimeArguments);
// make sure the datasets don't exist beforehand
Assert.assertNull(getDataset("mockRuntimeSparkSourceDataset").get());
Assert.assertNull(getDataset("mockRuntimeSparkSinkDataset").get());
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// now the datasets should exist
Assert.assertNotNull(getDataset("mockRuntimeSparkSourceDataset").get());
Assert.assertNotNull(getDataset("mockRuntimeSparkSinkDataset").get());
}
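The two macros above stay unresolved at deploy time and are substituted from the runtime arguments when the workflow starts: the nested ${source} resolves to "Source", so ${runtime${source}} becomes ${runtimeSource} and then "mockRuntimeSparkSourceDataset", while ${runtime}${sink} concatenates to "mockRuntimeSparkSinkDataset". That is why the datasets only exist after the run. A plugin opts a property into this behaviour by annotating it with @Macro in its PluginConfig; the snippet below is a minimal, hypothetical config sketch, and the field name is illustrative rather than taken from MockRuntimeDatasetSource.

// Minimal hypothetical plugin config with a macro-enabled property; the field name is illustrative.
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.plugin.PluginConfig;

public class RuntimeDatasetConfig extends PluginConfig {
  @Macro  // allows ${...} macro syntax; resolved from runtime arguments when the run starts
  @Description("Name of the dataset to read or write; may contain macros.")
  private String runtimeDatasetName;
}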