use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.
the class DataPipelineTest method testServiceUrl.
private void testServiceUrl(Engine engine) throws Exception {
// Deploy the ServiceApp application
ApplicationManager appManager = deployApplication(ServiceApp.class);
// Start Greeting service and use it
ServiceManager serviceManager = appManager.getServiceManager(ServiceApp.Name.SERVICE_NAME).start();
// Wait service startup
serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
URL url = new URL(serviceManager.getServiceURL(), "name");
HttpRequest httpRequest = HttpRequest.post(url).withBody("bob").build();
HttpResponse httpResponse = HttpRequests.execute(httpRequest, new DefaultHttpRequestConfig(false));
Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
url = new URL(serviceManager.getServiceURL(), "name/bob");
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
Assert.assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
String response;
try {
response = new String(ByteStreams.toByteArray(connection.getInputStream()), Charsets.UTF_8);
} finally {
connection.disconnect();
}
Assert.assertEquals("bob", response);
String sourceName = "ServiceUrlInput-" + engine.name();
String sinkName = "ServiceUrlOutput-" + engine.name();
/*
* source --> filter --> sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", FilterTransform.getPlugin("name"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source", "filter").addConnection("filter", "sink").build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ServiceUrl-" + engine);
appManager = deployApplication(appId, appRequest);
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(schema).set("name", "jane").build();
// write one record to each source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceName));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check output
DataSetManager<Table> sinkManager = getDataset(sinkName);
Set<StructuredRecord> expected = ImmutableSet.of(recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
serviceManager.stop();
serviceManager.waitForRun(ProgramRunStatus.KILLED, 180, TimeUnit.SECONDS);
}
use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.
the class DataPipelineTest method testPipelineWithAllActions.
@Test
public void testPipelineWithAllActions() throws Exception {
String actionTable = "actionTable";
String action1RowKey = "action1.row";
String action1ColumnKey = "action1.column";
String action1Value = "action1.value";
String action2RowKey = "action2.row";
String action2ColumnKey = "action2.column";
String action2Value = "action2.value";
String action3RowKey = "action3.row";
String action3ColumnKey = "action3.column";
String action3Value = "action3.value";
ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, action1RowKey, action1ColumnKey, action1Value))).addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, action2RowKey, action2ColumnKey, action2Value))).addStage(new ETLStage("action3", MockAction.getPlugin(actionTable, action3RowKey, action3ColumnKey, action3Value))).addConnection("action1", "action2").addConnection("action1", "action3").setEngine(Engine.MAPREDUCE).build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("MyActionOnlyApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
DataSetManager<Table> actionTableDS = getDataset(actionTable);
Assert.assertEquals(action1Value, MockAction.readOutput(actionTableDS, action1RowKey, action1ColumnKey));
Assert.assertEquals(action2Value, MockAction.readOutput(actionTableDS, action2RowKey, action2ColumnKey));
Assert.assertEquals(action3Value, MockAction.readOutput(actionTableDS, action3RowKey, action3ColumnKey));
List<RunRecord> history = workflowManager.getHistory(ProgramRunStatus.COMPLETED);
Assert.assertEquals(1, history.size());
String runId = history.get(0).getPid();
WorkflowTokenDetail tokenDetail = workflowManager.getToken(runId, WorkflowToken.Scope.USER, action1RowKey + action1ColumnKey);
validateToken(tokenDetail, action1RowKey + action1ColumnKey, action1Value);
tokenDetail = workflowManager.getToken(runId, WorkflowToken.Scope.USER, action2RowKey + action2ColumnKey);
validateToken(tokenDetail, action2RowKey + action2ColumnKey, action2Value);
tokenDetail = workflowManager.getToken(runId, WorkflowToken.Scope.USER, action3RowKey + action3ColumnKey);
validateToken(tokenDetail, action3RowKey + action3ColumnKey, action3Value);
}
use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.
the class DataPipelineTest method testParallelAggregators.
private void testParallelAggregators(Engine engine) throws Exception {
String source1Name = "pAggInput1-" + engine.name();
String source2Name = "pAggInput2-" + engine.name();
String sink1Name = "pAggOutput1-" + engine.name();
String sink2Name = "pAggOutput2-" + engine.name();
Schema inputSchema = Schema.recordOf("testRecord", Schema.Field.of("user", Schema.of(Schema.Type.STRING)), Schema.Field.of("item", Schema.of(Schema.Type.LONG)));
/*
source1 --|--> agg1 --> sink1
|
source2 --|--> agg2 --> sink2
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source1", MockSource.getPlugin(source1Name, inputSchema))).addStage(new ETLStage("source2", MockSource.getPlugin(source2Name, inputSchema))).addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Name))).addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Name))).addStage(new ETLStage("agg1", FieldCountAggregator.getPlugin("user", "string"))).addStage(new ETLStage("agg2", FieldCountAggregator.getPlugin("item", "long"))).addConnection("source1", "agg1").addConnection("source1", "agg2").addConnection("source2", "agg1").addConnection("source2", "agg2").addConnection("agg1", "sink1").addConnection("agg2", "sink2").build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ParallelAggApp-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
// write few records to each source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source1Name));
MockSource.writeInput(inputManager, ImmutableList.of(StructuredRecord.builder(inputSchema).set("user", "samuel").set("item", 1L).build(), StructuredRecord.builder(inputSchema).set("user", "samuel").set("item", 2L).build()));
inputManager = getDataset(NamespaceId.DEFAULT.dataset(source2Name));
MockSource.writeInput(inputManager, ImmutableList.of(StructuredRecord.builder(inputSchema).set("user", "samuel").set("item", 3L).build(), StructuredRecord.builder(inputSchema).set("user", "john").set("item", 4L).build(), StructuredRecord.builder(inputSchema).set("user", "john").set("item", 3L).build()));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
Schema outputSchema1 = Schema.recordOf("user.count", Schema.Field.of("user", Schema.of(Schema.Type.STRING)), Schema.Field.of("ct", Schema.of(Schema.Type.LONG)));
Schema outputSchema2 = Schema.recordOf("item.count", Schema.Field.of("item", Schema.of(Schema.Type.LONG)), Schema.Field.of("ct", Schema.of(Schema.Type.LONG)));
// check output
DataSetManager<Table> sinkManager = getDataset(sink1Name);
Set<StructuredRecord> expected = ImmutableSet.of(StructuredRecord.builder(outputSchema1).set("user", "all").set("ct", 5L).build(), StructuredRecord.builder(outputSchema1).set("user", "samuel").set("ct", 3L).build(), StructuredRecord.builder(outputSchema1).set("user", "john").set("ct", 2L).build());
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
sinkManager = getDataset(sink2Name);
expected = ImmutableSet.of(StructuredRecord.builder(outputSchema2).set("item", 0L).set("ct", 5L).build(), StructuredRecord.builder(outputSchema2).set("item", 1L).set("ct", 1L).build(), StructuredRecord.builder(outputSchema2).set("item", 2L).set("ct", 1L).build(), StructuredRecord.builder(outputSchema2).set("item", 3L).set("ct", 2L).build(), StructuredRecord.builder(outputSchema2).set("item", 4L).set("ct", 1L).build());
actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(2, appId, "source1.records.out");
validateMetric(3, appId, "source2.records.out");
validateMetric(5, appId, "agg1.records.in");
// 2 users, but FieldCountAggregator always emits an 'all' group
validateMetric(3, appId, "agg1.aggregator.groups");
validateMetric(3, appId, "agg1.records.out");
validateMetric(5, appId, "agg2.records.in");
// 4 items, but FieldCountAggregator always emits an 'all' group
validateMetric(5, appId, "agg2.aggregator.groups");
validateMetric(5, appId, "agg2.records.out");
validateMetric(3, appId, "sink1.records.in");
validateMetric(5, appId, "sink2.records.in");
}
use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.
the class DataPipelineTest method testExternalDatasetTracking.
private void testExternalDatasetTracking(Engine engine, boolean backwardsCompatible) throws Exception {
String suffix = engine.name() + (backwardsCompatible ? "-bc" : "");
// Define input/output datasets
String expectedExternalDatasetInput = "fileInput-" + suffix;
String expectedExternalDatasetOutput = "fileOutput-" + suffix;
// Define input/output directories
File inputDir = TMP_FOLDER.newFolder("input-" + suffix);
String inputFile = "input-file1.txt";
File outputDir = TMP_FOLDER.newFolder("output-" + suffix);
File outputSubDir1 = new File(outputDir, "subdir1");
File outputSubDir2 = new File(outputDir, "subdir2");
if (!backwardsCompatible) {
// Assert that there are no external datasets
Assert.assertNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetInput)).get());
Assert.assertNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetOutput)).get());
}
String name = backwardsCompatible ? null : expectedExternalDatasetOutput;
ETLBatchConfig.Builder builder = ETLBatchConfig.builder();
ETLBatchConfig etlConfig = builder.setEngine(engine).addStage(new ETLStage("source", MockExternalSource.getPlugin(expectedExternalDatasetInput, inputDir.getAbsolutePath()))).addStage(new ETLStage("sink", MockExternalSink.getPlugin(name, "dir1", outputSubDir1.getAbsolutePath(), name, "dir2", outputSubDir2.getAbsolutePath()))).addConnection("source", "sink").build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ExternalDatasetApp-" + suffix);
ApplicationManager appManager = deployApplication(appId, appRequest);
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(schema).set("name", "jane").build();
ImmutableList<StructuredRecord> allInput = ImmutableList.of(recordSamuel, recordBob, recordJane);
// Create input files
MockExternalSource.writeInput(new File(inputDir, inputFile).getAbsolutePath(), allInput);
Map<String, String> args = Collections.singletonMap(io.cdap.cdap.etl.common.Constants.CONSOLIDATE_STAGES, "true");
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.startAndWaitForGoodRun(args, ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
List<RunRecord> history = workflowManager.getHistory();
// there should be only one completed run
Assert.assertEquals(1, history.size());
Assert.assertEquals(ProgramRunStatus.COMPLETED, history.get(0).getStatus());
// Assert output
Assert.assertEquals(allInput, MockExternalSink.readOutput(outputSubDir1.getAbsolutePath(), schema));
Assert.assertEquals(allInput, MockExternalSink.readOutput(outputSubDir2.getAbsolutePath(), schema));
if (!backwardsCompatible) {
// Assert that external datasets got created
Assert.assertNotNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetInput)).get());
Assert.assertNotNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetOutput)).get());
}
}
use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata.
the class DataPipelineTest method testMacroEvaluationActionPipeline.
private void testMacroEvaluationActionPipeline(Engine engine) throws Exception {
ETLStage action1 = new ETLStage("action1", MockAction.getPlugin("actionTable", "action1.row", "action1.column", "${value}"));
ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(action1).setEngine(engine).build();
// set runtime arguments for macro substitution
Map<String, String> runtimeArguments = ImmutableMap.of("value", "macroValue");
AppRequest<io.cdap.cdap.etl.proto.v2.ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("macroActionTest-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.setRuntimeArgs(runtimeArguments);
manager.start(ImmutableMap.of("logical.start.time", "0"));
manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
DataSetManager<Table> actionTableDS = getDataset("actionTable");
Assert.assertEquals("macroValue", MockAction.readOutput(actionTableDS, "action1.row", "action1.column"));
}
Aggregations