Use of io.cdap.cdap.test.ApplicationManager in project cdap by caskdata.
In class DataPipelineTest, method testOuterJoin:
private void testOuterJoin(Engine engine) throws Exception {
Schema inputSchema1 = Schema.recordOf("customerRecord",
  Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
Schema inputSchema2 = Schema.recordOf("itemRecord",
  Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)),
  Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
Schema inputSchema3 = Schema.recordOf("transactionRecord",
  Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("c_name", Schema.of(Schema.Type.STRING)));
String input1Name = "source1OuterJoinInput-" + engine;
String input2Name = "source2OuterJoinInput-" + engine;
String input3Name = "source3OuterJoinInput-" + engine;
String outputName = "outerJoinOutput-" + engine;
String joinerName = "outerJoiner-" + engine;
String sinkName = "outerJoinSink-" + engine;
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source1", MockSource.getPlugin(input1Name, inputSchema1)))
  .addStage(new ETLStage("source2", MockSource.getPlugin(input2Name, inputSchema2)))
  .addStage(new ETLStage("source3", MockSource.getPlugin(input3Name, inputSchema3)))
  .addStage(new ETLStage("t1", IdentityTransform.getPlugin()))
  .addStage(new ETLStage("t2", IdentityTransform.getPlugin()))
  .addStage(new ETLStage("t3", IdentityTransform.getPlugin()))
  // join t1, t2, t3 on customer id and name; only "t1" is a required input, so the t2/t3 fields are nullable in the output
  .addStage(new ETLStage(joinerName, MockJoiner.getPlugin("t1.customer_id=t2.cust_id=t3.c_id&" + "t1.customer_name=t2.cust_name=t3.c_name", "t1", "")))
  .addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName)))
  .addConnection("source1", "t1")
  .addConnection("source2", "t2")
  .addConnection("source3", "t3")
  .addConnection("t1", joinerName)
  .addConnection("t2", joinerName)
  .addConnection("t3", joinerName)
  .addConnection(joinerName, sinkName)
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("OuterJoinApp-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
Schema outSchema = Schema.recordOf("join.output",
  Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
  Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
  Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
  Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
  Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
  Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
  Schema.Field.of("c_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
StructuredRecord recordMartha = StructuredRecord.builder(inputSchema1).set("customer_id", "4").set("customer_name", "martha").build();
StructuredRecord recordCar = StructuredRecord.builder(inputSchema2).set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").build();
StructuredRecord recordBike = StructuredRecord.builder(inputSchema2).set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").build();
StructuredRecord recordTrasCar = StructuredRecord.builder(inputSchema3).set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
StructuredRecord recordTrasPlane = StructuredRecord.builder(inputSchema3).set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
StructuredRecord recordTrasBike = StructuredRecord.builder(inputSchema3).set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
// write the input records to each source dataset
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(input1Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane, recordMartha));
inputManager = getDataset(NamespaceId.DEFAULT.dataset(input2Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordCar, recordBike));
inputManager = getDataset(NamespaceId.DEFAULT.dataset(input3Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordTrasCar, recordTrasPlane, recordTrasBike));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
StructuredRecord joinRecordSamuel = StructuredRecord.builder(outSchema).set("customer_id", "1").set("customer_name", "samuel").set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
StructuredRecord joinRecordBob = StructuredRecord.builder(outSchema).set("customer_id", "2").set("customer_name", "bob").set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
StructuredRecord joinRecordJane = StructuredRecord.builder(outSchema).set("customer_id", "3").set("customer_name", "jane").set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
StructuredRecord joinRecordMartha = StructuredRecord.builder(outSchema).set("customer_id", "4").set("customer_name", "martha").build();
DataSetManager<Table> sinkManager = getDataset(outputName);
Set<StructuredRecord> expected = ImmutableSet.of(joinRecordSamuel, joinRecordJane, joinRecordBob, joinRecordMartha);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(4, appId, joinerName + ".records.out");
validateMetric(4, appId, sinkName + ".records.in");
}
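In the full DataPipelineTest class this private helper is presumably driven by an engine-parameterized @Test method; a minimal sketch of such a driver, assuming the Engine enum exposes MAPREDUCE and SPARK constants as used elsewhere in this class:
@Test
public void testOuterJoin() throws Exception {
// hypothetical driver, not part of the listing above: run the outer-join pipeline once per engine
testOuterJoin(Engine.MAPREDUCE);
testOuterJoin(Engine.SPARK);
}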
Use of io.cdap.cdap.test.ApplicationManager in project cdap by caskdata.
In class DataPipelineTest, method runHeadTriggeringPipeline:
private void runHeadTriggeringPipeline(Engine engine, String expectedValue1, String expectedValue2) throws Exception {
// set runtime arguments
Map<String, String> runtimeArguments = ImmutableMap.of("head-arg", expectedValue1);
ETLStage action1 = new ETLStage("action1", MockAction.getPlugin("actionTable", "action1.row", "action1.column", expectedValue2));
ETLBatchConfig etlConfig = io.cdap.cdap.etl.proto.v2.ETLBatchConfig.builder().addStage(action1).setEngine(engine).build();
AppRequest<io.cdap.cdap.etl.proto.v2.ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("head");
ApplicationManager appManager = deployApplication(appId, appRequest);
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.setRuntimeArgs(runtimeArguments);
manager.start(ImmutableMap.of("logical.start.time", "0"));
manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
}
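A hypothetical caller of this helper could run the pipeline and then read back what MockAction wrote, reusing the getDataset/MockAction.readOutput pattern that appears in the other tests in this listing (the argument values here are illustrative only):
runHeadTriggeringPipeline(Engine.MAPREDUCE, "headArgValue", "actionCellValue");
// "actionTable", "action1.row" and "action1.column" are the table/row/column passed to MockAction.getPlugin above
DataSetManager<Table> actionTableDS = getDataset("actionTable");
Assert.assertEquals("actionCellValue", MockAction.readOutput(actionTableDS, "action1.row", "action1.column"));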
Use of io.cdap.cdap.test.ApplicationManager in project cdap by caskdata.
In class DataPipelineTest, method testSimpleControlOnlyDag:
@Test
public void testSimpleControlOnlyDag() throws Exception {
//
// condition --(true)--> action1
//     |
//     |------(false)--> action2
//
String appName = "SimpleControlOnlyDag";
String trueActionTable = "trueActionTable" + appName;
String falseActionTable = "falseActionTable" + appName;
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("condition", MockCondition.getPlugin("condition")))
  .addStage(new ETLStage("action1", MockAction.getPlugin(trueActionTable, "row1", "key1", "val1")))
  .addStage(new ETLStage("action2", MockAction.getPlugin(falseActionTable, "row2", "key2", "val2")))
  .addConnection("condition", "action1", true)
  .addConnection("condition", "action2", false)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(appName);
ApplicationManager appManager = deployApplication(appId, appRequest);
for (String branch : Arrays.asList("true", "false")) {
String table = branch.equals("true") ? trueActionTable : falseActionTable;
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
if (branch.equals("true")) {
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
} else {
// the workflow already completed once for the 'true' branch above, so wait for two completed runs in total
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
}
DataSetManager<Table> actionTableDS = getDataset(table);
if (branch.equals("true")) {
Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
} else {
Assert.assertEquals("val2", MockAction.readOutput(actionTableDS, "row2", "key2"));
}
}
}
Use of io.cdap.cdap.test.ApplicationManager in project cdap by caskdata.
In class DataPipelineTest, method testActionFieldLineage:
private void testActionFieldLineage(Engine engine) throws Exception {
String readDataset = "ActionReadDataset" + engine;
String writeDataset = "ActionWriteDataset" + engine;
List<String> srcFields = ImmutableList.of("srcField1", "srcField2", "srcField3");
Set<String> destFields = ImmutableSet.of("destField1", "destField2", "destField3");
List<Operation> operations = new ArrayList<>();
/*
 *                    |--> srcField1 -> destField1 --|
 *                    |                              |
 * ActionReadDataset -|--> srcField2 -> destField2 --|--> ActionWriteDataset
 *                    |                              |
 *                    |--> srcField3 -> destField3 --|
 */
operations.add(new ReadOperation("Read", "1st operation", EndPoint.of("default", readDataset), srcFields));
operations.add(new TransformOperation("Transform1", "2nd operation", Collections.singletonList(InputField.of("Read", "srcField1")), "destField1"));
operations.add(new TransformOperation("Transform2", "3rd operation", Collections.singletonList(InputField.of("Read", "srcField2")), "destField2"));
operations.add(new TransformOperation("Transform3", "4th operation", Collections.singletonList(InputField.of("Read", "srcField3")), "destField3"));
operations.add(new WriteOperation("Write", "5th operation", EndPoint.of("default", writeDataset), ImmutableList.of(InputField.of("Transform1", "destField1"), InputField.of("Transform2", "destField2"), InputField.of("Transform3", "destField3"))));
ETLStage action = new ETLStage("action", FieldLineageAction.getPlugin(readDataset, writeDataset, operations));
ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(action).setEngine(engine).build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ActionFieldLineage-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
FieldLineageAdmin fieldAdmin = getFieldLineageAdmin();
// get field lineage for dest dataset
DatasetFieldLineageSummary summary = fieldAdmin.getDatasetFieldLineage(Constants.FieldLineage.Direction.BOTH, EndPoint.of("default", writeDataset), 0, System.currentTimeMillis());
Assert.assertEquals(NamespaceId.DEFAULT.dataset(writeDataset), summary.getDatasetId());
Assert.assertEquals(destFields, summary.getFields());
Assert.assertTrue(summary.getOutgoing().isEmpty());
Assert.assertEquals(1, summary.getIncoming().size());
Set<FieldRelation> fieldRelations = ImmutableSet.of(new FieldRelation("srcField1", "destField1"), new FieldRelation("srcField2", "destField2"), new FieldRelation("srcField3", "destField3"));
DatasetFieldLineageSummary.FieldLineageRelations expectedRelations = new DatasetFieldLineageSummary.FieldLineageRelations(NamespaceId.DEFAULT.dataset(readDataset), 3, fieldRelations);
Assert.assertEquals(expectedRelations, summary.getIncoming().iterator().next());
// get field lineage for src dataset
summary = fieldAdmin.getDatasetFieldLineage(Constants.FieldLineage.Direction.BOTH, EndPoint.of("default", readDataset), 0, System.currentTimeMillis());
Assert.assertEquals(NamespaceId.DEFAULT.dataset(readDataset), summary.getDatasetId());
Assert.assertEquals(new HashSet<>(srcFields), summary.getFields());
Assert.assertTrue(summary.getIncoming().isEmpty());
Assert.assertEquals(1, summary.getOutgoing().size());
expectedRelations = new DatasetFieldLineageSummary.FieldLineageRelations(NamespaceId.DEFAULT.dataset(writeDataset), 3, fieldRelations);
Assert.assertEquals(expectedRelations, summary.getOutgoing().iterator().next());
LineageAdmin lineageAdmin = getLineageAdmin();
ProgramId programId = appId.workflow(SmartWorkflow.NAME);
RunId runId = RunIds.fromString(workflowManager.getHistory().iterator().next().getPid());
// get dataset lineage for src dataset
Tasks.waitFor(2, () -> {
Lineage lineage = lineageAdmin.computeLineage(NamespaceId.DEFAULT.dataset(readDataset), 0, System.currentTimeMillis(), 1, "workflow");
return lineage.getRelations().size();
}, 10, TimeUnit.SECONDS);
Lineage lineage = lineageAdmin.computeLineage(NamespaceId.DEFAULT.dataset(readDataset), 0, System.currentTimeMillis(), 1, "workflow");
Set<Relation> expectedLineage = ImmutableSet.of(new Relation(NamespaceId.DEFAULT.dataset(readDataset), programId, AccessType.READ, runId), new Relation(NamespaceId.DEFAULT.dataset(writeDataset), programId, AccessType.WRITE, runId));
Assert.assertEquals(expectedLineage, lineage.getRelations());
// get dataset lineage for the dest dataset; in this test it should be the same as for the src dataset
lineage = lineageAdmin.computeLineage(NamespaceId.DEFAULT.dataset(writeDataset), 0, System.currentTimeMillis(), 1, "workflow");
Assert.assertEquals(2, lineage.getRelations().size());
Assert.assertEquals(expectedLineage, lineage.getRelations());
}
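The Direction.BOTH argument above returns incoming and outgoing relations together. Assuming the Direction enum also offers single-direction constants (an assumption, not shown in this listing), the incoming side for the destination dataset could presumably be queried on its own:
// hypothetical: fetch only the incoming field lineage for the write dataset
DatasetFieldLineageSummary incomingOnly = fieldAdmin.getDatasetFieldLineage(Constants.FieldLineage.Direction.INCOMING, EndPoint.of("default", writeDataset), 0, System.currentTimeMillis());
Assert.assertEquals(1, incomingOnly.getIncoming().size());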
Use of io.cdap.cdap.test.ApplicationManager in project cdap by caskdata.
In class DataPipelineTest, method testSimpleUpgradePipelinesWithArtifactScope:
/* Tests upgrade for a deployed application. Also tests the artifact scope parameter for only considering artifacts
   in a given scope.
   1. Deploy an application with the older application artifact (1.0.0) and the older filter plugin version (1.0.0).
   2. Add additional versions of the application artifact (0.0.9, 1.1.0, 1.2.0) and of the filter plugin artifact
      (1.0.5, 1.1.0) in SYSTEM scope (done in the test class setup).
   3. Also deploy a snapshot version 1.0.8 of the plugin artifact in USER scope.
   4. Upgrade the deployed application with the artifact scope for the upgrade set to USER.
   5. Verify that after upgrading, the application artifact is upgraded to the latest available version in its config
      and the filter plugin uses the snapshot version 1.0.8 from USER scope.
*/
@Test
public void testSimpleUpgradePipelinesWithArtifactScope() throws Exception {
ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
Engine engine = Engine.MAPREDUCE;
String sourceName = "testSource" + engine.name();
String sinkName = "testSink" + engine.name();
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .setEngine(engine)
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector)))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addConnection("source", "filter")
  .addConnection("filter", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
// Deploy app with artifact version 1.0.0.
ApplicationManager appManager = deployApplication(appId, appRequest);
ApplicationDetail oldAppDetail = getAppDetail(appId);
ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
// Upgrade application with artifact scope as USER.
appManager.upgrade(Collections.singleton(ArtifactScope.USER.toString()), false);
ApplicationDetail upgradedAppDetail = getAppDetail(appId);
ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
// Compare stages that should remain the same after the upgrade.
Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
// Verify that after the upgrade, the application artifact version is the latest one available.
Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_2.getVersion(), upgradedAppDetail.getArtifact().getVersion());
// Verify that the filter stage plugin was upgraded to the 1.0.8 snapshot version from USER scope.
ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
Assert.assertEquals("1.0.8", upgradedPlugin.getArtifactConfig().getVersion());
Assert.assertEquals(ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()), ArtifactScope.USER);
}
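For contrast, restricting the upgrade to SYSTEM scope should, per the setup described in the comment above, presumably resolve the filter plugin to the highest SYSTEM-scope version (1.1.0) instead of the USER-scope snapshot. A hedged sketch of that variant, not part of the original test:
// hypothetical variant: only consider SYSTEM-scope artifacts during the upgrade
appManager.upgrade(Collections.singleton(ArtifactScope.SYSTEM.toString()), false);
ApplicationDetail systemUpgradedDetail = getAppDetail(appId);
ETLBatchConfig systemScopedConfig = GSON.fromJson(systemUpgradedDetail.getConfiguration(), ETLBatchConfig.class);
// expectation under the above assumption: the filter stage now references plugin version 1.1.0 in SYSTEM scope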