Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
In class DataPipelineTest, method deployPipelineWithSchedule.
private WorkflowManager deployPipelineWithSchedule(String pipelineName, Engine engine, String triggeringPipelineName,
                                                   ArgumentMapping key1Mapping, String expectedKey1Value,
                                                   PluginPropertyMapping key2Mapping, String expectedKey2Value) throws Exception {
String tableName = "actionScheduleTable" + pipelineName + engine;
String sourceName = "macroActionWithScheduleInput-" + pipelineName + engine;
String sinkName = "macroActionWithScheduleOutput-" + pipelineName + engine;
String key1 = key1Mapping.getTarget();
String key2 = key2Mapping.getTarget();
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("action1", MockAction.getPlugin(tableName, "row1", "column1", String.format("${%s}", key1))))
  .addStage(new ETLStage("action2", MockAction.getPlugin(tableName, "row2", "column2", String.format("${%s}", key2))))
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("name", String.format("${%s}", key1))))
  .addStage(new ETLStage("filter2", StringValueFilterTransform.getPlugin("name", String.format("${%s}", key2))))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addConnection("action1", "action2")
  .addConnection("action2", "source")
  .addConnection("source", "filter1")
  .addConnection("filter1", "filter2")
  .addConnection("filter2", "sink")
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(pipelineName);
ApplicationManager appManager = deployApplication(appId, appRequest);
// there should be only two programs - one workflow and one mapreduce/spark
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
// Use expectedKey1Value and expectedKey2Value as values for two of the records, so that only the record "samuel"
// reaches the sink when the runtime arguments key1 and key2 resolve to the expected values and the filters drop the other two.
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordKey1Value = StructuredRecord.builder(schema).set("name", expectedKey1Value).build();
StructuredRecord recordKey2Value = StructuredRecord.builder(schema).set("name", expectedKey2Value).build();
// write the three records to the source
DataSetManager<Table> inputManager = getDataset(sourceName);
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordKey1Value, recordKey2Value));
String defaultNamespace = NamespaceId.DEFAULT.getNamespace();
// Map a runtime argument and a plugin property of the triggering pipeline to the runtime arguments key1 and key2
TriggeringPropertyMapping propertyMapping = new TriggeringPropertyMapping(ImmutableList.of(key1Mapping), ImmutableList.of(key2Mapping));
ProgramStatusTrigger completeTrigger = new ProgramStatusTrigger(new WorkflowId(defaultNamespace, triggeringPipelineName, SmartWorkflow.NAME), ImmutableSet.of(ProgramStatus.COMPLETED));
ScheduleId scheduleId = appId.schedule("completeSchedule");
appManager.addSchedule(new ScheduleDetail(
  scheduleId.getNamespace(), scheduleId.getApplication(), scheduleId.getVersion(), scheduleId.getSchedule(), "",
  new ScheduleProgramInfo(SchedulableProgramType.WORKFLOW, SmartWorkflow.NAME),
  ImmutableMap.of(SmartWorkflow.TRIGGERING_PROPERTIES_MAPPING, GSON.toJson(propertyMapping)),
  completeTrigger, ImmutableList.of(), Schedulers.JOB_QUEUE_TIMEOUT_MILLIS, null, null));
appManager.enableSchedule(scheduleId);
return appManager.getWorkflowManager(SmartWorkflow.NAME);
}
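A hypothetical caller of this helper might look like the sketch below; the pipeline names and expected values are made up for illustration, and the construction of the two mappings is deliberately left out because it is not part of this excerpt.
// Sketch of a hypothetical caller. key1Mapping / key2Mapping are assumed to have been built
// elsewhere; their construction is not shown in this excerpt.
WorkflowManager triggeredWorkflow = deployPipelineWithSchedule(
  "triggeredPipeline", Engine.MAPREDUCE, "triggeringPipeline",
  key1Mapping, "expectedVal1", key2Mapping, "expectedVal2");
// Once the triggering pipeline's SmartWorkflow completes, the ProgramStatusTrigger fires,
// the schedule starts this workflow, and key1/key2 are resolved through the
// TriggeringPropertyMapping registered above.
triggeredWorkflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);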
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
In class DataPipelineTest, method testKVTableLookup.
@Test
public void testKVTableLookup() throws Exception {
addDatasetInstance(KeyValueTable.class.getName(), "ageTable");
DataSetManager<KeyValueTable> lookupTable = getDataset("ageTable");
lookupTable.get().write("samuel".getBytes(Charsets.UTF_8), "12".getBytes(Charsets.UTF_8));
lookupTable.get().write("bob".getBytes(Charsets.UTF_8), "36".getBytes(Charsets.UTF_8));
lookupTable.get().write("jane".getBytes(Charsets.UTF_8), "25".getBytes(Charsets.UTF_8));
lookupTable.flush();
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin("inputTable")))
  .addStage(new ETLStage("transform", LookupTransform.getPlugin("person", "age", "ageTable")))
  .addStage(new ETLStage("sink", MockSink.getPlugin("outputTable")))
  .addConnection("source", "transform")
  .addConnection("transform", "sink")
  .build();
ApplicationId appId = NamespaceId.DEFAULT.app("testKVTableLookup");
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationManager appManager = deployApplication(appId, appRequest);
// set up input data
Schema inputSchema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema).set("person", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(inputSchema).set("person", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(inputSchema).set("person", "jane").build();
DataSetManager<Table> inputTable = getDataset("inputTable");
MockSource.writeInput(inputTable, ImmutableList.of(recordSamuel, recordBob, recordJane));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME).start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
Schema schema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)), Schema.Field.of("age", Schema.of(Schema.Type.STRING)));
Set<StructuredRecord> expected = new HashSet<>();
expected.add(StructuredRecord.builder(schema).set("person", "samuel").set("age", "12").build());
expected.add(StructuredRecord.builder(schema).set("person", "bob").set("age", "36").build());
expected.add(StructuredRecord.builder(schema).set("person", "jane").set("age", "25").build());
DataSetManager<Table> outputTable = getDataset("outputTable");
Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(outputTable));
Assert.assertEquals(expected, actual);
validateMetric(3, appId, "source.records.out");
validateMetric(3, appId, "sink.records.in");
deleteDatasetInstance(NamespaceId.DEFAULT.dataset("inputTable"));
deleteDatasetInstance(NamespaceId.DEFAULT.dataset("outputTable"));
}
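The deploy, write input, run, verify cycle above recurs throughout these examples. Stripped to its core, and assuming the same DataPipelineTest harness (MockSource, MockSink, APP_ARTIFACT) with made-up dataset and application names, it is roughly the following sketch.
// Minimal sketch of the recurring test cycle, using only the helpers shown in these examples.
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
ETLBatchConfig config = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin("minimalIn", schema)))
  .addStage(new ETLStage("sink", MockSink.getPlugin("minimalOut")))
  .addConnection("source", "sink")
  .build();
ApplicationManager app = deployApplication(NamespaceId.DEFAULT.app("minimalPipeline"),
                                           new AppRequest<>(APP_ARTIFACT, config));
// seed the source dataset, run the workflow, then read the sink dataset back
DataSetManager<Table> in = getDataset("minimalIn");
MockSource.writeInput(in, ImmutableList.of(StructuredRecord.builder(schema).set("name", "samuel").build()));
WorkflowManager workflow = app.getWorkflowManager(SmartWorkflow.NAME);
workflow.start();
workflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
DataSetManager<Table> out = getDataset("minimalOut");
Assert.assertEquals(1, MockSink.readOutput(out).size());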
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
In class DataPipelineTest, method testNestedCondition.
private void testNestedCondition(Engine engine) throws Exception {
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
 * action1----->|
 *              |
 * action2----->|
 *              |
 *              V
 * source --> condition1 --> sink1
 *              |
 *              |------->condition2 --> sink2
 *                           |
 *                           |-------> condition3----> sink3----->action3----->condition4---->action4
 *                                                                                 |
 *                                                                                 |------>action5
 */
String appName = "NestedCondition-" + engine;
String source = appName + "Source-" + engine;
String sink1 = appName + "Sink1-" + engine;
String sink2 = appName + "Sink2-" + engine;
String sink3 = appName + "Sink3-" + engine;
String actionTable = "actionTable" + appName + "-" + engine;
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin(source, schema)))
  .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1)))
  .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2)))
  .addStage(new ETLStage("sink3", MockSink.getPlugin(sink3)))
  .addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, "row1", "key1", "val1")))
  .addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, "row2", "key2", "val2")))
  .addStage(new ETLStage("action3", MockAction.getPlugin(actionTable, "row3", "key3", "val3")))
  .addStage(new ETLStage("action4", MockAction.getPlugin(actionTable, "row4", "key4", "val4")))
  .addStage(new ETLStage("action5", MockAction.getPlugin(actionTable, "row5", "key5", "val5")))
  .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
  .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
  .addStage(new ETLStage("condition3", MockCondition.getPlugin("condition3")))
  .addStage(new ETLStage("condition4", MockCondition.getPlugin("condition4")))
  .addConnection("action1", "condition1")
  .addConnection("action2", "condition1")
  .addConnection("source", "condition1")
  .addConnection("condition1", "sink1", true)
  .addConnection("condition1", "condition2", false)
  .addConnection("condition2", "sink2", true)
  .addConnection("condition2", "condition3", false)
  .addConnection("condition3", "sink3", true)
  .addConnection("sink3", "action3")
  .addConnection("action3", "condition4")
  .addConnection("condition4", "action4", true)
  .addConnection("condition4", "action5", false)
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(appName);
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
// write records to source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition3.branch.to.execute", "true", "condition4.branch.to.execute", "true"));
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset(sink3);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
// other sinks should not have any data in them
sinkManager = getDataset(sink1);
Assert.assertTrue(MockSink.readOutput(sinkManager).isEmpty());
sinkManager = getDataset(sink2);
Assert.assertTrue(MockSink.readOutput(sinkManager).isEmpty());
// check actions
DataSetManager<Table> actionTableDS = getDataset(actionTable);
// action1 is executed
Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
// action2 is executed
Assert.assertEquals("val2", MockAction.readOutput(actionTableDS, "row2", "key2"));
// action3 is executed
Assert.assertEquals("val3", MockAction.readOutput(actionTableDS, "row3", "key3"));
// action4 is executed
Assert.assertEquals("val4", MockAction.readOutput(actionTableDS, "row4", "key4"));
// action5 should not get executed.
Assert.assertNull(MockAction.readOutput(actionTableDS, "row5", "key5"));
}
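The three-argument addConnection overload used above is what attaches a stage to a condition's true or false branch. A minimal sketch of that wiring, reusing the same mock plugins with illustrative stage and dataset names, looks like this.
// When "condition" evaluates to true, the true branch executes and the source records end up in
// trueSink; otherwise the false branch runs and they end up in falseSink. The boolean in
// addConnection marks which branch the connection belongs to.
ETLBatchConfig branching = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin("branchIn")))
  .addStage(new ETLStage("condition", MockCondition.getPlugin("condition")))
  .addStage(new ETLStage("trueSink", MockSink.getPlugin("branchTrueOut")))
  .addStage(new ETLStage("falseSink", MockSink.getPlugin("branchFalseOut")))
  .addConnection("source", "condition")
  .addConnection("condition", "trueSink", true)    // taken when the condition is true
  .addConnection("condition", "falseSink", false)  // taken when the condition is false
  .build();
// As in the test above, MockCondition reads the runtime argument "<stageName>.branch.to.execute"
// to decide its outcome, e.g. start the workflow with ImmutableMap.of("condition.branch.to.execute", "true").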
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
In class DataPipelineTest, method testSimpleUpgradePipelinesWithSnapshotArtifact.
/* Tests upgrade for a deployed application. Also tests that SNAPSHOT artifacts are considered for upgrade.
   1. Deploy an application with the older application artifact (1.0.0) and the older filter plugin version (1.0.0).
   2. Add additional versions of the application artifact (0.0.9, 1.1.0, 1.2.0) and of the filter plugin artifact (1.0.5, 1.1.0).
   3. Also deploy a snapshot version of the app artifact, 1.3.0-SNAPSHOT, with plugin artifact 1.1.1-SNAPSHOT bound to it.
   4. Upgrade the deployed application.
   5. Verify that, after upgrading, the application artifact and the filter plugin artifact in the config are moved to the
      latest available versions, which are the snapshot versions of both.
*/
@Test
public void testSimpleUpgradePipelinesWithSnapshotArtifact() throws Exception {
ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
Engine engine = Engine.MAPREDUCE;
String sourceName = "testSource" + engine.name();
String sinkName = "testSink" + engine.name();
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .setEngine(engine)
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(
    ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector)))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addConnection("source", "filter")
  .addConnection("filter", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
// Deploy app with artifact version 1.0.0.
ApplicationManager appManager = deployApplication(appId, appRequest);
ApplicationDetail oldAppDetail = getAppDetail(appId);
ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
// Upgrade application with allowSnapshot set to true.
appManager.upgrade(Collections.emptySet(), true);
ApplicationDetail upgradedAppDetail = getAppDetail(appId);
ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
// Compare stages that should be same after upgrade.
Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
// Verify that the upgrade moved the application artifact to the latest available version.
Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_3_SNAPSHOT.getVersion(), upgradedAppDetail.getArtifact().getVersion());
// Verify that the filter stage's plugin artifact was upgraded to the snapshot version in the USER scope.
ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
Assert.assertEquals("1.1.1-SNAPSHOT", upgradedPlugin.getArtifactConfig().getVersion());
Assert.assertEquals(ArtifactScope.USER, ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()));
}
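The upgrade scenario above boils down to pinning a plugin to a specific artifact version at deploy time and later asking the platform to move it forward. A condensed sketch of that pattern, reusing the calls from this test with the same illustrative artifact names and versions, follows.
// Pin the filter plugin to version 1.0.0 of the "test-plugins" artifact in the USER scope.
ArtifactSelectorConfig pinned = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
ETLStage filter = new ETLStage("filter",
  PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), pinned));
// ... build and deploy a pipeline containing this stage as above, then upgrade the deployed app.
// Passing true for allowSnapshot lets SNAPSHOT artifacts be picked up during the upgrade.
appManager.upgrade(Collections.emptySet(), true);
// The deployed configuration then reflects the newest matching application and plugin artifacts.
ETLBatchConfig upgraded = GSON.fromJson(getAppDetail(appId).getConfiguration(), ETLBatchConfig.class);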
Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
In class DataPipelineTest, method testNoConnectorsForSourceCondition.
@Test
public void testNoConnectorsForSourceCondition() throws Exception {
//
// condition1-->condition2-->source-->sink
//
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin("simpleNoConnectorConditionSource", schema)))
  .addStage(new ETLStage("trueSink", MockSink.getPlugin("trueOutput")))
  .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
  .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
  .addConnection("condition1", "condition2", true)
  .addConnection("condition2", "source", true)
  .addConnection("source", "trueSink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("NoConnectorForSourceConditionApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
// write records to source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("simpleNoConnectorConditionSource"));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition1.branch.to.execute", "true", "condition2.branch.to.execute", "true"));
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset("trueOutput");
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
}