Use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.
The class DataPipelineTest, method testSimpleUpgradePipelinesWithSnapshotArtifact.
/* Tests upgrade for a deployed application. Also tests that SNAPSHOT artifacts are considered for upgrade.
   1. Deploy an application with an older application artifact (1.0.0) and an older filter plugin version (1.0.0).
   2. Add newer versions of the application artifact (0.0.9, 1.1.0, 1.2.0) and of the filter plugin artifact (1.0.5, 1.1.0).
   3. Also deploy a snapshot version of the app artifact, 1.3.0-SNAPSHOT, with plugin artifact 1.1.1-SNAPSHOT bound to it.
   4. Upgrade the older deployed application.
   5. Verify that after upgrading, the application artifact and the filter plugin artifact in its config are upgraded
      to the latest available versions, i.e. the snapshot versions of both.
*/
@Test
public void testSimpleUpgradePipelinesWithSnapshotArtifact() throws Exception {
  ArtifactSelectorConfig currentArtifactSelector =
    new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");

  Engine engine = Engine.MAPREDUCE;
  String sourceName = "testSource" + engine.name();
  String sinkName = "testSink" + engine.name();

  ETLBatchConfig etlConfig = ETLBatchConfig.builder()
    .setEngine(engine)
    .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
    .addStage(new ETLStage("filter",
                           PluggableFilterTransform.getPlugin(ValueFilter.NAME,
                                                              ValueFilter.getProperties("${field}", "${value}"),
                                                              currentArtifactSelector)))
    .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
    .addConnection("source", "filter")
    .addConnection("filter", "sink")
    .build();

  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
  // Deploy the app with artifact version 1.0.0.
  ApplicationManager appManager = deployApplication(appId, appRequest);

  ApplicationDetail oldAppDetail = getAppDetail(appId);
  ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
  Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream()
    .collect(Collectors.toMap(ETLStage::getName, e -> e));

  // Upgrade the application with allowSnapshot set to true.
  appManager.upgrade(Collections.emptySet(), true);

  ApplicationDetail upgradedAppDetail = getAppDetail(appId);
  ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
  Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream()
    .collect(Collectors.toMap(ETLStage::getName, e -> e));

  // Compare the stages that should be unchanged by the upgrade.
  Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
  Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));

  // Verify that the upgrade moved the application to the latest available artifact version (the snapshot).
  Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_3_SNAPSHOT.getVersion(), upgradedAppDetail.getArtifact().getVersion());

  // Check that the filter stage's plugin artifact was upgraded to the desired snapshot version in USER scope.
  ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
  Assert.assertEquals("1.1.1-SNAPSHOT", upgradedPlugin.getArtifactConfig().getVersion());
  Assert.assertEquals(ArtifactScope.USER,
                      ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()));
}
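The filter stage above is configured with the macros ${field} and ${value}; they are only resolved when the pipeline actually runs. A minimal sketch of such a run, assuming the same WorkflowManager API used elsewhere in these tests (the argument values here are purely illustrative):

// Hypothetical run of the upgraded pipeline, resolving the ${field}/${value} macros
// through runtime arguments (names and values chosen for illustration only).
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("field", "name", "value", "samuel"));
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);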
Use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.
The class DataPipelineTest, method testNoConnectorsForSourceCondition.
@Test
public void testNoConnectorsForSourceCondition() throws Exception {
  //
  // condition1-->condition2-->source-->sink
  //
  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  ETLBatchConfig etlConfig = ETLBatchConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPlugin("simpleNoConnectorConditionSource", schema)))
    .addStage(new ETLStage("trueSink", MockSink.getPlugin("trueOutput")))
    .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
    .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
    .addConnection("condition1", "condition2", true)
    .addConnection("condition2", "source", true)
    .addConnection("source", "trueSink")
    .build();

  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("NoConnectorForSourceConditionApp");
  ApplicationManager appManager = deployApplication(appId, appRequest);

  StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();

  // write records to the source
  DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("simpleNoConnectorConditionSource"));
  MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));

  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflowManager.start(ImmutableMap.of("condition1.branch.to.execute", "true",
                                        "condition2.branch.to.execute", "true"));
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

  // check the sink
  DataSetManager<Table> sinkManager = getDataset("trueOutput");
  Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
  Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
  Assert.assertEquals(expected, actual);
}
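The condition stages are steered entirely by the condition1.branch.to.execute / condition2.branch.to.execute runtime arguments. A sketch of the complementary check one could add at the end of this test, assuming the same mock plugins (this false-branch run is not part of the original test): if condition1 takes the false branch, the source and sink never execute, so the sink contents stay unchanged.

// Hypothetical follow-up run: take the false branch at condition1, then confirm the sink
// still only holds the two records written by the first run.
workflowManager.start(ImmutableMap.of("condition1.branch.to.execute", "false",
                                      "condition2.branch.to.execute", "true"));
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
Assert.assertEquals(expected, Sets.newHashSet(MockSink.readOutput(sinkManager)));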
Use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.
The class DataPipelineTest, method testFailureToStartIncapableProgram.
@Test
public void testFailureToStartIncapableProgram() throws Exception {
  ETLBatchConfig etlConfig = ETLBatchConfig.builder()
    .addStage(new ETLStage("source", IncapableSource.getPlugin()))
    .addStage(new ETLStage("sink", IncapableSink.getPlugin()))
    .addConnection("source", "sink")
    .build();

  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("IncapableApp");
  ApplicationManager appManager = deployApplication(appId, appRequest);

  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  // starting the workflow should not succeed because the pipeline contains incapable plugins
  workflowManager.start();
  // the program run should end in FAILED since it has incapable plugins
  workflowManager.waitForRun(ProgramRunStatus.FAILED, 5, TimeUnit.MINUTES);
}
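An extra assertion one might append here, sketched under the assumption that the program manager's run-history accessor is available in this test framework: confirm that no run of the workflow ever completed successfully.

// Hypothetical extra check (not in the original test): there should be no COMPLETED runs at all.
Assert.assertTrue(workflowManager.getHistory(ProgramRunStatus.COMPLETED).isEmpty());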
Use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.
The class DataPipelineConnectionTest, method testConnectionsRegistry.
@Test
public void testConnectionsRegistry() throws Exception {
  // source -> sink
  ETLBatchConfig conf1 = ETLBatchConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPluginUsingConnection("conn 1")))
    .addStage(new ETLStage("sink", MockSink.getPluginUsingConnection("conn 3")))
    .addConnection("source", "sink")
    .build();

  // 3 sources -> identity -> 2 sinks
  ETLBatchConfig conf2 = ETLBatchConfig.builder()
    .addStage(new ETLStage("src1", MockSource.getPluginUsingConnection("conn 1")))
    .addStage(new ETLStage("src2", MockSource.getPluginUsingConnection("conn 2")))
    .addStage(new ETLStage("src3", MockSource.getPluginUsingConnection("conn 3")))
    .addStage(new ETLStage("sink1", MockSink.getPluginUsingConnection("conn 4")))
    .addStage(new ETLStage("sink2", MockSink.getPluginUsingConnection("conn 5")))
    .addStage(new ETLStage("identity", IdentityTransform.getPlugin()))
    .addConnection("src1", "identity")
    .addConnection("src2", "identity")
    .addConnection("src3", "identity")
    .addConnection("identity", "sink1")
    .addConnection("identity", "sink2")
    .build();

  // deploy the apps
  AppRequest<ETLBatchConfig> appRequest1 = new AppRequest<>(APP_ARTIFACT, conf1);
  ApplicationId appId1 = NamespaceId.DEFAULT.app("app1");
  ApplicationManager appManager1 = deployApplication(appId1, appRequest1);
  AppRequest<ETLBatchConfig> appRequest2 = new AppRequest<>(APP_ARTIFACT, conf2);
  ApplicationId appId2 = NamespaceId.DEFAULT.app("app2");
  ApplicationManager appManager2 = deployApplication(appId2, appRequest2);

  // assert system metadata: the actual tags should contain all the connection tags
  Metadata app1Actual = getMetadataAdmin().getMetadata(appId1.toMetadataEntity(), MetadataScope.SYSTEM);
  Set<String> app1ExpectedTags = ImmutableSet.of("_conn_1", "_conn_3");
  Assert.assertTrue(app1Actual.getTags(MetadataScope.SYSTEM).containsAll(app1ExpectedTags));
  // user metadata should be empty
  Assert.assertEquals(Metadata.EMPTY, getMetadataAdmin().getMetadata(appId1.toMetadataEntity(), MetadataScope.USER));

  Metadata app2Actual = getMetadataAdmin().getMetadata(appId2.toMetadataEntity(), MetadataScope.SYSTEM);
  Set<String> app2ExpectedTags = ImmutableSet.of("_conn_1", "_conn_2", "_conn_3", "_conn_4", "_conn_5");
  Assert.assertTrue(app2Actual.getTags(MetadataScope.SYSTEM).containsAll(app2ExpectedTags));
  // user metadata should be empty
  Assert.assertEquals(Metadata.EMPTY, getMetadataAdmin().getMetadata(appId2.toMetadataEntity(), MetadataScope.USER));

  // use search queries to find the related apps
  Set<MetadataEntity> appsRelated = ImmutableSet.of(appId1.toMetadataEntity(), appId2.toMetadataEntity());
  assertMetadataSearch(appsRelated, "tags:_conn_1");
  assertMetadataSearch(Collections.singleton(appId2.toMetadataEntity()), "tags:_conn_2");
  assertMetadataSearch(appsRelated, "tags:_conn_3");
  assertMetadataSearch(Collections.singleton(appId2.toMetadataEntity()), "tags:_conn_4");
  assertMetadataSearch(Collections.singleton(appId2.toMetadataEntity()), "tags:_conn_5");
}
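The expected system tags above follow a simple pattern: the connection named "conn 1" shows up as the tag "_conn_1". A minimal sketch of that mapping, inferred only from this test's assertions (connectionTag is a hypothetical helper, not part of the CDAP API):

// Hypothetical helper illustrating the tag pattern implied by the assertions above:
// prefix the connection name with '_' and replace whitespace with '_'.
private static String connectionTag(String connectionName) {
  return "_" + connectionName.trim().replaceAll("\\s+", "_");
}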
Use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.
The class DataPipelineConnectionTest, method testUsingConnections.
private void testUsingConnections(Engine engine) throws Exception {
  String sourceConnName = "sourceConn " + engine;
  String sinkConnName = "sinkConn " + engine;
  String srcTableName = "src" + engine;
  String sinkTableName = "sink" + engine;

  // add a macro ("${badval}") to the connection properties; its runtime value will be a json object string
  addConnection(sourceConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       ImmutableMap.of("tableName", srcTableName, "key1", "${badval}"),
                       new ArtifactSelectorConfig())));
  addConnection(sinkConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       ImmutableMap.of("tableName", sinkTableName, "key1", "${badval}"),
                       new ArtifactSelectorConfig())));

  // add a json string to the runtime arguments to ensure the plugin can be instantiated under such a condition
  Map<String, String> runtimeArguments = Collections.singletonMap("badval", "{\"a\" : 1}");

  // source -> sink
  ETLBatchConfig config = ETLBatchConfig.builder()
    .setEngine(engine)
    .addStage(new ETLStage("source", MockSource.getPluginUsingConnection(sourceConnName)))
    .addStage(new ETLStage("sink", MockSink.getPluginUsingConnection(sinkConnName)))
    .addConnection("source", "sink")
    .build();

  Schema schema = Schema.recordOf("x", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  StructuredRecord samuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord dwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();

  // the test adds the dataset itself; the source won't create it since the table name is macro-enabled
  addDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName), Table.class.getName());
  DataSetManager<Table> sourceTable = getDataset(srcTableName);
  MockSource.writeInput(sourceTable, ImmutableList.of(samuel, dwayne));

  // verify that preview can run successfully using connections
  PreviewManager previewManager = getPreviewManager();
  PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, runtimeArguments, 10);
  // start the preview and get the corresponding PreviewRunner
  ApplicationId previewId =
    previewManager.start(NamespaceId.DEFAULT, new AppRequest<>(APP_ARTIFACT, config, previewConfig));
  // wait for the preview status to go into COMPLETED
  Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {
    @Override
    public PreviewStatus.Status call() throws Exception {
      PreviewStatus status = previewManager.getStatus(previewId);
      return status == null ? null : status.getStatus();
    }
  }, 5, TimeUnit.MINUTES);

  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
  ApplicationId appId = NamespaceId.DEFAULT.app("testApp" + engine);
  ApplicationManager appManager = deployApplication(appId, appRequest);

  // start the actual pipeline run
  WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  manager.startAndWaitForGoodRun(runtimeArguments, ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);

  DataSetManager<Table> sinkTable = getDataset(sinkTableName);
  List<StructuredRecord> outputRecords = MockSink.readOutput(sinkTable);
  Assert.assertEquals(ImmutableSet.of(dwayne, samuel), new HashSet<>(outputRecords));

  // modify the connections to use new table names for source and sink
  String newSrcTableName = "new" + srcTableName;
  String newSinkTableName = "new" + sinkTableName;
  addConnection(sourceConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       Collections.singletonMap("tableName", newSrcTableName),
                       new ArtifactSelectorConfig())));
  addConnection(sinkConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       Collections.singletonMap("tableName", newSinkTableName),
                       new ArtifactSelectorConfig())));

  addDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName), Table.class.getName());
  StructuredRecord newRecord1 = StructuredRecord.builder(schema).set("name", "john").build();
  StructuredRecord newRecord2 = StructuredRecord.builder(schema).set("name", "tom").build();
  sourceTable = getDataset(newSrcTableName);
  MockSource.writeInput(sourceTable, ImmutableList.of(newRecord1, newRecord2));

  // run the program again; it should use the new tables to read and write
  manager.start(runtimeArguments);
  manager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 3, TimeUnit.MINUTES);

  sinkTable = getDataset(newSinkTableName);
  outputRecords = MockSink.readOutput(sinkTable);
  Assert.assertEquals(ImmutableSet.of(newRecord1, newRecord2), new HashSet<>(outputRecords));

  // clean up connections and datasets
  deleteConnection(sourceConnName);
  deleteConnection(sinkConnName);
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName));
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sinkTableName));
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName));
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSinkTableName));
}
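testUsingConnections is a private helper parameterized by engine. A sketch of how per-engine @Test methods would typically invoke it (the method names below are assumptions for illustration, not necessarily the ones in DataPipelineConnectionTest):

// Hypothetical per-engine entry points for the helper above (names are illustrative).
@Test
public void testUsingConnectionsMapReduce() throws Exception {
  testUsingConnections(Engine.MAPREDUCE);
}

@Test
public void testUsingConnectionsSpark() throws Exception {
  testUsingConnections(Engine.SPARK);
}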