Search in sources :

Example 61 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testSimpleUpgradePipelinesWithSnapshotArtifact.

/**
 * Tests upgrade for a deployed application, and that SNAPSHOT artifacts are considered for upgrade.
 *
 * <ol>
 *   <li>Deploy an application with the older application artifact (1.0.0) and the older filter plugin
 *       version (1.0.0).</li>
 *   <li>Add new versions of the application artifact (0.0.9, 1.1.0, 1.2.0) and of the filter plugin
 *       artifact (1.0.5, 1.1.0).</li>
 *   <li>Also deploy a snapshot version of the app artifact (1.3.0-SNAPSHOT) and a plugin artifact
 *       (1.1.1-SNAPSHOT) bound to it.</li>
 *   <li>Upgrade the older deployed application, allowing snapshot versions.</li>
 *   <li>Verify that after upgrading, the application artifact and the filter plugin artifact in the
 *       application config both point at the snapshot versions.</li>
 * </ol>
 *
 * <p>NOTE(review): steps 2 and 3 are not performed inside this method; the newer artifacts are
 * presumably registered by shared test setup -- confirm.
 *
 * @throws Exception if deployment, upgrade or lookup of the application fails
 */
@Test
public void testSimpleUpgradePipelinesWithSnapshotArtifact() throws Exception {
    ArtifactSelectorConfig currentArtifactSelector =
        new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    // source -> filter -> sink, with the filter plugin pinned to the old plugin artifact version.
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .setEngine(engine)
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
        .addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(
            ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector)))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addConnection("source", "filter")
        .addConnection("filter", "sink")
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream()
        .collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application with allowSnapshot set to true.
    appManager.upgrade(Collections.emptySet(), true);
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream()
        .collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // Verify that after upgrade, application upgrades artifact version to latest version available.
    Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_3_SNAPSHOT.getVersion(), upgradedAppDetail.getArtifact().getVersion());
    // The filter stage's plugin artifact should be upgraded to the snapshot version and stay in USER scope.
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
    Assert.assertEquals("1.1.1-SNAPSHOT", upgradedPlugin.getArtifactConfig().getVersion());
    Assert.assertEquals(ArtifactScope.USER, ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()));
}
Also used : HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) 
FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) 
Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) 
DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) 
ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) Test(org.junit.Test)

Example 62 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testNoConnectorsForSourceCondition.

/**
 * Runs a pipeline shaped as condition1 --> condition2 --> source --> sink with both conditions
 * set to take their "true" branch, and checks that every record written to the source table
 * is read back from the sink table.
 */
@Test
public void testNoConnectorsForSourceCondition() throws Exception {
    Schema nameSchema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

    // Stages and connections for: condition1 --> condition2 --> source --> trueSink
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin("simpleNoConnectorConditionSource", nameSchema)))
        .addStage(new ETLStage("trueSink", MockSink.getPlugin("trueOutput")))
        .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
        .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
        .addConnection("condition1", "condition2", true)
        .addConnection("condition2", "source", true)
        .addConnection("source", "trueSink")
        .build();

    ApplicationId pipelineId = NamespaceId.DEFAULT.app("NoConnectorForSourceConditionApp");
    ApplicationManager deployedApp =
        deployApplication(pipelineId, new AppRequest<>(APP_ARTIFACT_RANGE, pipelineConfig));

    StructuredRecord samuel = StructuredRecord.builder(nameSchema).set("name", "samuel").build();
    StructuredRecord bob = StructuredRecord.builder(nameSchema).set("name", "bob").build();

    // Seed the source table with the two input records.
    DataSetManager<Table> sourceTable = getDataset(NamespaceId.DEFAULT.dataset("simpleNoConnectorConditionSource"));
    MockSource.writeInput(sourceTable, ImmutableList.of(samuel, bob));

    // Run the workflow with both conditions told to execute their "true" branch.
    WorkflowManager workflow = deployedApp.getWorkflowManager(SmartWorkflow.NAME);
    workflow.start(ImmutableMap.of(
        "condition1.branch.to.execute", "true",
        "condition2.branch.to.execute", "true"));
    workflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // Everything written to the source must show up in the sink.
    DataSetManager<Table> sinkTable = getDataset("trueOutput");
    Set<StructuredRecord> written = ImmutableSet.of(samuel, bob);
    Set<StructuredRecord> read = Sets.newHashSet(MockSink.readOutput(sinkTable));
    Assert.assertEquals(written, read);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 63 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testFailureToStartIncapableProgram.

/**
 * Deploys a pipeline built from {@code IncapableSource} and {@code IncapableSink}, starts its
 * workflow and waits for the run to reach the FAILED state.
 */
@Test
public void testFailureToStartIncapableProgram() throws Exception {
    // source -> sink, both stages using the "incapable" mock plugins
    ETLStage incapableSource = new ETLStage("source", IncapableSource.getPlugin());
    ETLStage incapableSink = new ETLStage("sink", IncapableSink.getPlugin());
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .addStage(incapableSource)
        .addStage(incapableSink)
        .addConnection("source", "sink")
        .build();

    ApplicationId pipelineId = NamespaceId.DEFAULT.app("IncapableApp");
    ApplicationManager deployedApp =
        deployApplication(pipelineId, new AppRequest<>(APP_ARTIFACT_RANGE, pipelineConfig));

    // Kick off the workflow; the run itself is what is expected to fail.
    WorkflowManager workflow = deployedApp.getWorkflowManager(SmartWorkflow.NAME);
    workflow.start();
    // The run should end up FAILED because the pipeline contains incapable plugins.
    workflow.waitForRun(ProgramRunStatus.FAILED, 5, TimeUnit.MINUTES);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 64 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineConnectionTest method testConnectionsRegistry.

/**
 * Deploys two pipelines whose stages use named connections and verifies that each application
 * gets a SYSTEM-scope tag per connection it uses, has no USER-scope metadata, and is returned by
 * a metadata search on those tags.
 *
 * @throws Exception if deployment or any metadata lookup fails
 */
@Test
public void testConnectionsRegistry() throws Exception {
    // source -> sink
    ETLBatchConfig conf1 = ETLBatchConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPluginUsingConnection("conn 1")))
        .addStage(new ETLStage("sink", MockSink.getPluginUsingConnection("conn 3")))
        .addConnection("source", "sink")
        .build();
    // 3 sources -> identity -> 2 sinks
    ETLBatchConfig conf2 = ETLBatchConfig.builder()
        .addStage(new ETLStage("src1", MockSource.getPluginUsingConnection("conn 1")))
        .addStage(new ETLStage("src2", MockSource.getPluginUsingConnection("conn 2")))
        .addStage(new ETLStage("src3", MockSource.getPluginUsingConnection("conn 3")))
        .addStage(new ETLStage("sink1", MockSink.getPluginUsingConnection("conn 4")))
        .addStage(new ETLStage("sink2", MockSink.getPluginUsingConnection("conn 5")))
        .addStage(new ETLStage("identity", IdentityTransform.getPlugin()))
        .addConnection("src1", "identity")
        .addConnection("src2", "identity")
        .addConnection("src3", "identity")
        .addConnection("identity", "sink1")
        .addConnection("identity", "sink2")
        .build();
    // deploy apps; the returned managers are not needed because nothing is run in this test
    AppRequest<ETLBatchConfig> appRequest1 = new AppRequest<>(APP_ARTIFACT, conf1);
    ApplicationId appId1 = NamespaceId.DEFAULT.app("app1");
    deployApplication(appId1, appRequest1);
    AppRequest<ETLBatchConfig> appRequest2 = new AppRequest<>(APP_ARTIFACT, conf2);
    ApplicationId appId2 = NamespaceId.DEFAULT.app("app2");
    deployApplication(appId2, appRequest2);
    MetadataEntity app1Entity = appId1.toMetadataEntity();
    MetadataEntity app2Entity = appId2.toMetadataEntity();
    // Assert metadata
    Metadata app1Actual = getMetadataAdmin().getMetadata(app1Entity, MetadataScope.SYSTEM);
    Set<String> app1ExpectedTags = ImmutableSet.of("_conn_1", "_conn_3");
    // here assert actual tags contain all the tags about connections
    Set<String> app1ActualTags = app1Actual.getTags(MetadataScope.SYSTEM);
    Assert.assertTrue("Expected tags " + app1ExpectedTags + " to be contained in " + app1ActualTags,
                      app1ActualTags.containsAll(app1ExpectedTags));
    // user metadata should be empty
    Assert.assertEquals(Metadata.EMPTY, getMetadataAdmin().getMetadata(app1Entity, MetadataScope.USER));
    Metadata app2Actual = getMetadataAdmin().getMetadata(app2Entity, MetadataScope.SYSTEM);
    Set<String> app2ExpectedTags = ImmutableSet.of("_conn_1", "_conn_2", "_conn_3", "_conn_4", "_conn_5");
    // here assert actual tags contain all the tags about connections
    Set<String> app2ActualTags = app2Actual.getTags(MetadataScope.SYSTEM);
    Assert.assertTrue("Expected tags " + app2ExpectedTags + " to be contained in " + app2ActualTags,
                      app2ActualTags.containsAll(app2ExpectedTags));
    // user metadata should be empty
    Assert.assertEquals(Metadata.EMPTY, getMetadataAdmin().getMetadata(app2Entity, MetadataScope.USER));
    // using search query to find out the related apps:
    // conn 1 and conn 3 are used by both apps, conn 2, 4 and 5 only by app2
    Set<MetadataEntity> appsRelated = ImmutableSet.of(app1Entity, app2Entity);
    assertMetadataSearch(appsRelated, "tags:_conn_1");
    assertMetadataSearch(Collections.singleton(app2Entity), "tags:_conn_2");
    assertMetadataSearch(appsRelated, "tags:_conn_3");
    assertMetadataSearch(Collections.singleton(app2Entity), "tags:_conn_4");
    assertMetadataSearch(Collections.singleton(app2Entity), "tags:_conn_5");
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Metadata(io.cdap.cdap.spi.metadata.Metadata) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 65 with ApplicationId

use of io.cdap.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineConnectionTest method testUsingConnections.

/**
 * Exercises a source -> sink pipeline whose plugins are configured through named connections.
 *
 * <p>The flow is: register the two connections (each carrying a {@code ${badval}} macro property),
 * run a preview, deploy and run the pipeline, re-register the connections against new table names,
 * run again, and finally delete the connections and dataset instances.
 *
 * @param engine the engine set on the pipeline config; also used to derive connection, table and app names
 */
private void testUsingConnections(Engine engine) throws Exception {
    String sourceConnName = "sourceConn " + engine;
    String sinkConnName = "sinkConn " + engine;
    String srcTableName = "src" + engine;
    String sinkTableName = "sink" + engine;

    // Register both connections with an extra macro-valued property ("key1").
    PluginInfo initialSourcePlugin = new PluginInfo(
        "test", "dummy", null, ImmutableMap.of("tableName", srcTableName, "key1", "${badval}"),
        new ArtifactSelectorConfig());
    addConnection(sourceConnName, new ConnectionCreationRequest("", initialSourcePlugin));
    PluginInfo initialSinkPlugin = new PluginInfo(
        "test", "dummy", null, ImmutableMap.of("tableName", sinkTableName, "key1", "${badval}"),
        new ArtifactSelectorConfig());
    addConnection(sinkConnName, new ConnectionCreationRequest("", initialSinkPlugin));

    // The macro resolves to a JSON string at runtime; plugins must still be instantiable with it.
    Map<String, String> runtimeArguments = Collections.singletonMap("badval", "{\"a\" : 1}");

    // source -> sink
    ETLBatchConfig config = ETLBatchConfig.builder()
        .setEngine(engine)
        .addStage(new ETLStage("source", MockSource.getPluginUsingConnection(sourceConnName)))
        .addStage(new ETLStage("sink", MockSink.getPluginUsingConnection(sinkConnName)))
        .addConnection("source", "sink")
        .build();

    Schema schema = Schema.recordOf("x", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    StructuredRecord samuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord dwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();

    // The test creates the source dataset itself: the table name is macro enabled, so the source won't.
    addDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName), Table.class.getName());
    DataSetManager<Table> initialInput = getDataset(srcTableName);
    MockSource.writeInput(initialInput, ImmutableList.of(samuel, dwayne));

    // Preview run: start it and poll until its status reports COMPLETED.
    PreviewManager previewManager = getPreviewManager();
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, runtimeArguments, 10);
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, new AppRequest<>(APP_ARTIFACT, config, previewConfig));
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
        PreviewStatus current = previewManager.getStatus(previewId);
        return current == null ? null : current.getStatus();
    }, 5, TimeUnit.MINUTES);

    // Deploy the real pipeline and run it once.
    ApplicationId appId = NamespaceId.DEFAULT.app("testApp" + engine);
    ApplicationManager appManager = deployApplication(appId, new AppRequest<>(APP_ARTIFACT, config));
    WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    manager.startAndWaitForGoodRun(runtimeArguments, ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);

    DataSetManager<Table> initialOutput = getDataset(sinkTableName);
    List<StructuredRecord> firstRunRecords = MockSink.readOutput(initialOutput);
    Assert.assertEquals(ImmutableSet.of(dwayne, samuel), new HashSet<>(firstRunRecords));

    // Re-register the connections so they point at new source and sink tables.
    String newSrcTableName = "new" + srcTableName;
    String newSinkTableName = "new" + sinkTableName;
    PluginInfo updatedSourcePlugin = new PluginInfo(
        "test", "dummy", null, Collections.singletonMap("tableName", newSrcTableName),
        new ArtifactSelectorConfig());
    addConnection(sourceConnName, new ConnectionCreationRequest("", updatedSourcePlugin));
    PluginInfo updatedSinkPlugin = new PluginInfo(
        "test", "dummy", null, Collections.singletonMap("tableName", newSinkTableName),
        new ArtifactSelectorConfig());
    addConnection(sinkConnName, new ConnectionCreationRequest("", updatedSinkPlugin));

    addDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName), Table.class.getName());
    StructuredRecord john = StructuredRecord.builder(schema).set("name", "john").build();
    StructuredRecord tom = StructuredRecord.builder(schema).set("name", "tom").build();
    DataSetManager<Table> updatedInput = getDataset(newSrcTableName);
    MockSource.writeInput(updatedInput, ImmutableList.of(john, tom));

    // Second run of the same app: it should read from and write to the new tables.
    manager.start(runtimeArguments);
    manager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 3, TimeUnit.MINUTES);
    DataSetManager<Table> updatedOutput = getDataset(newSinkTableName);
    List<StructuredRecord> secondRunRecords = MockSink.readOutput(updatedOutput);
    Assert.assertEquals(ImmutableSet.of(john, tom), new HashSet<>(secondRunRecords));

    // Clean up the connections and every dataset instance this test touched.
    deleteConnection(sourceConnName);
    deleteConnection(sinkConnName);
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sinkTableName));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSinkTableName));
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PreviewManager(io.cdap.cdap.app.preview.PreviewManager) ConnectionCreationRequest(io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest) PluginInfo(io.cdap.cdap.etl.proto.connection.PluginInfo) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PreviewStatus(io.cdap.cdap.app.preview.PreviewStatus) Table(io.cdap.cdap.api.dataset.table.Table) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PreviewStatus(io.cdap.cdap.app.preview.PreviewStatus) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) PreviewConfig(io.cdap.cdap.proto.artifact.preview.PreviewConfig)

Aggregations

ApplicationId (io.cdap.cdap.proto.id.ApplicationId)789 Test (org.junit.Test)410 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)279 ProgramId (io.cdap.cdap.proto.id.ProgramId)263 ApplicationManager (io.cdap.cdap.test.ApplicationManager)225 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)223 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)196 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)180 Table (io.cdap.cdap.api.dataset.table.Table)178 WorkflowManager (io.cdap.cdap.test.WorkflowManager)169 NamespaceId (io.cdap.cdap.proto.id.NamespaceId)154 Schema (io.cdap.cdap.api.data.schema.Schema)147 ArrayList (java.util.ArrayList)129 HashSet (java.util.HashSet)126 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)124 HashMap (java.util.HashMap)109 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)107 ArtifactSummary (io.cdap.cdap.api.artifact.ArtifactSummary)88 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)88 Path (javax.ws.rs.Path)75