Search in sources :

Example 1 with Engine

use of io.cdap.cdap.etl.api.Engine in project cdap by caskdata.

the class DataPipelineTest method testSimpleUpgradePipelinesWithArtifactScope.

/* Tests upgrade for a deployed application. Also tests artifact scope parameter for only considering artifacts in
       a given scope.
     1. Deploy an application with older application artifact (1.0.0) and older filter plugin version (1.0.0).
     2. Add new versions of application artifacts (0.0.9, 1.1.0, 1.2.0) and filter plugin artifacts (1.0.5, 1.1.0) in
        SYSTEM scope (in test class setup).
     3. Also deploy a snapshot version of plugin artifact 1.0.8 in USER scope.
     3. Upgrade the older deployed application with artifact scope set to USER for upgrade.
     4. Verify that after upgrading, application artifact and filter plugin artifact is upgraded to use latest version
        in its config and it uses snapshot plugin version with 1.0.8 from USER scope.
   */
@Test
public void testSimpleUpgradePipelinesWithArtifactScope() throws Exception {
    ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source", "filter").addConnection("filter", "sink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application with artifact scope as USER.
    appManager.upgrade(Collections.singleton(ArtifactScope.USER.toString()), false);
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // Verify that after upgrade, application upgrades artifact version to latest version available.
    Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_2.getVersion(), upgradedAppDetail.getArtifact().getVersion());
    // Check if the filter stage, for which version should be upgraded to desired version in SYSTEM scope.
    ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
    Assert.assertEquals("1.0.8", upgradedPlugin.getArtifactConfig().getVersion());
    Assert.assertEquals(ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()), ArtifactScope.USER);
}
Also used : HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) Test(org.junit.Test)

Example 2 with Engine

use of io.cdap.cdap.etl.api.Engine in project cdap by caskdata.

the class DataPipelineTest method testSimpleUpgradePipelines.

/* Tests upgrade for a deployed application.
     1. Deploy an application with older application artifact (1.0.0) and older filter plugin version (1.0.0).
     2. Add new versions of application artifacts (0.0.9, 1.1.0, 1.2.0) and filter plugin artifacts (1.0.5, 1.1.0) in
        SYSTEM scope (in test class setup).
     3. Upgrade the older deployed application.
     4. Verify that after upgrading, application artifact and filter plugin artifact is upgraded to use latest version
        in its config.
   */
@Test
public void testSimpleUpgradePipelines() throws Exception {
    ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source", "filter").addConnection("filter", "sink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application.
    appManager.upgrade();
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // Verify that after upgrade, application upgrades artifact version to latest version available.
    Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_2.getVersion(), upgradedAppDetail.getArtifact().getVersion());
    // Check if the filter stage, for which version should be upgraded to desired version in SYSTEM scope.
    ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
    Assert.assertEquals(upgradedPlugin.getArtifactConfig().getVersion(), "1.1.0");
    Assert.assertEquals(ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()), ArtifactScope.SYSTEM);
}
Also used : HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) Test(org.junit.Test)

Example 3 with Engine

use of io.cdap.cdap.etl.api.Engine in project cdap by caskdata.

the class DataPipelineTest method testSimpleUpgradePipelinesWithSnapshotArtifact.

/* Tests upgrade for a deployed application. Also tests that SNAPSHOT artifacts are being considered for upgrade.
     1. Deploy an application with older application artifact (1.0.0) and older filter plugin version (1.0.0).
     2. Add new versions of application artifact (0.0.9, 1.1.0, 1.2.0) and filter plugin artifacts (1.0.5, 1.1.0).
     3. Also deploy a snapshot version of app artifact 1.3.0-SNAPSHOT and plugin artifact 1.1.1-SNAPSHOT bind to it.
     3. Upgrade the older deployed application.
     4. Verify that after upgrading, application artifact and filter plugin artifact is upgraded to use latest version
        in its config and it uses snapshot versions for both.
   */
@Test
public void testSimpleUpgradePipelinesWithSnapshotArtifact() throws Exception {
    ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source", "filter").addConnection("filter", "sink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application with allowSnapshot set to true.
    appManager.upgrade(Collections.emptySet(), true);
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // Verify that after upgrade, application upgrades artifact version to latest version available.
    Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_3_SNAPSHOT.getVersion(), upgradedAppDetail.getArtifact().getVersion());
    // Check if the filter stage, for which version should be upgraded to desired version in SYSTEM scope.
    ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
    Assert.assertEquals(upgradedPlugin.getArtifactConfig().getVersion(), "1.1.1-SNAPSHOT");
    Assert.assertEquals(ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()), ArtifactScope.USER);
}
Also used : HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) Test(org.junit.Test)

Example 4 with Engine

use of io.cdap.cdap.etl.api.Engine in project cdap by caskdata.

the class DataPipelineTest method testUpgradePipelinesWithNoChangeInPluginRange.

/* Tests upgrade for a deployed application with a plugin using plugin range.
   1. Deploy an application with older application artifact (1.0.0) and filter plugin version with range
      [1.0.0-2.0.0) to make sure latest version of plugin should be included in it.
   2. Add new versions of application artifacts (0.0.9, 1.1.0, 1.2.0) and filter plugin artifacts (1.0.5, 1.1.0) in
      SYSTEM scope (in test class setup).
   3. Upgrade the older deployed application.
   4. Verify that after upgrading, application artifact uses latest version in its config.
      But plugin range is not updated as latest version of plugin is still included in the range.
  */
@Test
public void testUpgradePipelinesWithNoChangeInPluginRange() throws Exception {
    ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "[1.0.0,2.0.0)");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source", "filter").addConnection("filter", "sink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application with consider snapshot enabled to consider highest version of plugin available which is
    // 1.1.1-SNAPSHOT.
    appManager.upgrade(Collections.emptySet(), true);
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // The filter stage is not changed as latest plugin is in the range.
    Assert.assertEquals(oldStageMap.get("filter"), newStageMap.get("filter"));
    // Verify that after upgrade, application upgrades artifact version to latest version available.
    Assert.assertEquals(upgradedAppDetail.getArtifact().getVersion(), UPGRADE_APP_ARTIFACT_ID_3_SNAPSHOT.getVersion());
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 5 with Engine

use of io.cdap.cdap.etl.api.Engine in project cdap by caskdata.

the class DataPipelineTest method testUpgradePipelinesWithPluginRange.

/* Tests upgrade for a deployed application with a plugin using plugin range.
   1. Deploy an application with older application artifact (1.0.0) and older filter plugin version with range
      [1.0.0-1.0.5).
   2. Add new versions of application artifacts (0.0.9, 1.1.0, 1.2.0) and filter plugin artifacts (1.0.5, 1.1.0) in
      SYSTEM scope (in test class setup).
   3. Upgrade the older deployed application.
   4. Verify that after upgrading, application artifact and filter plugin artifact is upgraded to use latest version
      in its config. Also verify that plugin version range for filter stage is changed to use newest version of plugin.
 */
@Test
public void testUpgradePipelinesWithPluginRange() throws Exception {
    ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "[1.0.0,1.0.5)");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source", "filter").addConnection("filter", "sink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application.
    appManager.upgrade();
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // Verify that after upgrade, application upgrades artifact version to latest version available.
    Assert.assertEquals(upgradedAppDetail.getArtifact().getVersion(), UPGRADE_APP_ARTIFACT_ID_2.getVersion());
    // Check if the filter stage, for which version range should be upgraded to include latest plugin version in SYSTEM
    // scope.
    ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
    Assert.assertEquals(upgradedPlugin.getArtifactConfig().getVersion(), "[1.0.0,1.1.0]");
    Assert.assertEquals(ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()), ArtifactScope.SYSTEM);
}
Also used : HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) Test(org.junit.Test)

Aggregations

Charsets (com.google.common.base.Charsets)5 ImmutableList (com.google.common.collect.ImmutableList)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 ImmutableSet (com.google.common.collect.ImmutableSet)5 Sets (com.google.common.collect.Sets)5 ByteStreams (com.google.common.io.ByteStreams)5 Gson (com.google.gson.Gson)5 ProgramStatus (io.cdap.cdap.api.ProgramStatus)5 ArtifactScope (io.cdap.cdap.api.artifact.ArtifactScope)5 ArtifactSummary (io.cdap.cdap.api.artifact.ArtifactSummary)5 Bytes (io.cdap.cdap.api.common.Bytes)5 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)5 Schema (io.cdap.cdap.api.data.schema.Schema)5 CloseableIterator (io.cdap.cdap.api.dataset.lib.CloseableIterator)5 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)5 FileSetArguments (io.cdap.cdap.api.dataset.lib.FileSetArguments)5 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)5 Table (io.cdap.cdap.api.dataset.table.Table)5 EndPoint (io.cdap.cdap.api.lineage.field.EndPoint)5 InputField (io.cdap.cdap.api.lineage.field.InputField)5