Search in sources :

Example 76 with ETLStage

use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

the class DataPipelineTest method testKVTableLookup.

/**
 * Runs a source -> lookup transform -> sink pipeline where the transform is configured with the
 * "person" field, the "age" field and the "ageTable" dataset, then checks that every output record
 * carries the age that was written to "ageTable" for that person.
 */
@Test
public void testKVTableLookup() throws Exception {
    // name / age pairs shared by the lookup table contents and the expected output
    String[][] peopleAndAges = { { "samuel", "12" }, { "bob", "36" }, { "jane", "25" } };

    // populate the KeyValueTable the transform looks ages up in
    addDatasetInstance(KeyValueTable.class.getName(), "ageTable");
    DataSetManager<KeyValueTable> ageTableManager = getDataset("ageTable");
    for (String[] entry : peopleAndAges) {
        ageTableManager.get().write(entry[0].getBytes(Charsets.UTF_8), entry[1].getBytes(Charsets.UTF_8));
    }
    ageTableManager.flush();

    // source --> transform --> sink
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin("inputTable")))
        .addStage(new ETLStage("transform", LookupTransform.getPlugin("person", "age", "ageTable")))
        .addStage(new ETLStage("sink", MockSink.getPlugin("outputTable")))
        .addConnection("source", "transform")
        .addConnection("transform", "sink")
        .build();
    ApplicationId appId = NamespaceId.DEFAULT.app("testKVTableLookup");
    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationManager deployedApp = deployApplication(appId, deployRequest);

    // input records only carry the person name
    Schema nameOnlySchema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)));
    StructuredRecord samuel = StructuredRecord.builder(nameOnlySchema).set("person", "samuel").build();
    StructuredRecord bob = StructuredRecord.builder(nameOnlySchema).set("person", "bob").build();
    StructuredRecord jane = StructuredRecord.builder(nameOnlySchema).set("person", "jane").build();
    DataSetManager<Table> sourceTable = getDataset("inputTable");
    MockSource.writeInput(sourceTable, ImmutableList.of(samuel, bob, jane));

    // run the pipeline once and wait for it to complete
    WorkflowManager pipelineRun = deployedApp.getWorkflowManager(SmartWorkflow.NAME).start();
    pipelineRun.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // output records are expected to have both the name and the looked-up age, as strings
    Schema nameAndAgeSchema = Schema.recordOf("person",
        Schema.Field.of("person", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("age", Schema.of(Schema.Type.STRING)));
    Set<StructuredRecord> expectedRecords = new HashSet<>();
    for (String[] entry : peopleAndAges) {
        expectedRecords.add(StructuredRecord.builder(nameAndAgeSchema).set("person", entry[0]).set("age", entry[1]).build());
    }
    DataSetManager<Table> sinkTable = getDataset("outputTable");
    Set<StructuredRecord> actualRecords = new HashSet<>(MockSink.readOutput(sinkTable));
    Assert.assertEquals(expectedRecords, actualRecords);

    // all three records should be counted leaving the source and entering the sink
    validateMetric(3, appId, "source.records.out");
    validateMetric(3, appId, "sink.records.in");

    // remove the input and output datasets created for this run
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset("inputTable"));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset("outputTable"));
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 77 with ETLStage

use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

the class DataPipelineTest method testNestedCondition.

/**
 * Deploys the nested-condition pipeline drawn below on the given engine, starts it with only the
 * condition3 and condition4 branch arguments supplied (both "true"), and verifies which sinks
 * received records and which actions wrote their values.
 *
 * @param engine engine set on the pipeline config; also embedded in the app and dataset names
 */
private void testNestedCondition(Engine engine) throws Exception {
    Schema recordSchema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     * action1----->|
     *              |
     * action2----->|
     *              |
     *              V
     * source --> condition1 --> sink1
     *              |
     *              |------->condition2 --> sink2
     *                          |
     *                          |-------> condition3----> sink3----->action3----->condition4---->action4
     *                                                                                |
     *                                                                                |------>action5
     */
    String appName = "NestedCondition-" + engine;
    String sourceDataset = appName + "Source-" + engine;
    String sink1Dataset = appName + "Sink1-" + engine;
    String sink2Dataset = appName + "Sink2-" + engine;
    String sink3Dataset = appName + "Sink3-" + engine;
    String actionDataset = "actionTable" + appName + "-" + engine;

    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceDataset, recordSchema)))
        .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Dataset)))
        .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Dataset)))
        .addStage(new ETLStage("sink3", MockSink.getPlugin(sink3Dataset)))
        .addStage(new ETLStage("action1", MockAction.getPlugin(actionDataset, "row1", "key1", "val1")))
        .addStage(new ETLStage("action2", MockAction.getPlugin(actionDataset, "row2", "key2", "val2")))
        .addStage(new ETLStage("action3", MockAction.getPlugin(actionDataset, "row3", "key3", "val3")))
        .addStage(new ETLStage("action4", MockAction.getPlugin(actionDataset, "row4", "key4", "val4")))
        .addStage(new ETLStage("action5", MockAction.getPlugin(actionDataset, "row5", "key5", "val5")))
        .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
        .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
        .addStage(new ETLStage("condition3", MockCondition.getPlugin("condition3")))
        .addStage(new ETLStage("condition4", MockCondition.getPlugin("condition4")))
        .addConnection("action1", "condition1")
        .addConnection("action2", "condition1")
        .addConnection("source", "condition1")
        .addConnection("condition1", "sink1", true)
        .addConnection("condition1", "condition2", false)
        .addConnection("condition2", "sink2", true)
        .addConnection("condition2", "condition3", false)
        .addConnection("condition3", "sink3", true)
        .addConnection("sink3", "action3")
        .addConnection("action3", "condition4")
        .addConnection("condition4", "action4", true)
        .addConnection("condition4", "action5", false)
        .setEngine(engine)
        .build();
    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT_RANGE, pipelineConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app(appName);
    ApplicationManager deployedApp = deployApplication(appId, deployRequest);

    // feed two records into the source dataset
    StructuredRecord samuel = StructuredRecord.builder(recordSchema).set("name", "samuel").build();
    StructuredRecord bob = StructuredRecord.builder(recordSchema).set("name", "bob").build();
    DataSetManager<Table> sourceManager = getDataset(NamespaceId.DEFAULT.dataset(sourceDataset));
    MockSource.writeInput(sourceManager, ImmutableList.of(samuel, bob));

    // run once, passing branch arguments for condition3 and condition4 only
    WorkflowManager pipelineRun = deployedApp.getWorkflowManager(SmartWorkflow.NAME);
    pipelineRun.start(ImmutableMap.of("condition3.branch.to.execute", "true", "condition4.branch.to.execute", "true"));
    pipelineRun.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // sink3 is the only sink expected to hold the input records
    DataSetManager<Table> sink3Manager = getDataset(sink3Dataset);
    Set<StructuredRecord> expectedRecords = ImmutableSet.of(samuel, bob);
    Set<StructuredRecord> actualRecords = Sets.newHashSet(MockSink.readOutput(sink3Manager));
    Assert.assertEquals(expectedRecords, actualRecords);

    // sink1 and sink2 are expected to stay empty
    DataSetManager<Table> sink1Manager = getDataset(sink1Dataset);
    Assert.assertTrue(MockSink.readOutput(sink1Manager).isEmpty());
    DataSetManager<Table> sink2Manager = getDataset(sink2Dataset);
    Assert.assertTrue(MockSink.readOutput(sink2Manager).isEmpty());

    // action1 through action4 are expected to have written their value; action5 nothing
    DataSetManager<Table> actionManager = getDataset(actionDataset);
    for (int i = 1; i <= 4; i++) {
        Assert.assertEquals("val" + i, MockAction.readOutput(actionManager, "row" + i, "key" + i));
    }
    Assert.assertNull(MockAction.readOutput(actionManager, "row5", "key5"));
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)

Example 78 with ETLStage

use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

the class DataPipelineTest method testSimpleUpgradePipelinesWithSnapshotArtifact.

/* Tests upgrade for a deployed application. Also tests that SNAPSHOT artifacts are being considered for upgrade.
     1. Deploy an application with older application artifact (1.0.0) and older filter plugin version (1.0.0).
     2. Add new versions of application artifact (0.0.9, 1.1.0, 1.2.0) and filter plugin artifacts (1.0.5, 1.1.0).
     3. Also deploy a snapshot version of app artifact 1.3.0-SNAPSHOT and plugin artifact 1.1.1-SNAPSHOT bound to it.
     4. Upgrade the older deployed application.
     5. Verify that after upgrading, application artifact and filter plugin artifact are upgraded to use the latest
        version in the config, and that snapshot versions are used for both.

     NOTE(review): steps 2 and 3 are not performed inside this method; presumably the newer artifacts are
     registered in the class-level setup - confirm.
   */
@Test
public void testSimpleUpgradePipelinesWithSnapshotArtifact() throws Exception {
    // The filter plugin is pinned to the old 1.0.0 "test-plugins" artifact in USER scope.
    ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .setEngine(engine)
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
        .addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector)))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addConnection("source", "filter")
        .addConnection("filter", "sink")
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // Capture the stages as deployed, keyed by stage name, for comparison after the upgrade.
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application with allowSnapshot set to true.
    appManager.upgrade(Collections.emptySet(), true);
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // Verify that after upgrade, application upgrades artifact version to latest version available.
    Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_3_SNAPSHOT.getVersion(), upgradedAppDetail.getArtifact().getVersion());
    // The filter stage's plugin artifact should now point at the snapshot version and still be in USER scope.
    // Arguments are passed as (expected, actual) so a failure message labels the values correctly.
    ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
    Assert.assertEquals("1.1.1-SNAPSHOT", upgradedPlugin.getArtifactConfig().getVersion());
    Assert.assertEquals(ArtifactScope.USER, ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()));
}
Also used : HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) 
FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) 
Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) 
DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) 
ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) Test(org.junit.Test)

Example 79 with ETLStage

use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

the class DataPipelineTest method testNoConnectorsForSourceCondition.

/**
 * Deploys a pipeline in which two chained conditions sit in front of the source
 * (condition1 --> condition2 --> source --> sink), starts it with both conditions told to take
 * their "true" branch, and checks that the sink holds exactly the records written to the source.
 */
@Test
public void testNoConnectorsForSourceCondition() throws Exception {
    String sourceDataset = "simpleNoConnectorConditionSource";
    String sinkDataset = "trueOutput";
    Schema recordSchema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceDataset, recordSchema)))
        .addStage(new ETLStage("trueSink", MockSink.getPlugin(sinkDataset)))
        .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
        .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
        .addConnection("condition1", "condition2", true)
        .addConnection("condition2", "source", true)
        .addConnection("source", "trueSink")
        .build();
    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT_RANGE, pipelineConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("NoConnectorForSourceConditionApp");
    ApplicationManager deployedApp = deployApplication(appId, deployRequest);

    // feed two records into the source dataset
    StructuredRecord samuel = StructuredRecord.builder(recordSchema).set("name", "samuel").build();
    StructuredRecord bob = StructuredRecord.builder(recordSchema).set("name", "bob").build();
    DataSetManager<Table> sourceManager = getDataset(NamespaceId.DEFAULT.dataset(sourceDataset));
    MockSource.writeInput(sourceManager, ImmutableList.of(samuel, bob));

    // run once with both conditions set to their "true" branch
    WorkflowManager pipelineRun = deployedApp.getWorkflowManager(SmartWorkflow.NAME);
    pipelineRun.start(ImmutableMap.of("condition1.branch.to.execute", "true", "condition2.branch.to.execute", "true"));
    pipelineRun.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // both input records are expected in the sink
    DataSetManager<Table> sinkManager = getDataset(sinkDataset);
    Set<StructuredRecord> expectedRecords = ImmutableSet.of(samuel, bob);
    Set<StructuredRecord> actualRecords = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expectedRecords, actualRecords);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 80 with ETLStage

use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

the class DataPipelineTest method testFailureToStartIncapableProgram.

/**
 * Deploys a pipeline made only of an IncapableSource and an IncapableSink, starts its workflow,
 * and waits for the run to reach the FAILED state.
 */
@Test
public void testFailureToStartIncapableProgram() throws Exception {
    ETLBatchConfig incapableConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source", IncapableSource.getPlugin()))
        .addStage(new ETLStage("sink", IncapableSink.getPlugin()))
        .addConnection("source", "sink")
        .build();
    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT_RANGE, incapableConfig);
    ApplicationId incapableAppId = NamespaceId.DEFAULT.app("IncapableApp");
    ApplicationManager deployedApp = deployApplication(incapableAppId, deployRequest);
    WorkflowManager pipelineRun = deployedApp.getWorkflowManager(SmartWorkflow.NAME);
    // start() itself is not expected to throw here; the failure is observed on the run status below
    pipelineRun.start();
    // the run is expected to fail because the pipeline uses the incapable plugins
    pipelineRun.waitForRun(ProgramRunStatus.FAILED, 5, TimeUnit.MINUTES);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)157 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)154 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)119 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)93 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)90 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)89 ApplicationManager (io.cdap.cdap.test.ApplicationManager)87 Schema (io.cdap.cdap.api.data.schema.Schema)81 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)78 Table (io.cdap.cdap.api.dataset.table.Table)76 WorkflowManager (io.cdap.cdap.test.WorkflowManager)72 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)70 AppRequest (co.cask.cdap.proto.artifact.AppRequest)57 ApplicationId (co.cask.cdap.proto.id.ApplicationId)57 ApplicationManager (co.cask.cdap.test.ApplicationManager)53 Schema (co.cask.cdap.api.data.schema.Schema)46 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)46 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)45 HashSet (java.util.HashSet)45 Table (co.cask.cdap.api.dataset.table.Table)44