Search in sources :

Example 36 with ETLPlugin

use of io.cdap.cdap.etl.proto.v2.ETLPlugin in project cdap by cdapio.

the class DataPipelineServiceTest method testValidateStageMissingRequiredProperty.

/**
 * Verifies that a plugin which cannot be instantiated because required properties are missing
 * is reported through validation failures rather than a stage spec.
 */
@Test
public void testValidateStageMissingRequiredProperty() throws Exception {
    String stageName = "tx";
    // the string filter needs both a field name and a value; neither is supplied here
    ETLPlugin filterPlugin = new ETLPlugin(StringValueFilterTransform.NAME, Transform.PLUGIN_TYPE, Collections.emptyMap());
    StageValidationRequest request = new StageValidationRequest(new ETLStage(stageName, filterPlugin), Collections.emptyList(), false);
    StageValidationResponse actual = sendRequest(request);

    // no spec can be produced, and exactly two failures are expected
    Assert.assertNull(actual.getSpec());
    Assert.assertEquals(2, actual.getFailures().size());

    Set<String> expected = new HashSet<>();
    expected.add("field");
    expected.add("value");

    // gather the config property named by the first cause of every failure; compared as a set,
    // so the order in which the failures are reported does not matter
    Set<String> properties = new HashSet<>();
    for (int i = 0; i < actual.getFailures().size(); i++) {
        properties.add(actual.getFailures().get(i).getCauses().get(0).getAttribute(CauseAttributes.STAGE_CONFIG));
    }
    Assert.assertEquals(expected, properties);
}
Also used : StageValidationRequest(io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 37 with ETLPlugin

use of io.cdap.cdap.etl.proto.v2.ETLPlugin in project cdap by cdapio.

the class DataPipelineTest method testSimpleUpgradePipelines.

/**
 * Tests upgrade for a deployed application.
 *
 * <ol>
 *   <li>Deploy an application with an older application artifact (1.0.0) and an older filter plugin
 *       version (1.0.0).</li>
 *   <li>Add new versions of the application artifacts (0.0.9, 1.1.0, 1.2.0) and of the filter plugin
 *       artifacts (1.0.5, 1.1.0) in SYSTEM scope (done in the test class setup).</li>
 *   <li>Upgrade the older deployed application.</li>
 *   <li>Verify that after upgrading, the application artifact and the filter plugin artifact are
 *       upgraded to use the latest version in the application config.</li>
 * </ol>
 */
@Test
public void testSimpleUpgradePipelines() throws Exception {
    ArtifactSelectorConfig currentArtifactSelector = new ArtifactSelectorConfig(ArtifactScope.USER.name(), "test-plugins", "1.0.0");
    Engine engine = Engine.MAPREDUCE;
    String sourceName = "testSource" + engine.name();
    String sinkName = "testSink" + engine.name();
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .setEngine(engine)
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
        .addStage(new ETLStage("filter", PluggableFilterTransform.getPlugin(ValueFilter.NAME, ValueFilter.getProperties("${field}", "${value}"), currentArtifactSelector)))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addConnection("source", "filter")
        .addConnection("filter", "sink")
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
    // Deploy app with artifact version 1.0.0.
    ApplicationManager appManager = deployApplication(appId, appRequest);
    ApplicationDetail oldAppDetail = getAppDetail(appId);
    ETLBatchConfig oldBatchConfig = GSON.fromJson(oldAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> oldStageMap = oldBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Upgrade application.
    appManager.upgrade();
    ApplicationDetail upgradedAppDetail = getAppDetail(appId);
    ETLBatchConfig newBatchConfig = GSON.fromJson(upgradedAppDetail.getConfiguration(), ETLBatchConfig.class);
    Map<String, ETLStage> newStageMap = newBatchConfig.getStages().stream().collect(Collectors.toMap(ETLStage::getName, e -> e));
    // Compare stages that should be the same after upgrade.
    Assert.assertEquals(oldStageMap.get("source"), newStageMap.get("source"));
    Assert.assertEquals(oldStageMap.get("sink"), newStageMap.get("sink"));
    // Verify that after upgrade, the application uses the latest application artifact version available.
    Assert.assertEquals(UPGRADE_APP_ARTIFACT_ID_2.getVersion(), upgradedAppDetail.getArtifact().getVersion());
    // Check the filter stage: its plugin artifact should now be version 1.1.0 from SYSTEM scope.
    // Expected values go first so that a failure message reports expected/actual the right way round.
    ETLPlugin upgradedPlugin = newStageMap.get("filter").getPlugin();
    Assert.assertEquals("1.1.0", upgradedPlugin.getArtifactConfig().getVersion());
    Assert.assertEquals(ArtifactScope.SYSTEM, ArtifactScope.valueOf(upgradedPlugin.getArtifactConfig().getScope().toUpperCase()));
}
Also used : HttpURLConnection(java.net.HttpURLConnection) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) LineageAdmin(io.cdap.cdap.metadata.LineageAdmin) Arrays(java.util.Arrays) MockJoiner(io.cdap.cdap.etl.mock.batch.joiner.MockJoiner) Bytes(io.cdap.cdap.api.common.Bytes) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) MockRuntimeDatasetSource(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSource) MockSink(io.cdap.cdap.etl.mock.batch.MockSink) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) MockAction(io.cdap.cdap.etl.mock.action.MockAction) MockSource(io.cdap.cdap.etl.mock.batch.MockSource) IdentityAggregator(io.cdap.cdap.etl.mock.batch.aggregator.IdentityAggregator) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ScheduleId(io.cdap.cdap.proto.id.ScheduleId) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Map(java.util.Map) LookupTransform(io.cdap.cdap.etl.mock.batch.LookupTransform) MetadataAdmin(io.cdap.cdap.metadata.MetadataAdmin) ClassRule(org.junit.ClassRule) FilterErrorTransform(io.cdap.cdap.etl.mock.transform.FilterErrorTransform) ScheduleDetail(io.cdap.cdap.proto.ScheduleDetail) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) PrintWriter(java.io.PrintWriter) ValueFilter(io.cdap.cdap.datapipeline.plugin.ValueFilter) Table(io.cdap.cdap.api.dataset.table.Table) GroupFilterAggregator(io.cdap.cdap.etl.mock.batch.aggregator.GroupFilterAggregator) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PluginClass(io.cdap.cdap.api.plugin.PluginClass) SchedulableProgramType(io.cdap.cdap.api.schedule.SchedulableProgramType) InputField(io.cdap.cdap.api.lineage.field.InputField) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ByteStreams(com.google.common.io.ByteStreams) DataSetManager(io.cdap.cdap.test.DataSetManager) ServiceManager(io.cdap.cdap.test.ServiceManager) 
FileSetArguments(io.cdap.cdap.api.dataset.lib.FileSetArguments) IdentityTransform(io.cdap.cdap.etl.mock.transform.IdentityTransform) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) RunRecord(io.cdap.cdap.proto.RunRecord) ArrayList(java.util.ArrayList) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) PluggableFilterTransform(io.cdap.cdap.datapipeline.plugin.PluggableFilterTransform) MockRuntimeDatasetSink(io.cdap.cdap.etl.mock.batch.MockRuntimeDatasetSink) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) MockExternalSource(io.cdap.cdap.etl.mock.batch.MockExternalSource) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) Charsets(com.google.common.base.Charsets) Message(io.cdap.cdap.api.messaging.Message) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) StringValueFilterTransform(io.cdap.cdap.etl.mock.transform.StringValueFilterTransform) Test(org.junit.Test) NaiveBayesClassifier(io.cdap.cdap.datapipeline.mock.NaiveBayesClassifier) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) File(java.io.File) IncapableSource(io.cdap.cdap.etl.mock.batch.IncapableSource) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Schedulers(io.cdap.cdap.internal.app.runtime.schedule.store.Schedulers) BufferedReader(java.io.BufferedReader) Assert(org.junit.Assert) WordCount(io.cdap.cdap.datapipeline.spark.WordCount) FieldLineageAdmin(io.cdap.cdap.metadata.FieldLineageAdmin) HttpRequest(io.cdap.common.http.HttpRequest) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) Engine(io.cdap.cdap.etl.api.Engine) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) URL(java.net.URL) NaiveBayesTrainer(io.cdap.cdap.datapipeline.mock.NaiveBayesTrainer) HttpResponse(io.cdap.common.http.HttpResponse) TimeoutException(java.util.concurrent.TimeoutException) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) NodeStatesAction(io.cdap.cdap.etl.mock.batch.NodeStatesAction) Gson(com.google.gson.Gson) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) 
Metadata(io.cdap.cdap.spi.metadata.Metadata) After(org.junit.After) ProgramStatus(io.cdap.cdap.api.ProgramStatus) DropNullTransform(io.cdap.cdap.etl.mock.transform.DropNullTransform) RunId(org.apache.twill.api.RunId) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) FieldCountAggregator(io.cdap.cdap.etl.mock.batch.aggregator.FieldCountAggregator) Tasks(io.cdap.cdap.common.utils.Tasks) DatasetFieldLineageSummary(io.cdap.cdap.metadata.DatasetFieldLineageSummary) ImmutableSet(com.google.common.collect.ImmutableSet) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) ImmutableMap(com.google.common.collect.ImmutableMap) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) NullAlertTransform(io.cdap.cdap.etl.mock.alert.NullAlertTransform) UUID(java.util.UUID) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ProgramStatusTrigger(io.cdap.cdap.internal.app.runtime.schedule.trigger.ProgramStatusTrigger) List(java.util.List) ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) IntValueFilterTransform(io.cdap.cdap.etl.mock.transform.IntValueFilterTransform) Constants(io.cdap.cdap.common.conf.Constants) ArtifactScope(io.cdap.cdap.api.artifact.ArtifactScope) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) Alert(io.cdap.cdap.etl.api.Alert) SleepTransform(io.cdap.cdap.etl.mock.transform.SleepTransform) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) BeforeClass(org.junit.BeforeClass) TestConfiguration(io.cdap.cdap.test.TestConfiguration) MockExternalSink(io.cdap.cdap.etl.mock.batch.MockExternalSink) MockCondition(io.cdap.cdap.etl.mock.condition.MockCondition) HydratorTestBase(io.cdap.cdap.etl.mock.test.HydratorTestBase) TMSAlertPublisher(io.cdap.cdap.etl.mock.alert.TMSAlertPublisher) HashMap(java.util.HashMap) 
DefaultHttpRequestConfig(io.cdap.cdap.common.http.DefaultHttpRequestConfig) NullFieldSplitterTransform(io.cdap.cdap.etl.mock.transform.NullFieldSplitterTransform) LineFilterProgram(io.cdap.cdap.datapipeline.spark.LineFilterProgram) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FlattenErrorTransform(io.cdap.cdap.etl.mock.transform.FlattenErrorTransform) FieldRelation(io.cdap.cdap.metadata.FieldRelation) Compat(io.cdap.cdap.etl.spark.Compat) HttpRequests(io.cdap.common.http.HttpRequests) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) PrintStream(java.io.PrintStream) FieldLineageAction(io.cdap.cdap.etl.mock.action.FieldLineageAction) FilterTransform(io.cdap.cdap.etl.mock.batch.FilterTransform) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) RunIds(io.cdap.cdap.common.app.RunIds) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) ProgramId(io.cdap.cdap.proto.id.ProgramId) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ServiceApp(io.cdap.cdap.datapipeline.service.ServiceApp) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) IncapableSink(io.cdap.cdap.etl.mock.batch.IncapableSink) TimeUnit(java.util.concurrent.TimeUnit) WorkflowManager(io.cdap.cdap.test.WorkflowManager) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) FileReader(java.io.FileReader) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) Collections(java.util.Collections) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) 
ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ApplicationDetail(io.cdap.cdap.proto.ApplicationDetail) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Engine(io.cdap.cdap.etl.api.Engine) Test(org.junit.Test)

Example 38 with ETLPlugin

use of io.cdap.cdap.etl.proto.v2.ETLPlugin in project cdap by cdapio.

the class ValidationUtils method validate.

/**
 * Validates a single pipeline stage described by a {@link StageValidationRequest}.
 *
 * <p>The stage's plugin properties are passed through {@code macroFn}, and the stage is then configured
 * with a {@link BatchPipelineSpecGenerator} using the evaluated properties.
 *
 * @param namespace            namespace handed to the pipeline spec generator
 * @param validationRequest    {@link StageValidationRequest} carrying the stage and its input schemas
 * @param pluginConfigurer     {@link PluginConfigurer} for using the plugin
 * @param macroFn              {@link Function} applied to the plugin properties to evaluate macros
 * @param featureFlagsProvider feature flag provider handed to the configurers and the spec generator
 * @return a {@link StageValidationResponse} holding the stage spec when configuration succeeds, or the
 *         failures carried by the {@link ValidationException} when it does not
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest, PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn, FeatureFlagsProvider featureFlagsProvider) {
    ETLStage stageConfig = validationRequest.getStage();
    String stageName = stageConfig.getName();
    ETLPlugin originalConfig = stageConfig.getPlugin();

    ValidatingConfigurer validatingConfigurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);
    // Batch or Streaming doesn't matter for a single stage.
    PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> pipelineSpecGenerator =
        new BatchPipelineSpecGenerator(namespace, validatingConfigurer, null, Collections.emptySet(), Collections.emptySet(), Engine.SPARK, featureFlagsProvider);

    // register every input schema together with the stage it comes from
    DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageName);
    for (StageSchema inputSchema : validationRequest.getInputSchemas()) {
        String inputStage = inputSchema.getStage();
        stageConfigurer.addInputSchema(inputStage, inputSchema.getSchema());
        stageConfigurer.addInputStage(inputStage);
    }
    DefaultPipelineConfigurer pipelineConfigurer =
        new DefaultPipelineConfigurer(validatingConfigurer, stageName, Engine.SPARK, stageConfigurer, featureFlagsProvider);

    // evaluate macros, then rebuild the plugin config with the evaluated properties
    Map<String, String> evaluatedProperties = macroFn.apply(originalConfig.getProperties());
    ETLPlugin evaluatedConfig =
        new ETLPlugin(originalConfig.getName(), originalConfig.getType(), evaluatedProperties, originalConfig.getArtifactConfig());

    try {
        StageSpec stageSpec = pipelineSpecGenerator.configureStage(stageName, evaluatedConfig, pipelineConfigurer).build();
        return new StageValidationResponse(stageSpec);
    } catch (ValidationException validationFailure) {
        return new StageValidationResponse(validationFailure.getFailures());
    }
}
Also used : ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) BatchPipelineSpecGenerator(io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ValidatingConfigurer(io.cdap.cdap.etl.validation.ValidatingConfigurer) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)

Example 39 with ETLPlugin

use of io.cdap.cdap.etl.proto.v2.ETLPlugin in project cdap by cdapio.

the class AutoJoinerTest method testInnerBetweenCondition.

@Test
public void testInnerBetweenCondition() throws Exception {
    /*
         users ----------|
                         |--> join --> sink
         age_groups -----|

         joinOn: users.age >= age_groups.lo and (users.age < age_groups.hi or age_groups.hi is null)
     */
    Schema userSchema = Schema.recordOf(
        "user",
        Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("age", Schema.nullableOf(Schema.of(Schema.Type.INT))));
    Schema ageGroupSchema = Schema.recordOf(
        "age_group",
        Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("lo", Schema.of(Schema.Type.INT)),
        Schema.Field.of("hi", Schema.nullableOf(Schema.of(Schema.Type.INT))));
    Schema expectedSchema = Schema.recordOf(
        "users.age_groups",
        Schema.Field.of("username", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("age_group", Schema.of(Schema.Type.STRING)));

    // random dataset names for the two inputs and the output
    String userInput = UUID.randomUUID().toString();
    String agesInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();

    // output fields: the user's name and the name of the matching age group
    List<JoinField> select = new ArrayList<>();
    select.add(new JoinField("users", "name", "username"));
    select.add(new JoinField("age_groups", "name", "age_group"));
    JoinCondition.OnExpression condition = JoinCondition.onExpression()
        .setExpression("users.age >= age_groups.lo and (users.age < age_groups.hi or age_groups.hi is null)")
        .build();
    Map<String, String> joinerProperties = MockAutoJoiner.getProperties(Arrays.asList("users", "age_groups"), Collections.emptyList(), Arrays.asList("users", "age_groups"), Collections.emptyList(), select, false, null, condition);

    ETLBatchConfig config = ETLBatchConfig.builder()
        .addStage(new ETLStage("users", MockSource.getPlugin(userInput, userSchema)))
        .addStage(new ETLStage("age_groups", MockSource.getPlugin(agesInput, ageGroupSchema)))
        .addStage(new ETLStage("join", new ETLPlugin(MockAutoJoiner.NAME, BatchJoiner.PLUGIN_TYPE, joinerProperties)))
        .addStage(new ETLStage("sink", MockSink.getPlugin(output)))
        .addConnection("users", "join")
        .addConnection("age_groups", "join")
        .addConnection("join", "sink")
        .setEngine(Engine.SPARK)
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);

    // users input: Bob has no age set
    List<StructuredRecord> userRecords = new ArrayList<>();
    userRecords.add(StructuredRecord.builder(userSchema).set("name", "Alice").set("age", 35).build());
    userRecords.add(StructuredRecord.builder(userSchema).set("name", "Bob").build());
    userRecords.add(StructuredRecord.builder(userSchema).set("name", "Carl").set("age", 13).build());
    userRecords.add(StructuredRecord.builder(userSchema).set("name", "Dave").set("age", 0).build());
    userRecords.add(StructuredRecord.builder(userSchema).set("name", "Elaine").set("age", 68).build());
    userRecords.add(StructuredRecord.builder(userSchema).set("name", "Fred").set("age", 4).build());
    DataSetManager<Table> usersManager = getDataset(userInput);
    MockSource.writeInput(usersManager, userRecords);

    // age group input: "senior" has no upper bound set
    List<StructuredRecord> ageGroupRecords = new ArrayList<>();
    ageGroupRecords.add(StructuredRecord.builder(ageGroupSchema).set("name", "infant").set("lo", 0).set("hi", 2).build());
    ageGroupRecords.add(StructuredRecord.builder(ageGroupSchema).set("name", "toddler").set("lo", 2).set("hi", 5).build());
    ageGroupRecords.add(StructuredRecord.builder(ageGroupSchema).set("name", "child").set("lo", 5).set("hi", 13).build());
    ageGroupRecords.add(StructuredRecord.builder(ageGroupSchema).set("name", "teen").set("lo", 13).set("hi", 20).build());
    ageGroupRecords.add(StructuredRecord.builder(ageGroupSchema).set("name", "adult").set("lo", 20).set("hi", 65).build());
    ageGroupRecords.add(StructuredRecord.builder(ageGroupSchema).set("name", "senior").set("lo", 65).build());
    DataSetManager<Table> ageGroupsManager = getDataset(agesInput);
    MockSource.writeInput(ageGroupsManager, ageGroupRecords);

    // run the pipeline and wait for it to complete
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);

    // every user except Bob is expected in the output, paired with one age group
    Set<StructuredRecord> expected = new HashSet<>();
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Alice").set("age_group", "adult").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Carl").set("age_group", "teen").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Dave").set("age_group", "infant").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Elaine").set("age_group", "senior").build());
    expected.add(StructuredRecord.builder(expectedSchema).set("username", "Fred").set("age_group", "toddler").build());
    Assert.assertEquals(expected, new HashSet<>(outputRecords));

    validateMetric(6, appId, "users.records.out");
    validateMetric(6, appId, "age_groups.records.out");
    validateMetric(12, appId, "join.records.in");
    validateMetric(expected.size(), appId, "join.records.out");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) JoinField(io.cdap.cdap.etl.api.join.JoinField) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) JoinCondition(io.cdap.cdap.etl.api.join.JoinCondition) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 40 with ETLPlugin

use of io.cdap.cdap.etl.proto.v2.ETLPlugin in project cdap by cdapio.

the class DataPipelineServiceTest method testValidateStageMultipleErrors.

/**
 * Verifies that multiple exceptions set in an InvalidStageException are captured as failures.
 */
@Test
public void testValidateStageMultipleErrors() throws Exception {
    String stageName = "stg";
    // configure an invalid regex and set the source and destination to the same value,
    // which should generate 2 errors
    Map<String, String> properties = new HashMap<>();
    properties.put("filterRegex", "[");
    properties.put("sourceFileset", "files");
    properties.put("destinationFileset", "files");
    ETLPlugin fileMovePlugin = new ETLPlugin(FileMoveAction.NAME, Action.PLUGIN_TYPE, properties);
    StageValidationResponse actual =
        sendRequest(new StageValidationRequest(new ETLStage(stageName, fileMovePlugin), Collections.emptyList(), false));

    // validation must fail: no spec, exactly two failures
    Assert.assertNull(actual.getSpec());
    Assert.assertEquals(2, actual.getFailures().size());
    ValidationFailure regexFailure = actual.getFailures().get(0);
    ValidationFailure filesetFailure = actual.getFailures().get(1);

    // the first failure's first cause points at the regex property of this stage
    Assert.assertEquals("filterRegex", regexFailure.getCauses().get(0).getAttribute(CauseAttributes.STAGE_CONFIG));
    Assert.assertEquals(stageName, regexFailure.getCauses().get(0).getAttribute(STAGE));
    // the second failure should have 2 causes, one for each config property
    Assert.assertEquals(2, filesetFailure.getCauses().size());
}
Also used : StageValidationRequest(io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest) HashMap(java.util.HashMap) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse) ValidationFailure(io.cdap.cdap.etl.api.validation.ValidationFailure) Test(org.junit.Test)

Aggregations

ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)154 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)144 Test (org.junit.Test)136 ApplicationManager (io.cdap.cdap.test.ApplicationManager)102 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)101 Table (io.cdap.cdap.api.dataset.table.Table)79 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)77 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)70 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)59 HashMap (java.util.HashMap)57 Schema (io.cdap.cdap.api.data.schema.Schema)55 WorkflowManager (io.cdap.cdap.test.WorkflowManager)53 HashSet (java.util.HashSet)37 ImmutableMap (com.google.common.collect.ImmutableMap)36 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)35 ArrayList (java.util.ArrayList)34 StageValidationResponse (io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)20 ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin)18 StageValidationRequest (io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest)18 File (java.io.File)17