Search in sources :

Example 1 with StageSchema

use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.

the class DataPipelineServiceTest method testMacroResolutionFromProperties.

/**
 * Checks that the macro in the plugin's "value" property is resolved from the stored preferences only when the
 * validation request is created with its boolean flag set to true.
 */
@Test
public void testMacroResolutionFromProperties() throws Exception {
    String stageName = "tx";

    // StringValueFilterTransform checks that the configured field exists in the input schema
    Map<String, String> pluginProperties = new HashMap<>();
    pluginProperties.put("field", "x");
    pluginProperties.put("value", "${someProperty}");
    ETLPlugin plugin = new ETLPlugin(StringValueFilterTransform.NAME, Transform.PLUGIN_TYPE, pluginProperties);
    ETLStage stage = new ETLStage(stageName, plugin);

    Schema.Field fieldX = Schema.Field.of("x", Schema.of(Schema.Type.STRING));
    Schema inputSchema = Schema.recordOf("x", fieldX);

    // Store the preference that the macro refers to
    getPreferencesService().setProperties(NamespaceId.DEFAULT, Collections.singletonMap("someProperty", "someValue"));

    // Flag set to true: the returned spec should carry the resolved value
    StageSchema resolvedInput = new StageSchema("input", inputSchema);
    StageValidationResponse resolved =
        sendRequest(new StageValidationRequest(stage, Collections.singletonList(resolvedInput), true));
    Assert.assertTrue(resolved.getFailures().isEmpty());
    Assert.assertNotNull(resolved.getSpec().getPlugin());
    Assert.assertEquals("someValue", resolved.getSpec().getPlugin().getProperties().get("value"));

    // Flag set to false: the returned spec should still carry the raw macro
    StageSchema unresolvedInput = new StageSchema("input", inputSchema);
    StageValidationResponse unresolved =
        sendRequest(new StageValidationRequest(stage, Collections.singletonList(unresolvedInput), false));
    Assert.assertTrue(unresolved.getFailures().isEmpty());
    Assert.assertNotNull(unresolved.getSpec().getPlugin());
    Assert.assertEquals("${someProperty}", unresolved.getSpec().getPlugin().getProperties().get("value"));
}
Also used : StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) StageValidationRequest(io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest) HashMap(java.util.HashMap) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) Schema(io.cdap.cdap.api.data.schema.Schema) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse) Test(org.junit.Test)

Example 2 with StageSchema

use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.

the class DataPipelineServiceTest method testValidationFailureForJoiner.

/**
 * Validates a joiner stage whose two join conditions are both invalid and checks that each failure points at the
 * join condition that caused it.
 */
@Test
public void testValidationFailureForJoiner() throws Exception {
    String stageName = "joiner";
    // join key field t2_cust_name does not exist
    ETLStage stage = new ETLStage(stageName, MockJoiner.getPlugin("t1.customer_id=t2.cust_id&" + "t1.customer_name=t2.t2_cust_name", "t1,t2", ""));
    StageSchema inputSchema1 = new StageSchema("t1", Schema.recordOf("id", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING))));
    // t1.customer_id type string does not match t2.cust_id type int
    StageSchema inputSchema2 = new StageSchema("t2", Schema.recordOf("id", Schema.Field.of("cust_id", Schema.of(Schema.Type.INT)), Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING))));
    StageValidationRequest requestBody = new StageValidationRequest(stage, ImmutableList.of(inputSchema1, inputSchema2), false);
    StageValidationResponse actual = sendRequest(requestBody);

    // A failed validation carries no spec, only the failures
    Assert.assertNull(actual.getSpec());
    Assert.assertEquals(2, actual.getFailures().size());

    // The missing-field failure must reference the join condition that names the non-existent t2_cust_name
    ValidationFailure fieldDoesNotExist = actual.getFailures().get(0);
    Assert.assertEquals(stageName, fieldDoesNotExist.getCauses().get(0).getAttribute(STAGE));
    Assert.assertEquals("t1.customer_name=t2.t2_cust_name", fieldDoesNotExist.getCauses().get(0).getAttribute(CauseAttributes.CONFIG_ELEMENT));

    // The type-mismatch failure must reference the join condition comparing string customer_id with int cust_id
    ValidationFailure typeMismatch = actual.getFailures().get(1);
    Assert.assertEquals(stageName, typeMismatch.getCauses().get(0).getAttribute(STAGE));
    Assert.assertEquals("t1.customer_id=t2.cust_id", typeMismatch.getCauses().get(0).getAttribute(CauseAttributes.CONFIG_ELEMENT));
}
Also used : StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) StageValidationRequest(io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse) ValidationFailure(io.cdap.cdap.etl.api.validation.ValidationFailure) Test(org.junit.Test)

Example 3 with StageSchema

use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.

the class DataPipelineServiceTest method testValidationFailureForAggregator.

/**
 * Validates a distinct aggregator configured with the fields "id,name" against an input schema that only has
 * "id", and checks that the single reported failure points at the "name" element of the "fields" property.
 */
@Test
public void testValidationFailureForAggregator() throws Exception {
    String stageName = "ag";
    ETLStage stage = new ETLStage(stageName, DistinctAggregator.getPlugin("id,name"));

    // the input schema deliberately lacks the "name" field
    Schema.Field idField = Schema.Field.of("id", Schema.of(Schema.Type.STRING));
    Schema inputSchema = Schema.recordOf("id", idField);
    StageSchema input = new StageSchema("input", inputSchema);

    StageValidationResponse actual =
        sendRequest(new StageValidationRequest(stage, Collections.singletonList(input), false));

    // validation failed, so there is no spec and exactly one failure
    Assert.assertNull(actual.getSpec());
    Assert.assertEquals(1, actual.getFailures().size());

    ValidationFailure failure = actual.getFailures().iterator().next();
    ValidationFailure.Cause cause = failure.getCauses().get(0);
    Assert.assertEquals(stageName, cause.getAttribute(STAGE));
    Assert.assertEquals("fields", cause.getAttribute(CauseAttributes.STAGE_CONFIG));
    Assert.assertEquals("name", cause.getAttribute(CauseAttributes.CONFIG_ELEMENT));
}
Also used : StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) StageValidationRequest(io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) Schema(io.cdap.cdap.api.data.schema.Schema) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse) ValidationFailure(io.cdap.cdap.etl.api.validation.ValidationFailure) Test(org.junit.Test)

Example 4 with StageSchema

use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.

the class ValidationUtils method validate.

/**
 * Validates the single stage carried by a {@link StageValidationRequest}.
 *
 * <p>The stage's plugin properties are passed through {@code macroFn}, and the stage is then configured with the
 * resulting properties together with the input schemas listed in the request.
 *
 * @param namespace            namespace handed to the pipeline spec generator
 * @param validationRequest    {@link StageValidationRequest} holding the stage and its input schemas
 * @param pluginConfigurer     {@link PluginConfigurer} wrapped by the {@link ValidatingConfigurer} used here
 * @param macroFn              {@link Function} applied to the plugin properties to evaluate macros
 * @param featureFlagsProvider {@link FeatureFlagsProvider} handed to the configurers and the spec generator
 * @return a {@link StageValidationResponse} built from the stage spec when configuration succeeds, or from the
 *         failures of the {@link ValidationException} when it does not
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest, PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn, FeatureFlagsProvider featureFlagsProvider) {
    ETLStage stage = validationRequest.getStage();
    String stageName = stage.getName();

    ValidatingConfigurer configurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);
    // Batch or Streaming doesn't matter for a single stage.
    PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> specGenerator =
        new BatchPipelineSpecGenerator(namespace, configurer, null, Collections.emptySet(), Collections.emptySet(),
                                       Engine.SPARK, featureFlagsProvider);

    // register every input schema, and its originating stage, on the stage configurer
    DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageName);
    for (StageSchema input : validationRequest.getInputSchemas()) {
        String inputStage = input.getStage();
        stageConfigurer.addInputSchema(inputStage, input.getSchema());
        stageConfigurer.addInputStage(inputStage);
    }
    DefaultPipelineConfigurer pipelineConfigurer =
        new DefaultPipelineConfigurer(configurer, stageName, Engine.SPARK, stageConfigurer, featureFlagsProvider);

    // evaluate macros, then rebuild the plugin config around the evaluated properties
    ETLPlugin rawPlugin = stage.getPlugin();
    Map<String, String> evaluatedProperties = macroFn.apply(rawPlugin.getProperties());
    ETLPlugin evaluatedPlugin =
        new ETLPlugin(rawPlugin.getName(), rawPlugin.getType(), evaluatedProperties, rawPlugin.getArtifactConfig());

    try {
        StageSpec stageSpec = specGenerator.configureStage(stageName, evaluatedPlugin, pipelineConfigurer).build();
        return new StageValidationResponse(stageSpec);
    } catch (ValidationException e) {
        // configuration failures are returned in the response rather than thrown
        return new StageValidationResponse(e.getFailures());
    }
}
Also used : ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) BatchPipelineSpecGenerator(io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) DefaultStageConfigurer(io.cdap.cdap.etl.common.DefaultStageConfigurer) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ValidatingConfigurer(io.cdap.cdap.etl.validation.ValidatingConfigurer) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultPipelineConfigurer(io.cdap.cdap.etl.common.DefaultPipelineConfigurer) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)

Example 5 with StageSchema

use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.

the class DataPipelineServiceTest method testValidateStageSingleInvalidConfigProperty.

/**
 * Validates a StringValueFilterTransform whose "field" property refers to an int field and checks the causes
 * attached to the single resulting failure.
 */
@Test
public void testValidateStageSingleInvalidConfigProperty() throws Exception {
    // StringValueFilterTransform is set up to filter records where field x has value 'y'.
    // The config is invalid because field x is an int in the input schema, not the required string.
    String stageName = "tx";
    Map<String, String> pluginProperties = new HashMap<>();
    pluginProperties.put("field", "x");
    pluginProperties.put("value", "y");
    ETLPlugin plugin = new ETLPlugin(StringValueFilterTransform.NAME, Transform.PLUGIN_TYPE, pluginProperties);
    ETLStage stage = new ETLStage(stageName, plugin);

    Schema.Field intFieldX = Schema.Field.of("x", Schema.of(Schema.Type.INT));
    Schema inputSchema = Schema.recordOf("x", intFieldX);
    StageSchema input = new StageSchema("input", inputSchema);

    StageValidationResponse actual =
        sendRequest(new StageValidationRequest(stage, Collections.singletonList(input), false));
    Assert.assertNull(actual.getSpec());
    Assert.assertEquals(1, actual.getFailures().size());

    ValidationFailure failure = actual.getFailures().iterator().next();
    // the stage adds 2 causes for the invalid input field failure: one tied to the config property and one
    // tied to the input schema field.
    Assert.assertEquals(2, failure.getCauses().size());

    ValidationFailure.Cause configCause = failure.getCauses().get(0);
    Assert.assertEquals("field", configCause.getAttribute(CauseAttributes.STAGE_CONFIG));
    Assert.assertEquals(stageName, configCause.getAttribute(STAGE));

    ValidationFailure.Cause inputFieldCause = failure.getCauses().get(1);
    Assert.assertEquals("x", inputFieldCause.getAttribute(CauseAttributes.INPUT_SCHEMA_FIELD));
    Assert.assertEquals("input", inputFieldCause.getAttribute(CauseAttributes.INPUT_STAGE));
    Assert.assertEquals(stageName, inputFieldCause.getAttribute(STAGE));
}
Also used : StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) StageValidationRequest(io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest) HashMap(java.util.HashMap) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) StageSchema(io.cdap.cdap.etl.proto.v2.validation.StageSchema) Schema(io.cdap.cdap.api.data.schema.Schema) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StageValidationResponse(io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse) ValidationFailure(io.cdap.cdap.etl.api.validation.ValidationFailure) Test(org.junit.Test)

Aggregations

ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)10 StageSchema (io.cdap.cdap.etl.proto.v2.validation.StageSchema)10 StageValidationResponse (io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)10 StageValidationRequest (io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest)9 Test (org.junit.Test)9 Schema (io.cdap.cdap.api.data.schema.Schema)8 ValidationFailure (io.cdap.cdap.etl.api.validation.ValidationFailure)5 ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)5 HashMap (java.util.HashMap)5 ValidationException (io.cdap.cdap.etl.api.validation.ValidationException)1 BatchPipelineSpec (io.cdap.cdap.etl.batch.BatchPipelineSpec)1 BatchPipelineSpecGenerator (io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator)1 DefaultPipelineConfigurer (io.cdap.cdap.etl.common.DefaultPipelineConfigurer)1 DefaultStageConfigurer (io.cdap.cdap.etl.common.DefaultStageConfigurer)1 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)1 StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)1 ValidatingConfigurer (io.cdap.cdap.etl.validation.ValidatingConfigurer)1