use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.
the class DataPipelineServiceTest method testMacroResolutionFromProperties.
@Test
public void testMacroResolutionFromProperties() throws Exception {
  // StringValueFilterTransform checks that the configured field exists in the input schema,
  // so the schema below declares a string field named "x".
  Map<String, String> pluginProps = new HashMap<>();
  pluginProps.put("field", "x");
  // The "value" property is a macro that can only be resolved from the stored preferences.
  pluginProps.put("value", "${someProperty}");
  ETLPlugin filterPlugin = new ETLPlugin(StringValueFilterTransform.NAME, Transform.PLUGIN_TYPE, pluginProps);
  ETLStage macroStage = new ETLStage("tx", filterPlugin);
  Schema recordSchema = Schema.recordOf("x", Schema.Field.of("x", Schema.of(Schema.Type.STRING)));

  // Store the preference that backs the macro in the default namespace.
  getPreferencesService().setProperties(NamespaceId.DEFAULT,
                                        Collections.singletonMap("someProperty", "someValue"));

  // Request built with the flag set to true: the returned spec is expected to carry the
  // resolved value of the "value" property.
  StageValidationRequest resolvingRequest = new StageValidationRequest(
    macroStage, Collections.singletonList(new StageSchema("input", recordSchema)), true);
  StageValidationResponse resolved = sendRequest(resolvingRequest);
  Assert.assertTrue(resolved.getFailures().isEmpty());
  Assert.assertNotNull(resolved.getSpec().getPlugin());
  Assert.assertEquals("someValue", resolved.getSpec().getPlugin().getProperties().get("value"));

  // Request built with the flag set to false: the returned spec is expected to still carry
  // the raw macro expression.
  StageValidationRequest rawRequest = new StageValidationRequest(
    macroStage, Collections.singletonList(new StageSchema("input", recordSchema)), false);
  StageValidationResponse unresolved = sendRequest(rawRequest);
  Assert.assertTrue(unresolved.getFailures().isEmpty());
  Assert.assertNotNull(unresolved.getSpec().getPlugin());
  Assert.assertEquals("${someProperty}", unresolved.getSpec().getPlugin().getProperties().get("value"));
}
use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.
the class DataPipelineServiceTest method testValidationFailureForJoiner.
@Test
public void testValidationFailureForJoiner() throws Exception {
  // Validates a joiner stage whose two join conditions are both invalid and checks that each
  // one is reported as its own failure, attributed to the stage and to the offending condition.
  String stageName = "joiner";
  // First condition: t1.customer_id (string) is joined to t2.cust_id (int), so the types differ.
  // Second condition: join key field t2_cust_name does not exist in t2 (its field is cust_name).
  ETLStage stage = new ETLStage(stageName,
                                MockJoiner.getPlugin("t1.customer_id=t2.cust_id&"
                                                       + "t1.customer_name=t2.t2_cust_name", "t1,t2", ""));
  StageSchema inputSchema1 = new StageSchema(
    "t1", Schema.recordOf("id",
                          Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
                          Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING))));
  StageSchema inputSchema2 = new StageSchema(
    "t2", Schema.recordOf("id",
                          Schema.Field.of("cust_id", Schema.of(Schema.Type.INT)),
                          Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING))));
  StageValidationRequest requestBody =
    new StageValidationRequest(stage, ImmutableList.of(inputSchema1, inputSchema2), false);

  StageValidationResponse actual = sendRequest(requestBody);

  // A failed validation carries no spec, only the failures.
  Assert.assertNull(actual.getSpec());
  Assert.assertEquals(2, actual.getFailures().size());

  // The first reported failure points at the condition with mismatched key types.
  ValidationFailure typeMismatch = actual.getFailures().get(0);
  Assert.assertEquals(stageName, typeMismatch.getCauses().get(0).getAttribute(STAGE));
  Assert.assertEquals("t1.customer_id=t2.cust_id",
                      typeMismatch.getCauses().get(0).getAttribute(CauseAttributes.CONFIG_ELEMENT));

  // The second reported failure points at the condition that references the missing field.
  ValidationFailure fieldDoesNotExist = actual.getFailures().get(1);
  Assert.assertEquals(stageName, fieldDoesNotExist.getCauses().get(0).getAttribute(STAGE));
  Assert.assertEquals("t1.customer_name=t2.t2_cust_name",
                      fieldDoesNotExist.getCauses().get(0).getAttribute(CauseAttributes.CONFIG_ELEMENT));
}
use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.
the class DataPipelineServiceTest method testValidationFailureForAggregator.
@Test
public void testValidationFailureForAggregator() throws Exception {
  String stageName = "ag";
  // The aggregator is configured with the fields "id" and "name" ...
  ETLStage aggregatorStage = new ETLStage(stageName, DistinctAggregator.getPlugin("id,name"));
  // ... but the input schema only declares "id", so "name" is missing.
  Schema idOnlySchema = Schema.recordOf("id", Schema.Field.of("id", Schema.of(Schema.Type.STRING)));
  StageSchema input = new StageSchema("input", idOnlySchema);

  StageValidationResponse response =
    sendRequest(new StageValidationRequest(aggregatorStage, Collections.singletonList(input), false));

  // A failed validation carries no spec and, here, exactly one failure.
  Assert.assertNull(response.getSpec());
  Assert.assertEquals(1, response.getFailures().size());

  // The first cause of that failure names the stage, the "fields" property and the "name" element.
  ValidationFailure missingField = response.getFailures().get(0);
  Assert.assertEquals(stageName, missingField.getCauses().get(0).getAttribute(STAGE));
  Assert.assertEquals("fields", missingField.getCauses().get(0).getAttribute(CauseAttributes.STAGE_CONFIG));
  Assert.assertEquals("name", missingField.getCauses().get(0).getAttribute(CauseAttributes.CONFIG_ELEMENT));
}
use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.
the class ValidationUtils method validate.
/**
 * Validates the single stage described by a {@link StageValidationRequest}.
 *
 * <p>The stage's plugin properties are first passed through {@code macroFn}; the stage is then
 * configured with the evaluated properties and the input schemas supplied in the request.
 *
 * @param namespace namespace handed to the pipeline spec generator
 * @param validationRequest {@link StageValidationRequest} holding the stage config and its input schemas
 * @param pluginConfigurer {@link PluginConfigurer} wrapped by the validating configurer
 * @param macroFn {@link Function} applied to the plugin properties to evaluate macros
 * @param featureFlagsProvider passed through to the configurers and the spec generator
 * @return a {@link StageValidationResponse} built from the configured {@link StageSpec}, or from
 *     the failures of the {@link ValidationException} if configuring the stage throws one
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest, PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn, FeatureFlagsProvider featureFlagsProvider) {
  ETLStage stage = validationRequest.getStage();
  ValidatingConfigurer configurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);
  // Batch or Streaming doesn't matter for a single stage.
  PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> specGenerator =
    new BatchPipelineSpecGenerator(namespace, configurer, null, Collections.emptySet(),
                                   Collections.emptySet(), Engine.SPARK, featureFlagsProvider);

  // Register every input schema from the request, together with the stage it comes from.
  String stageName = stage.getName();
  DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageName);
  for (StageSchema input : validationRequest.getInputSchemas()) {
    String inputStage = input.getStage();
    stageConfigurer.addInputSchema(inputStage, input.getSchema());
    stageConfigurer.addInputStage(inputStage);
  }
  DefaultPipelineConfigurer pipelineConfigurer =
    new DefaultPipelineConfigurer(configurer, stageName, Engine.SPARK, stageConfigurer, featureFlagsProvider);

  // Evaluate macros, then rebuild the plugin config around the evaluated properties.
  ETLPlugin rawPlugin = stage.getPlugin();
  Map<String, String> resolvedProperties = macroFn.apply(rawPlugin.getProperties());
  ETLPlugin resolvedPlugin = new ETLPlugin(rawPlugin.getName(), rawPlugin.getType(),
                                           resolvedProperties, rawPlugin.getArtifactConfig());

  StageValidationResponse response;
  try {
    StageSpec spec = specGenerator.configureStage(stageName, resolvedPlugin, pipelineConfigurer).build();
    response = new StageValidationResponse(spec);
  } catch (ValidationException e) {
    // Validation problems are reported in the response rather than propagated.
    response = new StageValidationResponse(e.getFailures());
  }
  return response;
}
use of io.cdap.cdap.etl.proto.v2.validation.StageSchema in project cdap by caskdata.
the class DataPipelineServiceTest method testValidateStageSingleInvalidConfigProperty.
@Test
public void testValidateStageSingleInvalidConfigProperty() throws Exception {
  // StringValueFilterTransform is set up to filter records where field x has value 'y'.
  // The configuration is invalid because field x is declared as an int in the input schema
  // instead of the required string.
  String stageName = "tx";
  Map<String, String> pluginProps = new HashMap<>();
  pluginProps.put("field", "x");
  pluginProps.put("value", "y");
  ETLPlugin filterPlugin = new ETLPlugin(StringValueFilterTransform.NAME, Transform.PLUGIN_TYPE, pluginProps);
  ETLStage filterStage = new ETLStage(stageName, filterPlugin);
  Schema intFieldSchema = Schema.recordOf("x", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
  StageSchema input = new StageSchema("input", intFieldSchema);

  StageValidationResponse response =
    sendRequest(new StageValidationRequest(filterStage, Collections.singletonList(input), false));

  // A failed validation carries no spec and, here, exactly one failure.
  Assert.assertNull(response.getSpec());
  Assert.assertEquals(1, response.getFailures().size());
  ValidationFailure invalidField = response.getFailures().get(0);

  // The stage reports two causes for the invalid input field: one tied to the config property
  // and one tied to the input schema field.
  Assert.assertEquals(2, invalidField.getCauses().size());

  // Cause 0: the "field" config property of this stage.
  Assert.assertEquals("field", invalidField.getCauses().get(0).getAttribute(CauseAttributes.STAGE_CONFIG));
  Assert.assertEquals(stageName, invalidField.getCauses().get(0).getAttribute(STAGE));

  // Cause 1: field "x" coming from the "input" stage.
  Assert.assertEquals("x", invalidField.getCauses().get(1).getAttribute(CauseAttributes.INPUT_SCHEMA_FIELD));
  Assert.assertEquals("input", invalidField.getCauses().get(1).getAttribute(CauseAttributes.INPUT_STAGE));
  Assert.assertEquals(stageName, invalidField.getCauses().get(1).getAttribute(STAGE));
}
Aggregations