use of io.cdap.cdap.etl.batch.BatchPipelineSpec in project cdap by caskdata.
the class ValidationUtils method validate.
/**
* Validate plugin based on the {@link StageValidationRequest}
*
* @param validationRequest {@link StageValidationRequest} with plugin properties
* @param pluginConfigurer {@link PluginConfigurer} for using the plugin
* @param macroFn {@link Function} for evaluating macros
* @return {@link StageValidationResponse} in json format
*/
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest, PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn, FeatureFlagsProvider featureFlagsProvider) {
ETLStage stageConfig = validationRequest.getStage();
ValidatingConfigurer validatingConfigurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);
// Batch or Streaming doesn't matter for a single stage.
PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> pipelineSpecGenerator = new BatchPipelineSpecGenerator(namespace, validatingConfigurer, null, Collections.emptySet(), Collections.emptySet(), Engine.SPARK, featureFlagsProvider);
DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageConfig.getName());
for (StageSchema stageSchema : validationRequest.getInputSchemas()) {
stageConfigurer.addInputSchema(stageSchema.getStage(), stageSchema.getSchema());
stageConfigurer.addInputStage(stageSchema.getStage());
}
DefaultPipelineConfigurer pipelineConfigurer = new DefaultPipelineConfigurer(validatingConfigurer, stageConfig.getName(), Engine.SPARK, stageConfigurer, featureFlagsProvider);
// evaluate macros
Map<String, String> evaluatedProperties = macroFn.apply(stageConfig.getPlugin().getProperties());
ETLPlugin originalConfig = stageConfig.getPlugin();
ETLPlugin evaluatedConfig = new ETLPlugin(originalConfig.getName(), originalConfig.getType(), evaluatedProperties, originalConfig.getArtifactConfig());
try {
StageSpec spec = pipelineSpecGenerator.configureStage(stageConfig.getName(), evaluatedConfig, pipelineConfigurer).build();
return new StageValidationResponse(spec);
} catch (ValidationException e) {
return new StageValidationResponse(e.getFailures());
}
}
use of io.cdap.cdap.etl.batch.BatchPipelineSpec in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testGenerateSpec.
@Test
public void testGenerateSpec() throws ValidationException {
/*
* ---- t1 ------------
* | | |
* source --- | |--- t3 --- sink1
* | | |
* ------------ t2 --------------- sink2
* | |
* | |
* -------------------------
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("sink1", MOCK_SINK)).addStage(new ETLStage("sink2", MOCK_SINK)).addStage(new ETLStage("t1", MOCK_TRANSFORM_A)).addStage(new ETLStage("t2", MOCK_TRANSFORM_A)).addStage(new ETLStage("t3", MOCK_TRANSFORM_B)).addConnection("source", "t1").addConnection("source", "t2").addConnection("source", "sink2").addConnection("t1", "t2").addConnection("t1", "t3").addConnection("t1", "sink2").addConnection("t2", "sink2").addConnection("t2", "t3").addConnection("t3", "sink1").setNumOfRecordsPreview(100).build();
// test the spec generated is correct, with the right input and output schemas and artifact information.
BatchPipelineSpec actual = specGenerator.generateSpec(etlConfig);
Map<String, String> emptyMap = ImmutableMap.of();
PipelineSpec expected = BatchPipelineSpec.builder().addStage(StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID)).addOutput(SCHEMA_A, "t1", "t2", "sink2").build()).addStage(StageSpec.builder("sink1", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID)).addInputSchema("t3", SCHEMA_B).setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("sink2", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A, "source", SCHEMA_A)).setErrorSchema(SCHEMA_A).build()).addStage(StageSpec.builder("t1", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID)).addInputSchema("source", SCHEMA_A).addOutput(SCHEMA_A, "t2", "t3", "sink2").setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("t2", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("source", SCHEMA_A, "t1", SCHEMA_A)).addOutput(SCHEMA_A, "t3", "sink2").setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A)).addOutput(SCHEMA_B, "sink1").setErrorSchema(SCHEMA_A).build()).addConnections(etlConfig.getConnections()).setResources(etlConfig.getResources()).setDriverResources(new Resources(1024, 1)).setClientResources(new Resources(1024, 1)).setStageLoggingEnabled(etlConfig.isStageLoggingEnabled()).setNumOfRecordsPreview(etlConfig.getNumOfRecordsPreview()).build();
Assert.assertEquals(expected, actual);
}
Aggregations