use of io.cdap.cdap.etl.api.validation.ValidationException in project cdap by caskdata.
the class RemoteConnectionTestTask method execute.
/**
 * Tests the connection described by the {@link ConnectionCreationRequest} that is serialized as JSON
 * inside the given remote request.
 *
 * @param systemAppContext context used to create the plugin configurer for the request's namespace
 * @param request remote request carrying the namespace and the JSON connection creation request
 * @return an empty string when the connector test reports no failures, otherwise the collected
 *         validation failures serialized as JSON
 * @throws Exception on any failure other than a {@link ValidationException} raised while testing
 */
@Override
public String execute(SystemAppTaskContext systemAppContext, RemoteConnectionRequest request) throws Exception {
  String namespace = request.getNamespace();
  ConnectionCreationRequest creationRequest =
    GSON.fromJson(request.getRequest(), ConnectionCreationRequest.class);

  ServicePluginConfigurer servicePluginConfigurer = systemAppContext.createServicePluginConfigurer(namespace);
  ConnectorConfigurer configurer = new DefaultConnectorConfigurer(servicePluginConfigurer);
  SimpleFailureCollector collector = new SimpleFailureCollector();
  ConnectorContext context = new DefaultConnectorContext(collector, servicePluginConfigurer);

  // wrap the artifact-based selector so the selected artifact is tracked
  ArtifactSelectorProvider selectorProvider = new ArtifactSelectorProvider();
  TrackedPluginSelector trackedSelector =
    new TrackedPluginSelector(selectorProvider.getPluginSelector(creationRequest.getPlugin().getArtifact()));

  String result;
  try (Connector connector = getConnector(systemAppContext, servicePluginConfigurer, creationRequest.getPlugin(),
                                          namespace, trackedSelector)) {
    connector.configure(configurer);
    try {
      connector.test(context);
      // failures collected during the test are raised here as a ValidationException
      collector.getOrThrowException();
      result = "";
    } catch (ValidationException e) {
      result = GSON.toJson(e.getFailures());
    }
  }
  return result;
}
use of io.cdap.cdap.etl.api.validation.ValidationException in project cdap by caskdata.
the class ValidationUtils method validate.
/**
 * Validates a single pipeline stage described by a {@link StageValidationRequest}.
 *
 * @param namespace namespace handed to the pipeline spec generator
 * @param validationRequest {@link StageValidationRequest} holding the stage and its input schemas
 * @param pluginConfigurer {@link PluginConfigurer} used to instantiate the plugin
 * @param macroFn {@link Function} applied to the plugin's properties; its result replaces the
 *                properties in the plugin configuration that gets validated
 * @param featureFlagsProvider {@link FeatureFlagsProvider} passed to the configurers and the spec generator
 * @return {@link StageValidationResponse} built from the configured stage spec, or from the failures
 *         when configuring the stage throws a {@link ValidationException}
 */
public static StageValidationResponse validate(String namespace, StageValidationRequest validationRequest, PluginConfigurer pluginConfigurer, Function<Map<String, String>, Map<String, String>> macroFn, FeatureFlagsProvider featureFlagsProvider) {
  ETLStage stage = validationRequest.getStage();
  String stageName = stage.getName();
  ValidatingConfigurer configurer = new ValidatingConfigurer(pluginConfigurer, featureFlagsProvider);

  // Batch or Streaming doesn't matter for a single stage.
  PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> specGenerator =
    new BatchPipelineSpecGenerator(namespace, configurer, null, Collections.emptySet(),
                                   Collections.emptySet(), Engine.SPARK, featureFlagsProvider);

  // register every declared input: its schema first, then the stage itself
  DefaultStageConfigurer stageConfigurer = new DefaultStageConfigurer(stageName);
  for (StageSchema input : validationRequest.getInputSchemas()) {
    String inputStage = input.getStage();
    stageConfigurer.addInputSchema(inputStage, input.getSchema());
    stageConfigurer.addInputStage(inputStage);
  }

  DefaultPipelineConfigurer pipelineConfigurer =
    new DefaultPipelineConfigurer(configurer, stageName, Engine.SPARK, stageConfigurer, featureFlagsProvider);

  // evaluate macros, then rebuild the plugin config around the evaluated properties
  Map<String, String> resolvedProperties = macroFn.apply(stage.getPlugin().getProperties());
  ETLPlugin rawPlugin = stage.getPlugin();
  ETLPlugin resolvedPlugin = new ETLPlugin(rawPlugin.getName(), rawPlugin.getType(), resolvedProperties,
                                           rawPlugin.getArtifactConfig());

  try {
    return new StageValidationResponse(
      specGenerator.configureStage(stageName, resolvedPlugin, pipelineConfigurer).build());
  } catch (ValidationException e) {
    return new StageValidationResponse(e.getFailures());
  }
}
use of io.cdap.cdap.etl.api.validation.ValidationException in project cdap by caskdata.
the class PipelineSpecGenerator method configureStage.
/**
 * Instantiates and configures the plugin for one stage, then assembles the builder for its spec.
 *
 * <p>Exceptions thrown while the plugin configures itself are translated into failures on the
 * stage's {@link FailureCollector}; a {@link ValidationException} is rethrown untouched. Once
 * configuration is done, any collected failure is raised as a {@link ValidationException}.
 *
 * @param stageName the unique plugin id
 * @param etlPlugin user provided configuration for the plugin
 * @param pipelineConfigurer default pipeline configurer to configure the plugin
 * @return the spec builder for the plugin
 * @throws IllegalArgumentException if the plugin with same id is already deployed
 * @throws ValidationException if the plugin threw an exception during configuration
 */
public StageSpec.Builder configureStage(String stageName, ETLPlugin etlPlugin, DefaultPipelineConfigurer pipelineConfigurer) throws ValidationException {
  TrackedPluginSelector trackedSelector =
    new TrackedPluginSelector(new ArtifactSelectorProvider().getPluginSelector(etlPlugin.getArtifactConfig()));
  String type = etlPlugin.getType();
  String pluginName = etlPlugin.getName();
  DefaultStageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer();
  FailureCollector failures = stageConfigurer.getFailureCollector();
  Object plugin = getPlugin(stageName, etlPlugin, trackedSelector, type, pluginName, failures);

  try {
    if (type.equals(BatchJoiner.PLUGIN_TYPE)) {
      ((MultiInputPipelineConfigurable) plugin).configurePipeline(pipelineConfigurer);
      // The check is on the plugin instance rather than the plugin type; presumably this lets a
      // BatchJoiner-typed stage be backed by an AutoJoiner while preserving backwards
      // compatibility in the pipeline config.
      if (plugin instanceof AutoJoiner) {
        configureAutoJoiner(stageName, (AutoJoiner) plugin, stageConfigurer, failures);
      }
    } else if (type.equals(SplitterTransform.PLUGIN_TYPE)) {
      ((MultiOutputPipelineConfigurable) plugin).configurePipeline(pipelineConfigurer);
    } else if (!type.equals(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
      ((PipelineConfigurable) plugin).configurePipeline(pipelineConfigurer);
      // evaluate macros and find out if there is connection used
      boolean sourceOrBatchSink = sourcePluginTypes.contains(type) || BatchSink.PLUGIN_TYPE.equals(type);
      if (sourceOrBatchSink && runtimeEvaluator == null) {
        pluginConfigurer.evaluateMacros(etlPlugin.getProperties(), connectionEvaluator, options);
      }
    }
  } catch (InvalidConfigPropertyException e) {
    String correctiveAction = String.format("Provide valid value for config property '%s'.", e.getProperty());
    failures.addFailure(e.getMessage(), correctiveAction).withConfigProperty(e.getProperty());
  } catch (InvalidStageException e) {
    // with no nested reasons the exception itself is the only failure to report
    if (e.getReasons().isEmpty()) {
      failures.addFailure(e.getMessage(), null);
    }
    for (InvalidStageException reason : e.getReasons()) {
      if (!(reason instanceof InvalidConfigPropertyException)) {
        failures.addFailure(reason.getMessage(), null);
        continue;
      }
      InvalidConfigPropertyException propertyReason = (InvalidConfigPropertyException) reason;
      String correctiveAction =
        String.format("Provide valid value for config property '%s'.", propertyReason.getProperty());
      failures.addFailure(propertyReason.getMessage(), correctiveAction)
        .withConfigProperty(propertyReason.getProperty());
    }
  } catch (ValidationException e) {
    // must stay ahead of the generic handler so validation failures propagate as they are
    throw e;
  } catch (NullPointerException e) {
    // handle the case where plugin throws null pointer exception, this is to avoid having 'null' as error message
    String message = String.format("Null error occurred while configuring the stage %s.", stageName);
    failures.addFailure(message, null).withStacktrace(e.getStackTrace());
  } catch (ArrayIndexOutOfBoundsException e) {
    // handle the case where plugin throws index out of bounds exception,
    // this is to avoid having a number like '2', '8' etc as error message
    String message =
      String.format("Index out of bounds error occurred while configuring the stage %s.", stageName);
    failures.addFailure(message, null).withStacktrace(e.getStackTrace());
  } catch (ConnectionBadRequestException e) {
    failures.addFailure(e.getMessage(), "Provide a valid connection name.");
  } catch (Exception e) {
    String message = String.format("Error encountered while configuring the stage: '%s'", e.getMessage());
    failures.addFailure(message, null).withStacktrace(e.getStackTrace());
  }

  // throw validation exception if there are any errors being carried by failure collector
  failures.getOrThrowException();

  PluginSpec pluginSpec =
    new PluginSpec(type, pluginName, etlPlugin.getProperties(), trackedSelector.getSelectedArtifact());
  StageSpec.Builder builder = StageSpec.builder(stageName, pluginSpec)
    .addInputSchemas(pipelineConfigurer.getStageConfigurer().getInputSchemas())
    .setErrorSchema(stageConfigurer.getErrorSchema());

  // splitters expose one schema per output port; every other type has a single output schema
  if (!type.equals(SplitterTransform.PLUGIN_TYPE)) {
    builder.setOutputSchema(stageConfigurer.getOutputSchema());
  } else {
    builder.setPortSchemas(stageConfigurer.getOutputPortSchemas());
  }
  return builder;
}
use of io.cdap.cdap.etl.api.validation.ValidationException in project cdap by caskdata.
the class LoggingFailureCollector method getOrThrowException.
/**
 * Delegates to the parent collector and logs every validation failure before rethrowing.
 *
 * @return the {@link ValidationException} obtained from the parent when it carries no failures,
 *         whether the parent returned it or threw it
 * @throws ValidationException when the exception obtained from the parent carries at least one
 *         failure; the failures are logged as a numbered list first
 */
@Override
public ValidationException getOrThrowException() throws ValidationException {
  ValidationException outcome;
  try {
    outcome = super.getOrThrowException();
  } catch (ValidationException thrown) {
    // keep hold of the exception so its failures can be logged before it is rethrown
    outcome = thrown;
  }

  List<ValidationFailure> failures = outcome.getFailures();
  if (failures.isEmpty()) {
    return outcome;
  }

  // one line per failure, numbered starting from 1
  StringBuilder numbered = new StringBuilder();
  for (int i = 0; i < failures.size(); i++) {
    if (i > 0) {
      numbered.append(System.lineSeparator());
    }
    numbered.append(String.format("%d. %s", i + 1, failures.get(i).getFullMessage()));
  }
  LOG.error("Encountered '{}' validation failures: {}{}", failures.size(), System.lineSeparator(),
            numbered.toString());
  throw outcome;
}
use of io.cdap.cdap.etl.api.validation.ValidationException in project cdap by caskdata.
the class SmartWorkflow method configure.
/**
 * Configures the workflow: generates the pipeline spec from the config, emits the used connections
 * as system metadata, builds the execution plan, and adds a program for every phase of the plan.
 *
 * <p>A single-phase plan adds its one program directly. A multi-phase plan without phase
 * connections runs all phases in a fork. Otherwise the phase connections are turned into a
 * {@link ControlDag}, flattened where allowed, and the programs are added by walking that dag.
 *
 * @throws IllegalArgumentException if generating the pipeline spec fails with a {@link ValidationException}
 */
@Override
protected void configure() {
setName(NAME);
setDescription("Data Pipeline Workflow");
// If plugins were registered only at the application level, CDAP would not be able to fail the run early.
try {
spec = new BatchPipelineSpecGenerator(applicationConfigurer.getDeployedNamespace(), getConfigurer(), applicationConfigurer.getRuntimeConfigurer(), ImmutableSet.of(BatchSource.PLUGIN_TYPE), ImmutableSet.of(BatchSink.PLUGIN_TYPE, SparkSink.PLUGIN_TYPE, AlertPublisher.PLUGIN_TYPE), config.getEngine(), getConfigurer()).generateSpec(config);
} catch (ValidationException e) {
// report the first failure's full message, or the exception message when no failures are attached
throw new IllegalArgumentException(String.format("Failed to configure pipeline: %s", e.getFailures().isEmpty() ? e.getMessage() : e.getFailures().iterator().next().getFullMessage()), e);
}
// append "_" to the connection name so it will not conflict with the system tag we add
Set<String> connectionsUsed = spec.getConnectionsUsed().stream().map(s -> "_" + s).collect(Collectors.toSet());
applicationConfigurer.emitMetadata(new Metadata(Collections.emptyMap(), connectionsUsed), MetadataScope.SYSTEM);
stageSpecs = new HashMap<>();
// spark is used when the engine is SPARK or when any stage is a SparkCompute or SparkSink plugin
useSpark = config.getEngine() == Engine.SPARK;
for (StageSpec stageSpec : spec.getStages()) {
stageSpecs.put(stageSpec.getName(), stageSpec);
String pluginType = stageSpec.getPlugin().getType();
if (SparkCompute.PLUGIN_TYPE.equals(pluginType) || SparkSink.PLUGIN_TYPE.equals(pluginType)) {
useSpark = true;
}
}
plan = createPlan();
WorkflowProgramAdder programAdder = new TrunkProgramAdder(getConfigurer());
// single phase, just add the program directly
if (plan.getPhases().size() == 1) {
addProgram(plan.getPhases().keySet().iterator().next(), programAdder);
setWorkflowProperties();
return;
}
// Dag classes don't allow a 'dag' without connections
// so unconnected phases are added as parallel branches of one fork instead
if (plan.getPhaseConnections().isEmpty()) {
WorkflowProgramAdder fork = programAdder.fork();
for (String phaseName : plan.getPhases().keySet()) {
addProgram(phaseName, fork);
}
fork.join();
setWorkflowProperties();
return;
}
/*
ControlDag is used to flatten the dag that represents connections between phases.
Connections between phases represent a happens-before relationship, not the flow of data.
As such, phases can be shifted around as long as every happens-before relationship is maintained.
The exception is condition phases. Connection from a condition to another phase must be maintained as is.
Flattening a ControlDag will transform a dag into a special fork-join dag by moving phases around.
We therefore cannot blindly flatten the phase connections.
However, we validated earlier that condition outputs have a special property, where every stage following a
condition can only have a single input. This means we will never need to flatten anything after the first
set of conditions. We will only have to flatten what comes before the first set of conditions.
*/
dag = new ControlDag(plan.getPhaseConnections());
// set when a synthetic "dummy" source is inserted below; it changes how programs are added at the end
boolean dummyNodeAdded = false;
Map<String, ConditionBranches> conditionBranches = plan.getConditionPhaseBranches();
if (conditionBranches.isEmpty()) {
// after flattening, there is guaranteed to be just one source
dag.flatten();
} else if (!conditionBranches.keySet().containsAll(dag.getSources())) {
// Continue only if the condition node is not the source of the dag, otherwise dag is already in the
// required form
Set<String> conditions = conditionBranches.keySet();
// flatten only the part of the dag starting from sources and ending in conditions/sinks.
Set<String> dagNodes = dag.accessibleFrom(dag.getSources(), Sets.union(dag.getSinks(), conditions));
Set<String> dagNodesWithoutCondition = Sets.difference(dagNodes, conditions);
// connections of the rebuilt dag, accumulated across the steps below
Set<Connection> connections = new HashSet<>();
Deque<String> bfs = new LinkedList<>();
// sinks of the pre-condition part; the conditions get attached to one of them further down
Set<String> sinks = new HashSet<>();
// If its a single phase without condition then no need to flatten
if (dagNodesWithoutCondition.size() < 2) {
sinks.addAll(dagNodesWithoutCondition);
} else {
/*
Create a subdag from dagNodesWithoutCondition.
There are a couple situations where this is not immediately possible. For example:
source1 --|
|--> condition -- ...
source2 --|
Here, dagNodesWithoutCondition = [source1, source2], which is an invalid dag. Similarly:
source --> condition -- ...
Here, dagNodesWithoutCondition = [source], which is also invalid. In order to ensure that we have a
valid dag, we just insert a dummy node as the first node in the subdag, adding a connection from the
dummy node to all the sources.
*/
Dag subDag;
try {
subDag = dag.createSubDag(dagNodesWithoutCondition);
} catch (IllegalArgumentException | DisjointConnectionsException e) {
// DisjointConnectionsException thrown when islands are created from the dagNodesWithoutCondition
// IllegalArgumentException thrown when connections are empty
// In both cases we need to add dummy node and create connected Dag
String dummyNode = "dummy";
dummyNodeAdded = true;
Set<Connection> subDagConnections = new HashSet<>();
for (String source : dag.getSources()) {
subDagConnections.add(new Connection(dummyNode, source));
}
// walk from the sources, keeping only edges whose target is a non-condition node
Deque<String> subDagBFS = new LinkedList<>();
subDagBFS.addAll(dag.getSources());
while (subDagBFS.peek() != null) {
String node = subDagBFS.poll();
for (String output : dag.getNodeOutputs(node)) {
if (dagNodesWithoutCondition.contains(output)) {
subDagConnections.add(new Connection(node, output));
subDagBFS.add(output);
}
}
}
subDag = new Dag(subDagConnections);
}
ControlDag cdag = new ControlDag(subDag);
cdag.flatten();
// Add all connections from cdag
bfs.addAll(cdag.getSources());
while (bfs.peek() != null) {
String node = bfs.poll();
for (String output : cdag.getNodeOutputs(node)) {
connections.add(new Connection(node, output));
bfs.add(output);
}
}
sinks.addAll(cdag.getSinks());
}
// Add back the existing condition nodes and corresponding conditions
Set<String> conditionsFromDag = Sets.intersection(dagNodes, conditions);
// NOTE(review): takes the first element of sinks, so this assumes sinks is non-empty here - confirm
for (String condition : conditionsFromDag) {
connections.add(new Connection(sinks.iterator().next(), condition));
}
// re-add everything downstream of the conditions with its original connections
bfs.addAll(Sets.intersection(dagNodes, conditions));
while (bfs.peek() != null) {
String node = bfs.poll();
ConditionBranches branches = conditionBranches.get(node);
if (branches == null) {
// not a condition node. add outputs
for (String output : dag.getNodeOutputs(node)) {
connections.add(new Connection(node, output));
bfs.add(output);
}
} else {
// condition node
// add a connection tagged with the branch value for each branch that has an output
for (Boolean condition : Arrays.asList(true, false)) {
String phase = condition ? branches.getTrueOutput() : branches.getFalseOutput();
if (phase == null) {
continue;
}
connections.add(new Connection(node, phase, condition));
bfs.add(phase);
}
}
}
dag = new ControlDag(connections);
}
if (dummyNodeAdded) {
// no program is added for the dummy source itself; its outputs become the branches of a fork
WorkflowProgramAdder fork = programAdder.fork();
String dummyNode = dag.getSources().iterator().next();
// need to make sure we don't call also() if this is the final branch
Iterator<String> outputIter = dag.getNodeOutputs(dummyNode).iterator();
addBranchPrograms(outputIter.next(), fork, false);
while (outputIter.hasNext()) {
fork = fork.also();
addBranchPrograms(outputIter.next(), fork, !outputIter.hasNext());
}
} else {
String start = dag.getSources().iterator().next();
addPrograms(start, programAdder);
}
setWorkflowProperties();
}
Aggregations