
Example 1 with Dag

Use of io.cdap.cdap.etl.planner.Dag in project cdap by caskdata.

The class TransformExecutorFactory, method create.

/**
 * Create a transform executor for the specified pipeline. Will instantiate and initialize all sources,
 * transforms, and sinks in the pipeline.
 *
 * @param pipeline the pipeline to create a transform executor for
 * @return executor for the pipeline
 * @throws InstantiationException if there was an error instantiating a plugin
 * @throws Exception              if there was an error initializing a plugin
 */
public PipeTransformExecutor<T> create(PipelinePhase pipeline) throws Exception {
    // populate the pipe stages in reverse topological order to ensure that an output is always created before its
    // input. This will allow us to set up all outputs for a stage when we get to it.
    Dag pipelineDag = pipeline.getDag();
    // dag is null if the pipeline phase contains a single stage.
    List<String> traversalOrder = pipelineDag == null ? Collections.singletonList(pipeline.iterator().next().getName()) : pipelineDag.getTopologicalOrder();
    Collections.reverse(traversalOrder);
    Map<String, PipeStage> pipeStages = new HashMap<>();
    for (String stageName : traversalOrder) {
        pipeStages.put(stageName, getPipeStage(pipeline, stageName, pipeStages));
    }
    // sourceStageName will be null in reducers, so need to handle that case
    Set<String> startingPoints = (sourceStageName == null) ? pipeline.getSources() : Sets.newHashSet(sourceStageName);
    return new PipeTransformExecutor<>(pipeStages, startingPoints);
}
Also used : HashMap(java.util.HashMap) Dag(io.cdap.cdap.etl.planner.Dag)
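A minimal, self-contained sketch of the reverse-topological traversal used above. The stage names and the wrapper class are hypothetical; the Dag constructor taking a Set of Connection objects and getTopologicalOrder() are the same calls that appear in this example.

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import io.cdap.cdap.etl.planner.Dag;
import io.cdap.cdap.etl.proto.Connection;

public class ReverseTopologicalSketch {
    public static void main(String[] args) {
        // hypothetical three-stage pipeline: source -> transform -> sink
        Set<Connection> connections = new HashSet<>();
        connections.add(new Connection("source", "transform"));
        connections.add(new Connection("transform", "sink"));
        Dag dag = new Dag(connections);
        // topological order lists every stage before its outputs: [source, transform, sink]
        List<String> traversalOrder = dag.getTopologicalOrder();
        // reversing it gives [sink, transform, source], so each stage's outputs exist before the stage itself
        Collections.reverse(traversalOrder);
        System.out.println(traversalOrder);
    }
}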

Example 2 with Dag

Use of io.cdap.cdap.etl.planner.Dag in project cdap by caskdata.

The class SmartWorkflow, method configure.

@Override
protected void configure() {
    setName(NAME);
    setDescription("Data Pipeline Workflow");
    // If plugins were registered only at the application level, CDAP would not be able to fail the run early.
    try {
        spec = new BatchPipelineSpecGenerator(applicationConfigurer.getDeployedNamespace(), getConfigurer(), applicationConfigurer.getRuntimeConfigurer(), ImmutableSet.of(BatchSource.PLUGIN_TYPE), ImmutableSet.of(BatchSink.PLUGIN_TYPE, SparkSink.PLUGIN_TYPE, AlertPublisher.PLUGIN_TYPE), config.getEngine(), getConfigurer()).generateSpec(config);
    } catch (ValidationException e) {
        throw new IllegalArgumentException(String.format("Failed to configure pipeline: %s", e.getFailures().isEmpty() ? e.getMessage() : e.getFailures().iterator().next().getFullMessage()), e);
    }
    // prepend "_" to the connection name so it will not conflict with the system tag we add
    Set<String> connectionsUsed = spec.getConnectionsUsed().stream().map(s -> "_" + s).collect(Collectors.toSet());
    applicationConfigurer.emitMetadata(new Metadata(Collections.emptyMap(), connectionsUsed), MetadataScope.SYSTEM);
    stageSpecs = new HashMap<>();
    useSpark = config.getEngine() == Engine.SPARK;
    for (StageSpec stageSpec : spec.getStages()) {
        stageSpecs.put(stageSpec.getName(), stageSpec);
        String pluginType = stageSpec.getPlugin().getType();
        if (SparkCompute.PLUGIN_TYPE.equals(pluginType) || SparkSink.PLUGIN_TYPE.equals(pluginType)) {
            useSpark = true;
        }
    }
    plan = createPlan();
    WorkflowProgramAdder programAdder = new TrunkProgramAdder(getConfigurer());
    // single phase, just add the program directly
    if (plan.getPhases().size() == 1) {
        addProgram(plan.getPhases().keySet().iterator().next(), programAdder);
        setWorkflowProperties();
        return;
    }
    // Dag classes don't allow a 'dag' without connections
    if (plan.getPhaseConnections().isEmpty()) {
        WorkflowProgramAdder fork = programAdder.fork();
        for (String phaseName : plan.getPhases().keySet()) {
            addProgram(phaseName, fork);
        }
        fork.join();
        setWorkflowProperties();
        return;
    }
    /*
       ControlDag is used to flatten the dag that represents connections between phases.
       Connections between phases represent a happens-before relationship, not the flow of data.
       As such, phases can be shifted around as long as every happens-before relationship is maintained.
       The exception is condition phases. Connection from a condition to another phase must be maintained as is.

       Flattening a ControlDag will transform a dag into a special fork-join dag by moving phases around.
       We therefore cannot blindly flatten the phase connections.
       However, we validated earlier that condition outputs have a special property, where every stage following a
       condition can only have a single input. This means we will never need to flatten anything after the first
       set of conditions. We will only have to flatten what comes before the first set of conditions.
     */
    dag = new ControlDag(plan.getPhaseConnections());
    boolean dummyNodeAdded = false;
    Map<String, ConditionBranches> conditionBranches = plan.getConditionPhaseBranches();
    if (conditionBranches.isEmpty()) {
        // after flattening, there is guaranteed to be just one source
        dag.flatten();
    } else if (!conditionBranches.keySet().containsAll(dag.getSources())) {
        // Continue only if the condition node is not the source of the dag, otherwise dag is already in the
        // required form
        Set<String> conditions = conditionBranches.keySet();
        // flatten only the part of the dag starting from sources and ending in conditions/sinks.
        Set<String> dagNodes = dag.accessibleFrom(dag.getSources(), Sets.union(dag.getSinks(), conditions));
        Set<String> dagNodesWithoutCondition = Sets.difference(dagNodes, conditions);
        Set<Connection> connections = new HashSet<>();
        Deque<String> bfs = new LinkedList<>();
        Set<String> sinks = new HashSet<>();
        // If it's a single phase without a condition, there is no need to flatten
        if (dagNodesWithoutCondition.size() < 2) {
            sinks.addAll(dagNodesWithoutCondition);
        } else {
            /*
           Create a subdag from dagNodesWithoutCondition.
           There are a couple situations where this is not immediately possible. For example:

             source1 --|
                       |--> condition -- ...
             source2 --|

           Here, dagNodesWithoutCondition = [source1, source2], which is an invalid dag. Similarly:

             source --> condition -- ...

           Here, dagNodesWithoutCondition = [source], which is also invalid. In order to ensure that we have a
           valid dag, we just insert a dummy node as the first node in the subdag, adding a connection from the
           dummy node to all the sources.
         */
            Dag subDag;
            try {
                subDag = dag.createSubDag(dagNodesWithoutCondition);
            } catch (IllegalArgumentException | DisjointConnectionsException e) {
                // DisjointConnectionsException thrown when islands are created from the dagNodesWithoutCondition
                // IllegalArgumentException thrown when connections are empty
                // In both cases we need to add dummy node and create connected Dag
                String dummyNode = "dummy";
                dummyNodeAdded = true;
                Set<Connection> subDagConnections = new HashSet<>();
                for (String source : dag.getSources()) {
                    subDagConnections.add(new Connection(dummyNode, source));
                }
                Deque<String> subDagBFS = new LinkedList<>();
                subDagBFS.addAll(dag.getSources());
                while (subDagBFS.peek() != null) {
                    String node = subDagBFS.poll();
                    for (String output : dag.getNodeOutputs(node)) {
                        if (dagNodesWithoutCondition.contains(output)) {
                            subDagConnections.add(new Connection(node, output));
                            subDagBFS.add(output);
                        }
                    }
                }
                subDag = new Dag(subDagConnections);
            }
            ControlDag cdag = new ControlDag(subDag);
            cdag.flatten();
            // Add all connections from cdag
            bfs.addAll(cdag.getSources());
            while (bfs.peek() != null) {
                String node = bfs.poll();
                for (String output : cdag.getNodeOutputs(node)) {
                    connections.add(new Connection(node, output));
                    bfs.add(output);
                }
            }
            sinks.addAll(cdag.getSinks());
        }
        // Add back the existing condition nodes and corresponding conditions
        Set<String> conditionsFromDag = Sets.intersection(dagNodes, conditions);
        for (String condition : conditionsFromDag) {
            connections.add(new Connection(sinks.iterator().next(), condition));
        }
        bfs.addAll(Sets.intersection(dagNodes, conditions));
        while (bfs.peek() != null) {
            String node = bfs.poll();
            ConditionBranches branches = conditionBranches.get(node);
            if (branches == null) {
                // not a condition node. add outputs
                for (String output : dag.getNodeOutputs(node)) {
                    connections.add(new Connection(node, output));
                    bfs.add(output);
                }
            } else {
                // condition node
                for (Boolean condition : Arrays.asList(true, false)) {
                    String phase = condition ? branches.getTrueOutput() : branches.getFalseOutput();
                    if (phase == null) {
                        continue;
                    }
                    connections.add(new Connection(node, phase, condition));
                    bfs.add(phase);
                }
            }
        }
        dag = new ControlDag(connections);
    }
    if (dummyNodeAdded) {
        WorkflowProgramAdder fork = programAdder.fork();
        String dummyNode = dag.getSources().iterator().next();
        // need to make sure we don't call also() if this is the final branch
        Iterator<String> outputIter = dag.getNodeOutputs(dummyNode).iterator();
        addBranchPrograms(outputIter.next(), fork, false);
        while (outputIter.hasNext()) {
            fork = fork.also();
            addBranchPrograms(outputIter.next(), fork, !outputIter.hasNext());
        }
    } else {
        String start = dag.getSources().iterator().next();
        addPrograms(start, programAdder);
    }
    setWorkflowProperties();
}
Also used : AlertPublisher(io.cdap.cdap.etl.api.AlertPublisher) BatchSource(io.cdap.cdap.etl.api.batch.BatchSource) Engine(io.cdap.cdap.etl.api.Engine) ConnectorSource(io.cdap.cdap.etl.batch.connector.ConnectorSource) Arrays(java.util.Arrays) TypeToken(com.google.gson.reflect.TypeToken) MultiConnectorSource(io.cdap.cdap.etl.batch.connector.MultiConnectorSource) LoggerFactory(org.slf4j.LoggerFactory) AbstractWorkflow(io.cdap.cdap.api.workflow.AbstractWorkflow) SparkSink(io.cdap.cdap.etl.api.batch.SparkSink) GsonBuilder(com.google.gson.GsonBuilder) PipelineAction(io.cdap.cdap.etl.batch.customaction.PipelineAction) Constants(io.cdap.cdap.etl.common.Constants) FieldOperationTypeAdapter(io.cdap.cdap.etl.common.FieldOperationTypeAdapter) WorkflowContext(io.cdap.cdap.api.workflow.WorkflowContext) Gson(com.google.gson.Gson) StageMetrics(io.cdap.cdap.etl.api.StageMetrics) Map(java.util.Map) Connection(io.cdap.cdap.etl.proto.Connection) ProgramStatus(io.cdap.cdap.api.ProgramStatus) Condition(io.cdap.cdap.etl.api.condition.Condition) TriggeringPropertyMapping(io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping) Action(io.cdap.cdap.etl.api.action.Action) BatchJoiner(io.cdap.cdap.etl.api.batch.BatchJoiner) ActionSpec(io.cdap.cdap.etl.batch.ActionSpec) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) ArgumentMapping(io.cdap.cdap.etl.proto.v2.ArgumentMapping) Metrics(io.cdap.cdap.api.metrics.Metrics) AlertPublisherContext(io.cdap.cdap.etl.api.AlertPublisherContext) PluginPropertyMapping(io.cdap.cdap.etl.proto.v2.PluginPropertyMapping) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) List(java.util.List) DefaultStageMetrics(io.cdap.cdap.etl.common.DefaultStageMetrics) BasicArguments(io.cdap.cdap.etl.common.BasicArguments) Type(java.lang.reflect.Type) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) ETLSpark(io.cdap.cdap.etl.spark.batch.ETLSpark) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) ApplicationConfigurer(io.cdap.cdap.api.app.ApplicationConfigurer) TrackedIterator(io.cdap.cdap.etl.common.TrackedIterator) ControlDag(io.cdap.cdap.etl.planner.ControlDag) WorkflowBackedActionContext(io.cdap.cdap.etl.batch.WorkflowBackedActionContext) TriggerInfo(io.cdap.cdap.api.schedule.TriggerInfo) Alert(io.cdap.cdap.etl.api.Alert) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) SplitterTransform(io.cdap.cdap.etl.api.SplitterTransform) SchemaTypeAdapter(io.cdap.cdap.internal.io.SchemaTypeAdapter) BatchPipelineSpecGenerator(io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext) HashMap(java.util.HashMap) AlertPublisherSink(io.cdap.cdap.etl.batch.connector.AlertPublisherSink) Deque(java.util.Deque) AlertReader(io.cdap.cdap.etl.batch.connector.AlertReader) ArrayList(java.util.ArrayList) DisjointConnectionsException(io.cdap.cdap.etl.planner.DisjointConnectionsException) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) LocationAwareMDCWrapperLogger(io.cdap.cdap.etl.common.LocationAwareMDCWrapperLogger) PluginContext(io.cdap.cdap.api.plugin.PluginContext) PipelinePlan(io.cdap.cdap.etl.planner.PipelinePlan) BatchActionContext(io.cdap.cdap.etl.api.batch.BatchActionContext) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) 
PipelinePlanner(io.cdap.cdap.etl.planner.PipelinePlanner) FieldLineageProcessor(io.cdap.cdap.etl.lineage.FieldLineageProcessor) TriggeringScheduleInfo(io.cdap.cdap.api.schedule.TriggeringScheduleInfo) PipelineCondition(io.cdap.cdap.etl.batch.condition.PipelineCondition) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) LinkedList(java.util.LinkedList) Operation(io.cdap.cdap.api.lineage.field.Operation) ETLMapReduce(io.cdap.cdap.etl.batch.mapreduce.ETLMapReduce) Metadata(io.cdap.cdap.api.metadata.Metadata) Logger(org.slf4j.Logger) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) ConditionBranches(io.cdap.cdap.etl.planner.ConditionBranches) Iterator(java.util.Iterator) BatchAggregator(io.cdap.cdap.etl.api.batch.BatchAggregator) SparkCompute(io.cdap.cdap.etl.api.batch.SparkCompute) Dag(io.cdap.cdap.etl.planner.Dag) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ProgramStatusTriggerInfo(io.cdap.cdap.api.schedule.ProgramStatusTriggerInfo) Schema(io.cdap.cdap.api.data.schema.Schema) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) PostAction(io.cdap.cdap.etl.api.batch.PostAction) NodeValue(io.cdap.cdap.api.workflow.NodeValue) BatchSink(io.cdap.cdap.etl.api.batch.BatchSink) DefaultAlertPublisherContext(io.cdap.cdap.etl.common.DefaultAlertPublisherContext) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) Collections(java.util.Collections) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator)
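A rough sketch of the fork-join flattening described in the comments above. The phase names and wrapper class are hypothetical; the ControlDag constructor, flatten(), getSources(), and getNodeOutputs() are the same calls configure() makes.

import java.util.HashSet;
import java.util.Set;

import io.cdap.cdap.etl.planner.ControlDag;
import io.cdap.cdap.etl.proto.Connection;

public class FlattenSketch {
    public static void main(String[] args) {
        // hypothetical phase connections: two independent phases that must both finish before phase3
        Set<Connection> phaseConnections = new HashSet<>();
        phaseConnections.add(new Connection("phase1", "phase3"));
        phaseConnections.add(new Connection("phase2", "phase3"));
        ControlDag controlDag = new ControlDag(phaseConnections);
        // flatten() rewrites the happens-before dag into fork-join form; per the comment in
        // configure(), the flattened dag is guaranteed to have a single source
        controlDag.flatten();
        String start = controlDag.getSources().iterator().next();
        // programs are then added by walking node outputs from that single source
        System.out.println(start + " -> " + controlDag.getNodeOutputs(start));
    }
}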

Example 3 with Dag

Use of io.cdap.cdap.etl.planner.Dag in project cdap by caskdata.

The class PipelineSpecGenerator, method validateConfig.

/**
 * Validate that this is a valid pipeline. A valid pipeline has the following properties:
 *
 * All stages in the pipeline have a unique name.
 * Source stages have at least one output and no inputs.
 * Sink stages have at least one input and no outputs.
 * There are no cycles in the pipeline.
 * All inputs into a stage have the same schema.
 * ErrorTransforms only have BatchSource, Transform, or BatchAggregator as input stages.
 * AlertPublishers have at least one input and no outputs and don't have SparkSink or BatchSink as input.
 * Action stages can only be at the start or end of the pipeline.
 * Condition stages have at most 2 outputs. Each stage on a condition's output branch has at most a single input.
 *
 * Returns the stages in the order they should be configured to ensure that all input stages are configured
 * before their output.
 *
 * @param config the user provided configuration
 * @return the order to configure the stages in
 * @throws IllegalArgumentException if the pipeline is invalid
 */
protected ValidatedPipeline validateConfig(ETLConfig config) {
    config.validate();
    if (config.getStages().isEmpty()) {
        throw new IllegalArgumentException("A pipeline must contain at least one stage.");
    }
    Set<String> actionStages = new HashSet<>();
    Set<String> conditionStages = new HashSet<>();
    Map<String, String> stageTypes = new HashMap<>();
    // check stage name uniqueness
    Set<String> stageNames = new HashSet<>();
    for (ETLStage stage : config.getStages()) {
        if (!stageNames.add(stage.getName())) {
            throw new IllegalArgumentException(String.format("Invalid pipeline. Multiple stages are named %s. Please ensure all stage names are unique", stage.getName()));
        }
        // if stage is Action stage, add it to the Action stage set
        if (isAction(stage.getPlugin().getType())) {
            actionStages.add(stage.getName());
        }
        // if the stage is condition add it to the Condition stage set
        if (stage.getPlugin().getType().equals(Condition.PLUGIN_TYPE)) {
            conditionStages.add(stage.getName());
        }
        stageTypes.put(stage.getName(), stage.getPlugin().getType());
    }
    // check that the from and to are names of actual stages
    // also check that conditions have at most 2 outgoing connections, each labeled either true or
    // false, and that no label is used on more than one connection
    Map<String, Boolean> conditionBranch = new HashMap<>();
    for (Connection connection : config.getConnections()) {
        if (!stageNames.contains(connection.getFrom())) {
            throw new IllegalArgumentException(String.format("Invalid connection %s. %s is not a stage.", connection, connection.getFrom()));
        }
        if (!stageNames.contains(connection.getTo())) {
            throw new IllegalArgumentException(String.format("Invalid connection %s. %s is not a stage.", connection, connection.getTo()));
        }
        if (conditionStages.contains(connection.getFrom())) {
            if (connection.getCondition() == null) {
                String msg = String.format("For condition stage %s, the connection %s is not marked with either " + "'true' or 'false'.", connection.getFrom(), connection);
                throw new IllegalArgumentException(msg);
            }
            // check if connection from the condition node is marked as true or false multiple times
            if (conditionBranch.containsKey(connection.getFrom()) && connection.getCondition().equals(conditionBranch.get(connection.getFrom()))) {
                String msg = String.format("For condition stage '%s', more than one outgoing connections are marked as %s.", connection.getFrom(), connection.getCondition());
                throw new IllegalArgumentException(msg);
            }
            conditionBranch.put(connection.getFrom(), connection.getCondition());
        }
    }
    List<ETLStage> traversalOrder = new ArrayList<>(stageNames.size());
    // can only have empty connections if the pipeline consists of a single action.
    if (config.getConnections().isEmpty()) {
        if (actionStages.size() == 1 && stageNames.size() == 1) {
            traversalOrder.add(config.getStages().iterator().next());
            return new ValidatedPipeline(traversalOrder, config);
        } else {
            throw new IllegalArgumentException("Invalid pipeline. There are no connections between stages. " + "This is only allowed if the pipeline consists of a single action plugin.");
        }
    }
    Dag dag = new Dag(config.getConnections());
    Set<String> controlStages = Sets.union(actionStages, conditionStages);
    Map<String, ETLStage> stages = new HashMap<>();
    for (ETLStage stage : config.getStages()) {
        String stageName = stage.getName();
        Set<String> stageInputs = dag.getNodeInputs(stageName);
        Set<String> stageOutputs = dag.getNodeOutputs(stageName);
        String stageType = stage.getPlugin().getType();
        boolean isSource = isSource(stageType);
        boolean isSink = isSink(stageType);
        // check source plugins are sources in the dag
        if (isSource) {
            if (!stageInputs.isEmpty() && !controlStages.containsAll(stageInputs)) {
                throw new IllegalArgumentException(String.format("%s %s has incoming connections from %s. %s stages cannot have any incoming connections.", stageType, stageName, Joiner.on(',').join(stageInputs), stageType));
            }
            // check that source plugins are not present after any non-condition/action stage
            Set<String> parents = dag.parentsOf(stageName);
            Set<String> nonControlParents = Sets.difference(parents, controlStages);
            if (nonControlParents.size() > 1) {
                // the stage's nonControlParents should only contain itself
                throw new IllegalArgumentException(String.format("%s %s is invalid. %s stages can only be placed at the start of the pipeline.", stageType, stageName, stageType));
            }
        } else if (isSink) {
            if (!stageOutputs.isEmpty() && !controlStages.containsAll(stageOutputs)) {
                throw new IllegalArgumentException(String.format("%s %s has outgoing connections to %s. %s stages cannot have any outgoing connections.", stageType, stageName, Joiner.on(',').join(stageOutputs), stageType));
            }
        } else if (ErrorTransform.PLUGIN_TYPE.equals(stageType)) {
            for (String inputStage : stageInputs) {
                String inputType = stageTypes.get(inputStage);
                if (!VALID_ERROR_INPUTS.contains(inputType)) {
                    throw new IllegalArgumentException(String.format("ErrorTransform %s cannot have stage %s of type %s as input. Only %s stages can emit errors.", stageName, inputStage, inputType, Joiner.on(',').join(VALID_ERROR_INPUTS)));
                }
            }
        }
        boolean isAction = isAction(stageType);
        if (!isAction && !stageType.equals(Condition.PLUGIN_TYPE) && !isSource && stageInputs.isEmpty()) {
            throw new IllegalArgumentException(String.format("Stage %s is unreachable, it has no incoming connections.", stageName));
        }
        if (!isAction && !isSink && stageOutputs.isEmpty()) {
            throw new IllegalArgumentException(String.format("Stage %s is a dead end, it has no outgoing connections.", stageName));
        }
        stages.put(stageName, stage);
    }
    // make sure actions are not in the middle of the pipeline -- only at the start and/or end
    for (String actionStage : actionStages) {
        Set<String> actionParents = dag.parentsOf(actionStage);
        Set<String> actionChildren = dag.accessibleFrom(actionStage);
        Set<String> nonControlParents = Sets.difference(actionParents, controlStages);
        Set<String> nonControlChildren = Sets.difference(actionChildren, controlStages);
        if (!nonControlChildren.isEmpty() && !nonControlParents.isEmpty()) {
            throw new IllegalArgumentException(String.format("Action stage '%s' is invalid. Actions can only be placed at the start or end of the pipeline.", actionStage));
        }
    }
    validateConditionBranches(conditionStages, dag);
    for (String stageName : dag.getTopologicalOrder()) {
        traversalOrder.add(stages.get(stageName));
    }
    return new ValidatedPipeline(traversalOrder, config);
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) ArrayList(java.util.ArrayList) Dag(io.cdap.cdap.etl.planner.Dag) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) HashSet(java.util.HashSet)
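A small sketch of the structural checks performed above, using hypothetical stage names; getNodeInputs(), getNodeOutputs(), parentsOf(), accessibleFrom(), and getTopologicalOrder() are the same Dag methods validateConfig() relies on.

import java.util.HashSet;
import java.util.Set;

import io.cdap.cdap.etl.planner.Dag;
import io.cdap.cdap.etl.proto.Connection;

public class ValidateSketch {
    public static void main(String[] args) {
        // hypothetical pipeline: source -> transform -> sink
        Set<Connection> connections = new HashSet<>();
        connections.add(new Connection("source", "transform"));
        connections.add(new Connection("transform", "sink"));
        Dag dag = new Dag(connections);
        // sources must have no incoming connections, sinks no outgoing ones
        boolean sourceOk = dag.getNodeInputs("source").isEmpty();
        boolean sinkOk = dag.getNodeOutputs("sink").isEmpty();
        // parentsOf and accessibleFrom back the source/action placement checks above
        Set<String> parentsOfSink = dag.parentsOf("sink");
        Set<String> reachableFromSource = dag.accessibleFrom("source");
        // stages are configured in topological order so every input stage is configured first
        System.out.println(dag.getTopologicalOrder());
        System.out.println(sourceOk + " " + sinkOk + " " + parentsOfSink + " " + reachableFromSource);
    }
}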

Aggregations

Dag (io.cdap.cdap.etl.planner.Dag) 3
HashMap (java.util.HashMap) 3
ImmutableSet (com.google.common.collect.ImmutableSet) 1
Sets (com.google.common.collect.Sets) 1
Gson (com.google.gson.Gson) 1
GsonBuilder (com.google.gson.GsonBuilder) 1
TypeToken (com.google.gson.reflect.TypeToken) 1
ProgramStatus (io.cdap.cdap.api.ProgramStatus) 1
ApplicationConfigurer (io.cdap.cdap.api.app.ApplicationConfigurer) 1
Schema (io.cdap.cdap.api.data.schema.Schema) 1
CloseableIterator (io.cdap.cdap.api.dataset.lib.CloseableIterator) 1
FileSet (io.cdap.cdap.api.dataset.lib.FileSet) 1
Operation (io.cdap.cdap.api.lineage.field.Operation) 1
MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator) 1
Metadata (io.cdap.cdap.api.metadata.Metadata) 1
MetadataScope (io.cdap.cdap.api.metadata.MetadataScope) 1
Metrics (io.cdap.cdap.api.metrics.Metrics) 1
PluginContext (io.cdap.cdap.api.plugin.PluginContext) 1
ProgramStatusTriggerInfo (io.cdap.cdap.api.schedule.ProgramStatusTriggerInfo) 1
TriggerInfo (io.cdap.cdap.api.schedule.TriggerInfo) 1