Search in sources:

Example 6 with PipelinePhase

Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The following example is taken from the class MultiSinkFunction, method initializeBranchExecutors.

/**
 * Creates a transform executor for every branch of the consolidated stage group, and builds the
 * mapping from incoming records (input stage name, output port, record type) to the branch
 * executors that should consume them.
 *
 * <p>A branch starts at a "group source": a stage in the group that receives at least one input
 * from a stage outside the group. Each branch is the subset of the pipeline phase reachable from
 * its group source.
 */
private void initializeBranchExecutors() {
    emitter = new DefaultEmitter<>();
    PipelinePluginInstantiator pluginInstantiator = new PipelinePluginInstantiator(pipelineRuntime.getPluginContext(), pipelineRuntime.getMetrics(), phaseSpec, new SingleConnectorFactory());
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), pipelineRuntime.getLogicalStartTime(), pipelineRuntime.getSecureStore(), pipelineRuntime.getServiceDiscoverer(), pipelineRuntime.getNamespace());
    executorFactory = new SparkTransformExecutorFactory(pluginInstantiator, macroEvaluator, null, collectors, dataTracers, pipelineRuntime, emitter);
    /*
       If the dag is:

            |--> t1 --> k1
       s1 --|
            |--> k2
                 ^
           s2 ---|

       the group is t1, k1, and k2.
     */
    PipelinePhase pipelinePhase = phaseSpec.getPhase();
    branchExecutors = new HashMap<>();
    inputConnections = new HashMap<>();
    for (String groupSource : group) {
        // group "sources" are stages in the group that don't have an input from another stage in the group.
        // difference(inputs, group) is empty exactly when every input comes from inside the group,
        // so those stages are skipped; only stages fed from outside the group start a branch.
        if (Sets.difference(pipelinePhase.getStageInputs(groupSource), group).isEmpty()) {
            continue;
        }
        // get the branch by taking a subset of the pipeline starting from the "source".
        // with the example above, the two branches are t1 -> k1, and k2.
        PipelinePhase branch;
        if (pipelinePhase.getSinks().contains(groupSource)) {
            // pipelinePhase.subsetFrom() throws an exception if the new "source" is also a sink,
            // since a Dag cannot be a single node. so build it manually.
            branch = PipelinePhase.builder(pipelinePhase.getPluginTypes()).addStage(pipelinePhase.getStage(groupSource)).build();
        } else {
            branch = pipelinePhase.subsetFrom(Collections.singleton(groupSource));
        }
        try {
            branchExecutors.put(groupSource, executorFactory.create(branch));
        } catch (Exception e) {
            // a failure here means the planner produced a group whose branches cannot be executed;
            // surface it with a hint on how to work around it at runtime.
            throw new IllegalStateException(String.format("Unable to get subset of pipeline starting from stage %s. " + "This indicates a planning error. Please report this bug and turn off stage " + "consolidation by setting %s to false in the runtime arguments.", groupSource, Constants.CONSOLIDATE_STAGES), e);
        }
        /*
          create a mapping from possible inputs to "group sources". This will help identify which incoming
          records should be sent to which branch executor.

          for example, the pipeline may look like:

                           |port a --> k1
             s --> split --|
                           |port b --> k2

          In this scenario, k1, and k2, are all in the same group, so the map contains:

            { stageName: split, port: a, type: output } -> [k1]
            { stageName: split, port: b, type: output } -> [k2]

          A slightly more complicated example:

                               |--> k1
            s1 --> transform --|
                      |        |--> k2
                      |
                      |--> error collector --> k3

          In this scenario, k1, k2, k3, and error collector are in the same group, so the map contains:

            { stageName: transform, type: output } -> [k1, k2]
            { stageName: transform, type: error } -> [k3]
       */
        // records consumed by an ErrorTransform are error records; everything else is normal output
        String groupSourceType = pipelinePhase.getStage(groupSource).getPluginType();
        RecordType recordType = ErrorTransform.PLUGIN_TYPE.equals(groupSourceType) ? RecordType.ERROR : RecordType.OUTPUT;
        for (String inputStage : pipelinePhase.getStageInputs(groupSource)) {
            // NOTE(review): assumes every input stage has an output port entry for groupSource --
            // ports.get(groupSource) would NPE otherwise; presumably guaranteed by the planner, confirm.
            Map<String, StageSpec.Port> ports = pipelinePhase.getStage(inputStage).getOutputPorts();
            String port = ports.get(groupSource).getPort();
            InputInfo inputInfo = new InputInfo(inputStage, recordType, port);
            Set<String> groupSources = inputConnections.computeIfAbsent(inputInfo, key -> new HashSet<>());
            groupSources.add(groupSource);
        }
    }
}
Also used : DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) SingleConnectorFactory(io.cdap.cdap.etl.batch.connector.SingleConnectorFactory) SparkTransformExecutorFactory(io.cdap.cdap.etl.spark.SparkTransformExecutorFactory) RecordType(io.cdap.cdap.etl.common.RecordType) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) PipelinePluginInstantiator(io.cdap.cdap.etl.batch.PipelinePluginInstantiator)

Example 7 with PipelinePhase

Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The following example is taken from the class PipelineAction, method run.

/**
 * Runs the single action stage of this pipeline phase: deserializes the phase spec from the
 * custom action's properties, instantiates the Action plugin, executes it, and then copies any
 * arguments the action added into the workflow token so downstream phases can see them.
 *
 * @throws Exception if the action plugin cannot be instantiated or fails while running
 */
@Override
public void run() throws Exception {
    CustomActionContext context = getContext();
    Map<String, String> properties = context.getSpecification().getProperties();
    // the phase spec is serialized as JSON into the custom action's properties under PIPELINEID
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    PipelinePhase phase = phaseSpec.getPhase();
    // assumes an action phase contains exactly one stage -- TODO confirm against the planner
    StageSpec stageSpec = phase.iterator().next();
    PluginContext pluginContext = new PipelinePluginContext(context, metrics, phaseSpec.isStageLoggingEnabled(), phaseSpec.isProcessTimingEnabled());
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context, metrics);
    Action action = pluginContext.newPluginInstance(stageSpec.getName(), new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context, context.getNamespace()));
    ActionContext actionContext = new BasicActionContext(context, pipelineRuntime, stageSpec);
    // NOTE(review): the action is only executed when no data tracer is enabled for the stage --
    // presumably this means actions are skipped during preview runs; confirm.
    if (!context.getDataTracer(stageSpec.getName()).isEnabled()) {
        action.run(actionContext);
    }
    WorkflowToken token = context.getWorkflowToken();
    if (token == null) {
        throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
    }
    // propagate arguments added by the action to the workflow token for later phases
    for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
        token.put(entry.getKey(), entry.getValue());
    }
}
Also used : Action(io.cdap.cdap.etl.api.action.Action) AbstractCustomAction(io.cdap.cdap.api.customaction.AbstractCustomAction) CustomAction(io.cdap.cdap.api.customaction.CustomAction) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) ActionContext(io.cdap.cdap.etl.api.action.ActionContext) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) HashMap(java.util.HashMap) Map(java.util.Map) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext)

Example 8 with PipelinePhase

Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The following example is taken from the class SparkPipelineRunner, method runPipeline.

/**
 * Executes a single pipeline phase on Spark. Stages are visited in topological order of the
 * (optionally consolidated) dag: the source is read, transforms/error-transforms/alert-publishers
 * are applied to the emitted records, and sink writes are collected as runnables which are then
 * executed either serially or, when the runtime argument
 * "pipeline.spark.parallel.sinks.enabled" is true, in parallel on a dedicated thread pool.
 *
 * <p>Fixes relative to the previous version: removed an unused local iterator over the sink
 * futures, used {@code Future<?>} instead of the raw type, and restored the thread's interrupt
 * status when an {@link InterruptedException} is caught while waiting on sink futures.
 *
 * @param phaseSpec the phase to run
 * @param sourcePluginType plugin type to treat as the phase source
 * @param sec Spark execution context for this run
 * @param stagePartitions configured partition counts per stage
 * @param pluginContext used to instantiate stage plugins
 * @param collectors per-stage statistics collectors; stages without one get a no-op collector
 * @param uncombinableSinks sinks that must not be consolidated into a group
 * @param consolidateStages whether to group stages to reduce the number of Spark jobs
 * @param cacheFunctions whether to cache function instances
 * @throws Exception if a stage cannot be instantiated or a sink write fails
 */
public void runPipeline(PhaseSpec phaseSpec, String sourcePluginType, JavaSparkExecutionContext sec, Map<String, Integer> stagePartitions, PluginContext pluginContext, Map<String, StageStatisticsCollector> collectors, Set<String> uncombinableSinks, boolean consolidateStages, boolean cacheFunctions) throws Exception {
    PipelinePhase pipelinePhase = phaseSpec.getPhase();
    BasicArguments arguments = new BasicArguments(sec);
    FunctionCache.Factory functionCacheFactory = FunctionCache.Factory.newInstance(cacheFunctions);
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(arguments, sec.getLogicalStartTime(), sec.getSecureStore(), sec.getServiceDiscoverer(), sec.getNamespace());
    Map<String, EmittedRecords> emittedRecords = new HashMap<>();
    // should never happen, but removes warning
    if (pipelinePhase.getDag() == null) {
        throw new IllegalStateException("Pipeline phase has no connections.");
    }
    // stages that can never be merged into a consolidated group
    Set<String> uncombinableStages = new HashSet<>(uncombinableSinks);
    for (String uncombinableType : UNCOMBINABLE_PLUGIN_TYPES) {
        pipelinePhase.getStagesOfType(uncombinableType).stream().map(StageSpec::getName).forEach(s -> uncombinableStages.add(s));
    }
    CombinerDag groupedDag = new CombinerDag(pipelinePhase.getDag(), uncombinableStages);
    Map<String, Set<String>> groups = consolidateStages ? groupedDag.groupNodes() : Collections.emptyMap();
    if (!groups.isEmpty()) {
        LOG.debug("Stage consolidation is on.");
        int groupNum = 1;
        for (Set<String> group : groups.values()) {
            LOG.debug("Group{}: {}", groupNum, group);
            groupNum++;
        }
    }
    // stages with more than one output branch
    Set<String> branchers = new HashSet<>();
    for (String stageName : groupedDag.getNodes()) {
        if (groupedDag.getNodeOutputs(stageName).size() > 1) {
            branchers.add(stageName);
        }
    }
    // aggregators introduce a shuffle boundary
    Set<String> shufflers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE).stream().map(StageSpec::getName).collect(Collectors.toSet());
    Collection<Runnable> sinkRunnables = new ArrayList<>();
    for (String stageName : groupedDag.getTopologicalOrder()) {
        if (groups.containsKey(stageName)) {
            // the node represents a consolidated group of stages; handle the whole group at once
            sinkRunnables.add(handleGroup(sec, phaseSpec, groups.get(stageName), groupedDag.getNodeInputs(stageName), emittedRecords, collectors));
            continue;
        }
        StageSpec stageSpec = pipelinePhase.getStage(stageName);
        String pluginType = stageSpec.getPluginType();
        EmittedRecords.Builder emittedBuilder = EmittedRecords.builder();
        // don't want to do an additional filter for stages that can emit errors,
        // but aren't connected to an ErrorTransform
        // similarly, don't want to do an additional filter for alerts when the stage isn't connected to
        // an AlertPublisher
        boolean hasErrorOutput = false;
        boolean hasAlertOutput = false;
        Set<String> outputs = pipelinePhase.getStageOutputs(stageName);
        for (String output : outputs) {
            String outputPluginType = pipelinePhase.getStage(output).getPluginType();
            // noinspection ConstantConditions
            if (ErrorTransform.PLUGIN_TYPE.equals(outputPluginType)) {
                hasErrorOutput = true;
            } else if (AlertPublisher.PLUGIN_TYPE.equals(outputPluginType)) {
                hasAlertOutput = true;
            }
        }
        SparkCollection<Object> stageData = null;
        Map<String, SparkCollection<Object>> inputDataCollections = new HashMap<>();
        Set<String> stageInputs = pipelinePhase.getStageInputs(stageName);
        for (String inputStageName : stageInputs) {
            StageSpec inputStageSpec = pipelinePhase.getStage(inputStageName);
            if (inputStageSpec == null) {
                // means the input to this stage is in a separate phase. For example, it is an action.
                continue;
            }
            String port = null;
            // not errors or alerts or output port records
            if (!Constants.Connector.PLUGIN_TYPE.equals(inputStageSpec.getPluginType()) && !Constants.Connector.PLUGIN_TYPE.equals(pluginType)) {
                port = inputStageSpec.getOutputPorts().get(stageName).getPort();
            }
            SparkCollection<Object> inputRecords = port == null ? emittedRecords.get(inputStageName).outputRecords : emittedRecords.get(inputStageName).outputPortRecords.get(port);
            inputDataCollections.put(inputStageName, inputRecords);
        }
        // initialize the stageRDD as the union of all input RDDs.
        if (!inputDataCollections.isEmpty()) {
            Iterator<SparkCollection<Object>> inputCollectionIter = inputDataCollections.values().iterator();
            stageData = inputCollectionIter.next();
            // don't union inputs records if we're joining or if we're processing errors
            while (!BatchJoiner.PLUGIN_TYPE.equals(pluginType) && !ErrorTransform.PLUGIN_TYPE.equals(pluginType) && inputCollectionIter.hasNext()) {
                stageData = stageData.union(inputCollectionIter.next());
            }
        }
        boolean isConnectorSource = Constants.Connector.PLUGIN_TYPE.equals(pluginType) && pipelinePhase.getSources().contains(stageName);
        boolean isConnectorSink = Constants.Connector.PLUGIN_TYPE.equals(pluginType) && pipelinePhase.getSinks().contains(stageName);
        StageStatisticsCollector collector = collectors.get(stageName) == null ? new NoopStageStatisticsCollector() : collectors.get(stageName);
        PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, collector);
        if (stageData == null) {
            // null in the other else-if conditions
            if (sourcePluginType.equals(pluginType) || isConnectorSource) {
                SparkCollection<RecordInfo<Object>> combinedData = getSource(stageSpec, functionCacheFactory, collector);
                emittedBuilder = addEmitted(emittedBuilder, pipelinePhase, stageSpec, combinedData, groupedDag, branchers, shufflers, hasErrorOutput, hasAlertOutput);
            } else {
                throw new IllegalStateException(String.format("Stage '%s' has no input and is not a source.", stageName));
            }
        } else if (BatchSink.PLUGIN_TYPE.equals(pluginType) || isConnectorSink) {
            // sink writes are deferred so they can optionally run in parallel at the end
            sinkRunnables.add(stageData.createStoreTask(stageSpec, new BatchSinkFunction(pluginFunctionContext, functionCacheFactory.newCache())));
        } else if (SparkSink.PLUGIN_TYPE.equals(pluginType)) {
            SparkSink<Object> sparkSink = pluginContext.newPluginInstance(stageName, macroEvaluator);
            sinkRunnables.add(stageData.createStoreTask(stageSpec, sparkSink));
        } else if (AlertPublisher.PLUGIN_TYPE.equals(pluginType)) {
            // union all the alerts coming into this stage
            SparkCollection<Alert> inputAlerts = null;
            for (String inputStage : stageInputs) {
                SparkCollection<Alert> inputErrorsFromStage = emittedRecords.get(inputStage).alertRecords;
                if (inputErrorsFromStage == null) {
                    continue;
                }
                if (inputAlerts == null) {
                    inputAlerts = inputErrorsFromStage;
                } else {
                    inputAlerts = inputAlerts.union(inputErrorsFromStage);
                }
            }
            if (inputAlerts != null) {
                inputAlerts.publishAlerts(stageSpec, collector);
            }
        } else if (ErrorTransform.PLUGIN_TYPE.equals(pluginType)) {
            // union all the errors coming into this stage
            SparkCollection<ErrorRecord<Object>> inputErrors = null;
            for (String inputStage : stageInputs) {
                SparkCollection<ErrorRecord<Object>> inputErrorsFromStage = emittedRecords.get(inputStage).errorRecords;
                if (inputErrorsFromStage == null) {
                    continue;
                }
                if (inputErrors == null) {
                    inputErrors = inputErrorsFromStage;
                } else {
                    inputErrors = inputErrors.union(inputErrorsFromStage);
                }
            }
            if (inputErrors != null) {
                SparkCollection<RecordInfo<Object>> combinedData = inputErrors.flatMap(stageSpec, new ErrorTransformFunction<Object, Object>(pluginFunctionContext, functionCacheFactory.newCache()));
                emittedBuilder = addEmitted(emittedBuilder, pipelinePhase, stageSpec, combinedData, groupedDag, branchers, shufflers, hasErrorOutput, hasAlertOutput);
            }
        } else {
            // try the declarative (relational) path first, fall back to the regular transform
            Object plugin = pluginContext.newPluginInstance(stageName, macroEvaluator);
            Optional<EmittedRecords.Builder> declarativeBuilder = tryRelationalTransform(pipelinePhase, groupedDag, branchers, shufflers, stageName, stageSpec, emittedBuilder, hasErrorOutput, hasAlertOutput, stageData, inputDataCollections, plugin);
            if (declarativeBuilder.isPresent()) {
                emittedBuilder = declarativeBuilder.get();
            } else {
                emittedBuilder = transform(emittedBuilder, stagePartitions, pipelinePhase, functionCacheFactory, groupedDag, branchers, shufflers, stageName, stageSpec, pluginType, hasErrorOutput, hasAlertOutput, stageData, inputDataCollections, collector, pluginFunctionContext, plugin);
            }
        }
        emittedRecords.put(stageName, emittedBuilder.build());
    }
    boolean shouldWriteInParallel = Boolean.parseBoolean(sec.getRuntimeArguments().get("pipeline.spark.parallel.sinks.enabled"));
    if (!shouldWriteInParallel) {
        for (Runnable runnable : sinkRunnables) {
            runnable.run();
        }
        return;
    }
    // run all sink tasks in parallel, one thread per task
    Collection<Future<?>> sinkFutures = new ArrayList<>(sinkRunnables.size());
    ExecutorService executorService = Executors.newFixedThreadPool(sinkRunnables.size(), new ThreadFactoryBuilder().setNameFormat("pipeline-sink-task").build());
    for (Runnable runnable : sinkRunnables) {
        sinkFutures.add(executorService.submit(runnable));
    }
    Throwable error = null;
    for (Future<?> future : sinkFutures) {
        try {
            future.get();
        } catch (ExecutionException e) {
            // unwrap so callers see the actual sink failure rather than the executor wrapper
            error = e.getCause();
            break;
        } catch (InterruptedException e) {
            // restore the interrupt status so callers further up the stack can observe it
            Thread.currentThread().interrupt();
            break;
        }
    }
    executorService.shutdownNow();
    if (error != null) {
        throw Throwables.propagate(error);
    }
}
Also used : DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ArrayList(java.util.ArrayList) PluginFunctionContext(io.cdap.cdap.etl.spark.function.PluginFunctionContext) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) BasicArguments(io.cdap.cdap.etl.common.BasicArguments) ExecutionException(java.util.concurrent.ExecutionException) FunctionCache(io.cdap.cdap.etl.spark.function.FunctionCache) HashSet(java.util.HashSet) NoopStageStatisticsCollector(io.cdap.cdap.etl.common.NoopStageStatisticsCollector) RecordInfo(io.cdap.cdap.etl.common.RecordInfo) CombinerDag(io.cdap.cdap.etl.planner.CombinerDag) BatchSinkFunction(io.cdap.cdap.etl.spark.function.BatchSinkFunction) StageStatisticsCollector(io.cdap.cdap.etl.common.StageStatisticsCollector) NoopStageStatisticsCollector(io.cdap.cdap.etl.common.NoopStageStatisticsCollector) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ErrorRecord(io.cdap.cdap.etl.api.ErrorRecord)

Example 9 with PipelinePhase

Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The following example is taken from the class PipelinePlannerTest, method testMultipleConditions.

/**
 * Verifies that the planner splits a pipeline containing nested conditions into the expected
 * phases: each condition stage becomes its own single-stage phase, the data stages between
 * conditions become phases bounded by connector source/sink stages, and the phase-level
 * connections carry the condition branch taken (true/false).
 */
@Test
public void testMultipleConditions() {
    /*
      n1 - n2 - condition1 - n3 - n4 - condition2 - n5 - condition3 - n6
                    |                       |                   |
                    |--n10                  |---condition4 - n8 |------n7
                                                  |
                                                  |----condition5 - n9
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(StageSpec.builder("n1", NODE).build(), StageSpec.builder("n2", NODE).build(), StageSpec.builder("condition1", CONDITION1).build(), StageSpec.builder("n3", NODE).build(), StageSpec.builder("n4", NODE).build(), StageSpec.builder("condition2", CONDITION2).build(), StageSpec.builder("n5", NODE).build(), StageSpec.builder("condition3", CONDITION3).build(), StageSpec.builder("n6", NODE).build(), StageSpec.builder("condition4", CONDITION4).build(), StageSpec.builder("n7", NODE).build(), StageSpec.builder("condition5", CONDITION5).build(), StageSpec.builder("n8", NODE).build(), StageSpec.builder("n9", NODE).build(), StageSpec.builder("n10", NODE).build());
    // boolean on a Connection marks which branch of a condition the edge belongs to
    Set<Connection> connections = ImmutableSet.of(new Connection("n1", "n2"), new Connection("n2", "condition1"), new Connection("condition1", "n3", true), new Connection("condition1", "n10", false), new Connection("n3", "n4"), new Connection("n4", "condition2"), new Connection("condition2", "n5", true), new Connection("n5", "condition3"), new Connection("condition3", "n6", true), new Connection("condition3", "n7", false), new Connection("condition2", "condition4", false), new Connection("condition4", "n8", true), new Connection("condition4", "condition5", false), new Connection("condition5", "n9", true));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION1.getType(), CONDITION2.getType(), CONDITION3.getType(), CONDITION4.getType(), CONDITION5.getType());
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> emptySet = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, emptySet, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    // build the expected plan phase by phase; each diagram below shows the phase's contents
    Map<String, PipelinePhase> phases = new HashMap<>();
    /*
      n1--n2--condition1.connector
     */
    PipelinePhase phase1 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n1", NODE).build()).addStage(StageSpec.builder("n2", NODE).build()).addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("n1", "n2").addConnection("n2", "condition1.connector").build();
    Dag controlPhaseDag = new Dag(ImmutableSet.of(new Connection("n1", "n2"), new Connection("n2", "condition1")));
    String phase1Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase1Name, phase1);
    /*
      condition1
     */
    PipelinePhase phase2 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition1", CONDITION1).build()).build();
    String phase2Name = "condition1";
    phases.put(phase2Name, phase2);
    /*
      condition1.connector -- n3 - n4 - condition2.connector
     */
    PipelinePhase phase3 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SINK_TYPE)).build()).addStage(StageSpec.builder("n3", NODE).build()).addStage(StageSpec.builder("n4", NODE).build()).addConnection("condition1.connector", "n3").addConnection("n3", "n4").addConnection("n4", "condition2.connector").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition1", "n3"), new Connection("n3", "n4"), new Connection("n4", "condition2")));
    String phase3Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase3Name, phase3);
    /*
      condition1.connector -- n10
     */
    PipelinePhase phase4 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n10", NODE).build()).addConnection("condition1.connector", "n10").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition1", "n10")));
    String phase4Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase4Name, phase4);
    /*
      condition2
     */
    PipelinePhase phase5 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition2", CONDITION2).build()).build();
    String phase5Name = "condition2";
    phases.put(phase5Name, phase5);
    /*
      condition2.connector -- n5 -- condition3.connector
     */
    PipelinePhase phase6 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n5", NODE).build()).addStage(StageSpec.builder("condition3.connector", connectorSpec("condition3.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("condition2.connector", "n5").addConnection("n5", "condition3.connector").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition2", "n5"), new Connection("n5", "condition3")));
    String phase6Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase6Name, phase6);
    /*
      condition3
     */
    PipelinePhase phase7 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition3", CONDITION3).build()).build();
    String phase7Name = "condition3";
    phases.put(phase7Name, phase7);
    /*
      condition3.connector -- n6
     */
    PipelinePhase phase8 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n6", NODE).build()).addStage(StageSpec.builder("condition3.connector", connectorSpec("condition3.connector", Constants.Connector.SOURCE_TYPE)).build()).addConnection("condition3.connector", "n6").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition3", "n6")));
    String phase8Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase8Name, phase8);
    /*
      condition3.connector -- n7
     */
    PipelinePhase phase9 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n7", NODE).build()).addStage(StageSpec.builder("condition3.connector", connectorSpec("condition3.connector", Constants.Connector.SOURCE_TYPE)).build()).addConnection("condition3.connector", "n7").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition3", "n7")));
    String phase9Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase9Name, phase9);
    /*
      condition4
     */
    PipelinePhase phase10 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition4", CONDITION4).build()).build();
    String phase10Name = "condition4";
    phases.put(phase10Name, phase10);
    /*
      condition4(condition2.connector) -- n8
     */
    PipelinePhase phase11 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n8", NODE).build()).addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build()).addConnection("condition2.connector", "n8").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition4", "n8")));
    String phase11Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase11Name, phase11);
    /*
      condition5
     */
    PipelinePhase phase12 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition5", CONDITION5).build()).build();
    String phase12Name = "condition5";
    phases.put(phase12Name, phase12);
    /*
      condition5(condition2.connector) -- n9
     */
    PipelinePhase phase13 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n9", NODE).build()).addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build()).addConnection("condition2.connector", "n9").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition5", "n9")));
    String phase13Name = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phase13Name, phase13);
    // expected phase-level connections; the boolean flag is the condition branch taken
    Set<Connection> phaseConnections = new HashSet<>();
    phaseConnections.add(new Connection(phase1Name, phase2Name));
    phaseConnections.add(new Connection(phase2Name, phase3Name, true));
    phaseConnections.add(new Connection(phase2Name, phase4Name, false));
    phaseConnections.add(new Connection(phase3Name, phase5Name));
    phaseConnections.add(new Connection(phase5Name, phase6Name, true));
    phaseConnections.add(new Connection(phase6Name, phase7Name));
    phaseConnections.add(new Connection(phase7Name, phase8Name, true));
    phaseConnections.add(new Connection(phase7Name, phase9Name, false));
    phaseConnections.add(new Connection(phase5Name, phase10Name, false));
    phaseConnections.add(new Connection(phase10Name, phase11Name, true));
    phaseConnections.add(new Connection(phase10Name, phase12Name, false));
    phaseConnections.add(new Connection(phase12Name, phase13Name, true));
    PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with PipelinePhase

Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The following example is taken from the class PipelinePlannerTest, method testMultipleActionConditions.

/**
 * Verifies that the planner splits a pipeline containing chained and nested conditions,
 * plus fan-in/fan-out action stages, into the expected set of phases and control
 * connections. Each action and each condition becomes its own single-stage phase;
 * stage sequences between conditions become connector-bounded phases.
 */
@Test
public void testMultipleActionConditions() {
    /*
                                                   |-- n2 -- a3
            |-- a1 --|        |-- n0 -- n1 -- c1 --|                          |-- a5 --|
        a0--|        |-- c0 --|                    |-- n3 -- c2 -- n8 -- a4 --|        |-- a7
            |-- a2 --|        |                                               |-- a6 --|
                              |        |-- n4 -- n5 -- c4 -- c5 -- n9
                              |-- c3 --|
                                       |              |-- a8
                                       |-- n6 -- n7 --|
                                                      |-- a9
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(StageSpec.builder("a0", ACTION).build(), StageSpec.builder("a1", ACTION).build(), StageSpec.builder("a2", ACTION).build(), StageSpec.builder("a3", ACTION).build(), StageSpec.builder("a4", ACTION).build(), StageSpec.builder("a5", ACTION).build(), StageSpec.builder("a6", ACTION).build(), StageSpec.builder("a7", ACTION).build(), StageSpec.builder("a8", ACTION).build(), StageSpec.builder("a9", ACTION).build(), StageSpec.builder("c0", CONDITION).build(), StageSpec.builder("c1", CONDITION).build(), StageSpec.builder("c2", CONDITION).build(), StageSpec.builder("c3", CONDITION).build(), StageSpec.builder("c4", CONDITION).build(), StageSpec.builder("c5", CONDITION).build(), StageSpec.builder("n0", NODE).build(), StageSpec.builder("n1", NODE).build(), StageSpec.builder("n2", NODE).build(), StageSpec.builder("n3", NODE).build(), StageSpec.builder("n4", NODE).build(), StageSpec.builder("n5", NODE).build(), StageSpec.builder("n6", NODE).build(), StageSpec.builder("n7", NODE).build(), StageSpec.builder("n8", NODE).build(), StageSpec.builder("n9", NODE).build());
    Set<Connection> connections = ImmutableSet.of(new Connection("a0", "a1"), new Connection("a0", "a2"), new Connection("a1", "c0"), new Connection("a2", "c0"), new Connection("c0", "n0", true), new Connection("c0", "c3", false), new Connection("n0", "n1"), new Connection("n1", "c1"), new Connection("c1", "n2", true), new Connection("c1", "n3", false), new Connection("n2", "a3"), new Connection("n3", "c2"), new Connection("c2", "n8", true), new Connection("n8", "a4"), new Connection("a4", "a5"), new Connection("a4", "a6"), new Connection("a5", "a7"), new Connection("a6", "a7"), new Connection("c3", "n4", true), new Connection("c3", "n6", false), new Connection("n4", "n5"), new Connection("n5", "c4"), new Connection("c4", "c5", true), new Connection("c5", "n9", true), new Connection("n6", "n7"), new Connection("n7", "a8"), new Connection("n7", "a9"));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), ACTION.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
    Set<String> reduceTypes = ImmutableSet.of();
    Set<String> emptySet = ImmutableSet.of();
    Set<String> actionTypes = ImmutableSet.of(ACTION.getType());
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, actionTypes, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    Map<String, PipelinePhase> phases = new HashMap<>();
    Set<Connection> phaseConnections = new HashSet<>();
    // action-to-action and action-to-condition edges carry over directly as phase connections
    phaseConnections.add(new Connection("a0", "a1"));
    phaseConnections.add(new Connection("a0", "a2"));
    phaseConnections.add(new Connection("a1", "c0"));
    phaseConnections.add(new Connection("a2", "c0"));
    phaseConnections.add(new Connection("a4", "a5"));
    phaseConnections.add(new Connection("a4", "a6"));
    phaseConnections.add(new Connection("a5", "a7"));
    phaseConnections.add(new Connection("a6", "a7"));
    // condition-to-condition edges keep their branch (true/false) flag
    phaseConnections.add(new Connection("c0", "c3", false));
    phaseConnections.add(new Connection("c4", "c5", true));
    // every action and every condition becomes its own single-stage phase named after the stage
    for (String action : ImmutableList.of("a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9")) {
        phases.put(action, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(action, ACTION).build()).build());
    }
    for (String condition : ImmutableList.of("c0", "c1", "c2", "c3", "c4", "c5")) {
        phases.put(condition, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(condition, CONDITION).build()).build());
    }
    // [c0] --true-->  [c0 -- n0 -- n1 -- c1]
    PipelinePhase phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n0", NODE).build()).addStage(StageSpec.builder("n1", NODE).build()).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("n0", "n1").addConnection("n1", "c1.connector").build();
    Dag controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c0", "n0"), new Connection("n0", "n1"), new Connection("n1", "c1")));
    String phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c0", phaseName, true));
    // [c0 -- n0 -- n1 -- c1] --> [c1]
    phaseConnections.add(new Connection(phaseName, "c1"));
    // [c1] --true--> [c1 -- n2 -- a3]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n2", NODE).build()).addConnection("c1.connector", "n2").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c1", "n2"), new Connection("n2", "a3")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, true));
    // [c1 -- n2 -- a3] -- [a3]
    phaseConnections.add(new Connection(phaseName, "a3"));
    // [c1] --false--> [c1 -- n3 -- c2]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n3", NODE).build()).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("c1.connector", "n3").addConnection("n3", "c2.connector").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c1", "n3"), new Connection("n3", "c2")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, false));
    // [c1.connector -- n3 -- c2.connector] --> [c2]
    phaseConnections.add(new Connection(phaseName, "c2"));
    // [c2] --true--> [c2 -- n8 -- a4]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n8", NODE).build()).addConnection("c2.connector", "n8").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c2", "n8"), new Connection("n8", "a4")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c2", phaseName, true));
    // [c2 -- n8 -- a4] --> [a4]
    phaseConnections.add(new Connection(phaseName, "a4"));
    // [c3] --true--> [c3 -- n4 -- n5 -- c4]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n4", NODE).build()).addStage(StageSpec.builder("n5", NODE).build()).addStage(StageSpec.builder("c4.connector", connectorSpec("c4.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("n4", "n5").addConnection("n5", "c4.connector").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c3", "n4"), new Connection("n4", "n5"), new Connection("n5", "c4")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c3", phaseName, true));
    // [c3 -- n4 -- n5 -- c4] --> c4
    phaseConnections.add(new Connection(phaseName, "c4"));
    // [c5] --true--> [c5 (via c4.connector) -- n9]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c4.connector", connectorSpec("c4.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n9", NODE).build()).addConnection("c4.connector", "n9").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c5", "n9")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c5", phaseName, true));
    // [c3] --false--> [c3 -- n6 -- n7 -- a8, a9]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n6", NODE).build()).addStage(StageSpec.builder("n7", NODE).build()).addConnection("n6", "n7").build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c3", "n6"), new Connection("n6", "n7"), new Connection("n7", "a8"), new Connection("n7", "a9")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c3", phaseName, false));
    // [c3 -- n6 -- n7 -- a8, a9] --> [a8]
    // [c3 -- n6 -- n7 -- a8, a9] --> [a9]
    phaseConnections.add(new Connection(phaseName, "a8"));
    phaseConnections.add(new Connection(phaseName, "a9"));
    PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase)15 StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)14 HashMap (java.util.HashMap)10 HashSet (java.util.HashSet)8 Connection (io.cdap.cdap.etl.proto.Connection)7 PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec)6 Test (org.junit.Test)6 DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator)4 MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator)3 BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec)3 Map (java.util.Map)3 ImmutableSet (com.google.common.collect.ImmutableSet)2 PluginContext (io.cdap.cdap.api.plugin.PluginContext)2 WorkflowToken (io.cdap.cdap.api.workflow.WorkflowToken)2 PipelinePluginInstantiator (io.cdap.cdap.etl.batch.PipelinePluginInstantiator)2 BasicArguments (io.cdap.cdap.etl.common.BasicArguments)2 PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime)2 PipelinePluginContext (io.cdap.cdap.etl.common.plugin.PipelinePluginContext)2 Set (java.util.Set)2 ImmutableMap (com.google.common.collect.ImmutableMap)1