
Example 1 with PipelinePhase

Use of co.cask.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The class ETLWorker, method configure:

@Override
public void configure() {
    setName(NAME);
    setDescription("Worker Driver for Realtime ETL Pipelines");
    int instances = config.getInstances();
    if (instances < 1) {
        throw new IllegalArgumentException("instances must be greater than 0.");
    }
    setInstances(instances);
    if (config.getResources() != null) {
        setResources(config.getResources());
    }
    PipelineSpecGenerator<ETLRealtimeConfig, PipelineSpec> specGenerator =
        new RealtimePipelineSpecGenerator(getConfigurer(),
                                          ImmutableSet.of(RealtimeSource.PLUGIN_TYPE),
                                          ImmutableSet.of(RealtimeSink.PLUGIN_TYPE),
                                          Table.class,
                                          TableProperties.builder().setSchema(ERROR_SCHEMA).build());
    PipelineSpec spec = specGenerator.generateSpec(config);
    int sourceCount = 0;
    for (StageSpec stageSpec : spec.getStages()) {
        if (RealtimeSource.PLUGIN_TYPE.equals(stageSpec.getPlugin().getType())) {
            sourceCount++;
        }
    }
    if (sourceCount != 1) {
        throw new IllegalArgumentException("Invalid pipeline. There must be exactly one source.");
    }
    PipelinePlanner planner = new PipelinePlanner(SUPPORTED_PLUGIN_TYPES, ImmutableSet.<String>of(), ImmutableSet.<String>of(), ImmutableSet.<String>of());
    PipelinePlan plan = planner.plan(spec);
    if (plan.getPhases().size() != 1) {
        // should never happen
        throw new IllegalArgumentException("There was an error planning the pipeline. There should only be one phase.");
    }
    PipelinePhase pipeline = plan.getPhases().values().iterator().next();
    Map<String, String> properties = new HashMap<>();
    properties.put(Constants.PIPELINE_SPEC_KEY, GSON.toJson(spec));
    properties.put(Constants.PIPELINEID, GSON.toJson(pipeline));
    // Generate unique id for this app creation.
    properties.put(UNIQUE_ID, String.valueOf(System.currentTimeMillis()));
    properties.put(Constants.STAGE_LOGGING_ENABLED, String.valueOf(config.isStageLoggingEnabled()));
    setProperties(properties);
}
Also used: PipelinePlan(co.cask.cdap.etl.planner.PipelinePlan) PipelinePlanner(co.cask.cdap.etl.planner.PipelinePlanner) HashMap(java.util.HashMap) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) PipelineSpec(co.cask.cdap.etl.spec.PipelineSpec) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec)
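For comparison, here is a minimal sketch of the runtime side of this handshake: reading back the PipelinePhase that configure() serialized under Constants.PIPELINEID. Only the property keys and the Gson round-trip come from the example above; the specification lookup and the printout are illustrative assumptions, not cdap source.

// Hypothetical runtime counterpart to configure() above (not from the cdap source):
// deserialize the PipelinePhase stored in the worker's specification properties.
Map<String, String> properties = getContext().getSpecification().getProperties();
PipelinePhase pipeline = GSON.fromJson(properties.get(Constants.PIPELINEID), PipelinePhase.class);
// PipelinePhase exposes an iterator over its stages (see phase.iterator() in Examples 3 and 4)
Iterator<StageSpec> stages = pipeline.iterator();
while (stages.hasNext()) {
    StageSpec stage = stages.next();
    // each stage carries its plugin type, as checked in the source-count loop above
    System.out.println(stage.getName() + " -> " + stage.getPlugin().getType());
}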

Example 2 with PipelinePhase

Use of co.cask.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The class ETLMapReduce, method initialize:

@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void initialize() throws Exception {
    final MapReduceContext context = getContext();
    Map<String, String> properties = context.getSpecification().getProperties();
    if (Boolean.valueOf(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
        LogStageInjector.start();
    }
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context, mrMetrics);
    List<Finisher> finishers = new ArrayList<>();
    final Job job = context.getHadoopJob();
    final Configuration hConf = job.getConfiguration();
    hConf.setBoolean("mapreduce.map.speculative", false);
    hConf.setBoolean("mapreduce.reduce.speculative", false);
    // macro evaluator for resolving macros in plugin properties at runtime
    MacroEvaluator evaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context.getNamespace());
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    Set<String> connectorDatasets = GSON.fromJson(properties.get(Constants.CONNECTOR_DATASETS), CONNECTOR_DATASETS_TYPE);
    for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
        hConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
    }
    final PipelinePhase phase = phaseSpec.getPhase();
    PipelinePluginInstantiator pluginInstantiator = new PipelinePluginInstantiator(context, mrMetrics, phaseSpec, new MultiConnectorFactory());
    // more than one reducer should never happen if the planner is correct
    Set<StageSpec> reducers = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
    if (reducers.size() > 1) {
        Iterator<StageSpec> reducerIter = reducers.iterator();
        StringBuilder reducersStr = new StringBuilder(reducerIter.next().getName());
        while (reducerIter.hasNext()) {
            reducersStr.append(",");
            reducersStr.append(reducerIter.next().getName());
        }
        throw new IllegalStateException("Found multiple reducers ( " + reducersStr + " ) in the same pipeline phase. " + "This means there was a bug in planning the pipeline when it was deployed. ");
    }
    job.setMapperClass(ETLMapper.class);
    if (reducers.isEmpty()) {
        job.setNumReduceTasks(0);
    } else {
        job.setReducerClass(ETLReducer.class);
    }
    final Map<String, SinkOutput> sinkOutputs = new HashMap<>();
    final Map<String, String> inputAliasToStage = new HashMap<>();
    // call prepareRun on each stage in order so that any arguments set by a stage will be visible to subsequent stages
    for (final String stageName : phase.getDag().getTopologicalOrder()) {
        final StageSpec stageSpec = phase.getStage(stageName);
        String pluginType = stageSpec.getPluginType();
        boolean isConnectorSource = Constants.Connector.PLUGIN_TYPE.equals(pluginType) && phase.getSources().contains(stageName);
        boolean isConnectorSink = Constants.Connector.PLUGIN_TYPE.equals(pluginType) && phase.getSinks().contains(stageName);
        SubmitterPlugin submitterPlugin = null;
        if (BatchSource.PLUGIN_TYPE.equals(pluginType) || isConnectorSource) {
            BatchConfigurable<BatchSourceContext> batchSource = pluginInstantiator.newPluginInstance(stageName, evaluator);
            ContextProvider<MapReduceBatchContext> contextProvider = new MapReduceBatchContextProvider(context, pipelineRuntime, stageSpec, connectorDatasets);
            submitterPlugin = new SubmitterPlugin<>(stageName, context, batchSource, contextProvider, new SubmitterPlugin.PrepareAction<MapReduceBatchContext>() {

                @Override
                public void act(MapReduceBatchContext sourceContext) {
                    for (String inputAlias : sourceContext.getInputNames()) {
                        inputAliasToStage.put(inputAlias, stageName);
                    }
                }
            });
        } else if (BatchSink.PLUGIN_TYPE.equals(pluginType) || AlertPublisher.PLUGIN_TYPE.equals(pluginType) || isConnectorSink) {
            BatchConfigurable<BatchSinkContext> batchSink = pluginInstantiator.newPluginInstance(stageName, evaluator);
            ContextProvider<MapReduceBatchContext> contextProvider = new MapReduceBatchContextProvider(context, pipelineRuntime, stageSpec, connectorDatasets);
            submitterPlugin = new SubmitterPlugin<>(stageName, context, batchSink, contextProvider, new SubmitterPlugin.PrepareAction<MapReduceBatchContext>() {

                @Override
                public void act(MapReduceBatchContext sinkContext) {
                    sinkOutputs.put(stageName, new SinkOutput(sinkContext.getOutputNames()));
                }
            });
        } else if (Transform.PLUGIN_TYPE.equals(pluginType)) {
            Transform<?, ?> transform = pluginInstantiator.newPluginInstance(stageName, evaluator);
            ContextProvider<MapReduceBatchContext> contextProvider = new MapReduceBatchContextProvider(context, pipelineRuntime, stageSpec, connectorDatasets);
            submitterPlugin = new SubmitterPlugin<>(stageName, context, transform, contextProvider);
        } else if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
            final BatchAggregator<?, ?, ?> aggregator = pluginInstantiator.newPluginInstance(stageName, evaluator);
            ContextProvider<DefaultAggregatorContext> contextProvider = new AggregatorContextProvider(pipelineRuntime, stageSpec, context.getAdmin());
            submitterPlugin = new SubmitterPlugin<>(stageName, context, aggregator, contextProvider, new SubmitterPlugin.PrepareAction<DefaultAggregatorContext>() {

                @Override
                public void act(DefaultAggregatorContext aggregatorContext) {
                    if (aggregatorContext.getNumPartitions() != null) {
                        job.setNumReduceTasks(aggregatorContext.getNumPartitions());
                    }
                    Class<?> outputKeyClass = aggregatorContext.getGroupKeyClass();
                    Class<?> outputValClass = aggregatorContext.getGroupValueClass();
                    if (outputKeyClass == null) {
                        outputKeyClass = TypeChecker.getGroupKeyClass(aggregator);
                    }
                    if (outputValClass == null) {
                        outputValClass = TypeChecker.getGroupValueClass(aggregator);
                    }
                    hConf.set(MAP_KEY_CLASS, outputKeyClass.getName());
                    hConf.set(MAP_VAL_CLASS, outputValClass.getName());
                    job.setMapOutputKeyClass(getOutputKeyClass(stageName, outputKeyClass));
                    job.setMapOutputValueClass(getOutputValClass(stageName, outputValClass));
                }
            });
        } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
            final BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, evaluator);
            ContextProvider<DefaultJoinerContext> contextProvider = new JoinerContextProvider(pipelineRuntime, stageSpec, context.getAdmin());
            submitterPlugin = new SubmitterPlugin<>(stageName, context, batchJoiner, contextProvider, new SubmitterPlugin.PrepareAction<DefaultJoinerContext>() {

                @Override
                public void act(DefaultJoinerContext joinerContext) {
                    if (joinerContext.getNumPartitions() != null) {
                        job.setNumReduceTasks(joinerContext.getNumPartitions());
                    }
                    Class<?> outputKeyClass = joinerContext.getJoinKeyClass();
                    Class<?> inputRecordClass = joinerContext.getJoinInputRecordClass();
                    if (outputKeyClass == null) {
                        outputKeyClass = TypeChecker.getJoinKeyClass(batchJoiner);
                    }
                    if (inputRecordClass == null) {
                        inputRecordClass = TypeChecker.getJoinInputRecordClass(batchJoiner);
                    }
                    hConf.set(MAP_KEY_CLASS, outputKeyClass.getName());
                    hConf.set(MAP_VAL_CLASS, inputRecordClass.getName());
                    job.setMapOutputKeyClass(getOutputKeyClass(stageName, outputKeyClass));
                    getOutputValClass(stageName, inputRecordClass);
                    // for a joiner plugin, map output is tagged with the stage name,
                    // so the map output value class is always TaggedWritable
                    job.setMapOutputValueClass(TaggedWritable.class);
                }
            });
        }
        if (submitterPlugin != null) {
            submitterPlugin.prepareRun();
            finishers.add(submitterPlugin);
        }
    }
    hConf.set(SINK_OUTPUTS_KEY, GSON.toJson(sinkOutputs));
    hConf.set(INPUT_ALIAS_KEY, GSON.toJson(inputAliasToStage));
    finisher = new CompositeFinisher(finishers);
    WorkflowToken token = context.getWorkflowToken();
    if (token != null) {
        for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
            token.put(entry.getKey(), entry.getValue());
        }
    }
    // token is null when just the mapreduce job is run but not the entire workflow
    // we still want things to work in that case.
    hConf.set(RUNTIME_ARGS_KEY, GSON.toJson(pipelineRuntime.getArguments().asMap()));
}
Also used: DefaultAggregatorContext(co.cask.cdap.etl.batch.DefaultAggregatorContext) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CompositeFinisher(co.cask.cdap.etl.common.submit.CompositeFinisher) SubmitterPlugin(co.cask.cdap.etl.common.submit.SubmitterPlugin) Finisher(co.cask.cdap.etl.common.submit.Finisher) BatchAggregator(co.cask.cdap.etl.api.batch.BatchAggregator) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) Job(org.apache.hadoop.mapreduce.Job) JoinerContextProvider(co.cask.cdap.etl.common.submit.JoinerContextProvider) ContextProvider(co.cask.cdap.etl.common.submit.ContextProvider) AggregatorContextProvider(co.cask.cdap.etl.common.submit.AggregatorContextProvider) MapReduceContext(co.cask.cdap.api.mapreduce.MapReduceContext) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) Map(java.util.Map) PipelineRuntime(co.cask.cdap.etl.common.PipelineRuntime) MacroEvaluator(co.cask.cdap.api.macro.MacroEvaluator) WorkflowToken(co.cask.cdap.api.workflow.WorkflowToken) DefaultJoinerContext(co.cask.cdap.etl.batch.DefaultJoinerContext) StageSpec(co.cask.cdap.etl.spec.StageSpec) MultiConnectorFactory(co.cask.cdap.etl.batch.connector.MultiConnectorFactory) PipelinePluginInstantiator(co.cask.cdap.etl.batch.PipelinePluginInstantiator) BatchSourceContext(co.cask.cdap.etl.api.batch.BatchSourceContext) BatchPhaseSpec(co.cask.cdap.etl.batch.BatchPhaseSpec) BatchConfigurable(co.cask.cdap.etl.api.batch.BatchConfigurable) TransactionPolicy(co.cask.cdap.api.annotation.TransactionPolicy)
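As a side note, on Java 8+ the multiple-reducer guard in initialize() collapses into a stream pipeline. This is a behaviorally equivalent sketch, not the code shipped in cdap; getStagesOfType and getName are the same PipelinePhase/StageSpec calls used above.

// Equivalent to the StringBuilder loop in initialize() above (assumes Java 8 streams).
Set<StageSpec> reducers = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
if (reducers.size() > 1) {
    String reducerNames = reducers.stream()
        .map(StageSpec::getName)
        .collect(java.util.stream.Collectors.joining(", "));
    throw new IllegalStateException("Found multiple reducers (" + reducerNames + ") in the same pipeline phase. "
        + "This indicates a bug in planning the pipeline when it was deployed.");
}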

Example 3 with PipelinePhase

Use of co.cask.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The class PipelineCondition, method apply:

@Override
public boolean apply(@Nullable WorkflowContext input) {
    if (input == null) {
        // should not happen
        throw new IllegalStateException("WorkflowContext for the Condition cannot be null.");
    }
    Map<String, String> properties = input.getConditionSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    PipelinePhase phase = phaseSpec.getPhase();
    StageSpec stageSpec = phase.iterator().next();
    PluginContext pluginContext = new PipelinePluginContext(input, metrics, phaseSpec.isStageLoggingEnabled(), phaseSpec.isProcessTimingEnabled());
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(new BasicArguments(input.getToken(), input.getRuntimeArguments()), input.getLogicalStartTime(), input, input.getNamespace());
    try {
        Condition condition = pluginContext.newPluginInstance(stageSpec.getName(), macroEvaluator);
        PipelineRuntime pipelineRuntime = new PipelineRuntime(input, metrics);
        ConditionContext conditionContext = new BasicConditionContext(input, pipelineRuntime, stageSpec);
        boolean result = condition.apply(conditionContext);
        WorkflowToken token = input.getToken();
        if (token == null) {
            throw new IllegalStateException("WorkflowToken cannot be null when Condition is executed through Workflow.");
        }
        for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
            token.put(entry.getKey(), entry.getValue());
        }
        return result;
    } catch (Exception e) {
        String msg = String.format("Error executing condition '%s' in the pipeline.", stageSpec.getName());
        throw new RuntimeException(msg, e);
    }
}
Also used: Condition(co.cask.cdap.etl.api.condition.Condition) AbstractCondition(co.cask.cdap.api.workflow.AbstractCondition) MacroEvaluator(co.cask.cdap.api.macro.MacroEvaluator) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) PipelineRuntime(co.cask.cdap.etl.common.PipelineRuntime) PluginContext(co.cask.cdap.api.plugin.PluginContext) PipelinePluginContext(co.cask.cdap.etl.common.plugin.PipelinePluginContext) WorkflowToken(co.cask.cdap.api.workflow.WorkflowToken) ConditionContext(co.cask.cdap.etl.api.condition.ConditionContext) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) BatchPhaseSpec(co.cask.cdap.etl.batch.BatchPhaseSpec) BasicArguments(co.cask.cdap.etl.common.BasicArguments) HashMap(java.util.HashMap) Map(java.util.Map)
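For reference, a minimal Condition plugin that pluginContext.newPluginInstance(...) above could return. Only the apply(ConditionContext) signature is inferred from the condition.apply(conditionContext) call in the example; the class itself is hypothetical, and a real plugin would also carry CDAP's plugin annotations.

// Hypothetical plugin, not from the cdap source: the smallest Condition
// that satisfies the apply(ConditionContext) call made by PipelineCondition.
public class AlwaysTrueCondition extends Condition {
    @Override
    public boolean apply(ConditionContext context) throws Exception {
        // a real condition would inspect runtime arguments or prior-stage state here
        return true;
    }
}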

Example 4 with PipelinePhase

Use of co.cask.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The class PipelineAction, method run:

@Override
public void run() throws Exception {
    CustomActionContext context = getContext();
    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    PipelinePhase phase = phaseSpec.getPhase();
    StageSpec stageSpec = phase.iterator().next();
    PluginContext pluginContext = new PipelinePluginContext(context, metrics, phaseSpec.isStageLoggingEnabled(), phaseSpec.isProcessTimingEnabled());
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context, metrics);
    Action action = pluginContext.newPluginInstance(stageSpec.getName(), new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context.getNamespace()));
    ActionContext actionContext = new BasicActionContext(context, pipelineRuntime, stageSpec);
    // skip running the action when the data tracer (preview) is enabled for this stage
    if (!context.getDataTracer(stageSpec.getName()).isEnabled()) {
        action.run(actionContext);
    }
    WorkflowToken token = context.getWorkflowToken();
    if (token == null) {
        throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
    }
    for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
        token.put(entry.getKey(), entry.getValue());
    }
}
Also used: Action(co.cask.cdap.etl.api.action.Action) CustomAction(co.cask.cdap.api.customaction.CustomAction) AbstractCustomAction(co.cask.cdap.api.customaction.AbstractCustomAction) PipelineRuntime(co.cask.cdap.etl.common.PipelineRuntime) PipelinePluginContext(co.cask.cdap.etl.common.plugin.PipelinePluginContext) PluginContext(co.cask.cdap.api.plugin.PluginContext) WorkflowToken(co.cask.cdap.api.workflow.WorkflowToken) CustomActionContext(co.cask.cdap.api.customaction.CustomActionContext) ActionContext(co.cask.cdap.etl.api.action.ActionContext) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) BatchPhaseSpec(co.cask.cdap.etl.batch.BatchPhaseSpec) HashMap(java.util.HashMap) Map(java.util.Map)
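Similarly, a minimal Action plugin compatible with the action.run(actionContext) call above. Only the run(ActionContext) signature is inferred from the example; the class is hypothetical.

// Hypothetical plugin, not from the cdap source: the smallest Action
// that satisfies the run(ActionContext) call made by PipelineAction.
public class NoOpAction extends Action {
    @Override
    public void run(ActionContext context) throws Exception {
        // a real action would perform a side effect here, e.g. move files or run a query
    }
}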

Example 5 with PipelinePhase

Use of co.cask.cdap.etl.common.PipelinePhase in project cdap by caskdata.

The class PipelinePlannerTest, method testGeneratePlan:

@Test
public void testGeneratePlan() {
    /*
             |--- n2(r) ----------|
             |                    |                                    |-- n10
        n1 --|--- n3(r) --- n5 ---|--- n6 --- n7(r) --- n8 --- n9(r) --|
             |                    |                                    |-- n11
             |--- n4(r) ----------|
     */
    // create the spec for this pipeline
    Schema schema = Schema.recordOf("stuff", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
    Set<StageSpec> stageSpecs = ImmutableSet.of(
        StageSpec.builder("n1", NODE).addOutputSchema(schema, "n2", "n3", "n4").build(),
        StageSpec.builder("n2", REDUCE).addInputSchema("n1", schema).addOutputSchema(schema, "n6").build(),
        StageSpec.builder("n3", REDUCE).addInputSchema("n1", schema).addOutputSchema(schema, "n5").build(),
        StageSpec.builder("n4", REDUCE).addInputSchema("n1", schema).addOutputSchema(schema, "n6").build(),
        StageSpec.builder("n5", NODE).addInputSchema("n3", schema).addOutputSchema(schema, "n6").build(),
        StageSpec.builder("n6", NODE).addInputSchemas(ImmutableMap.of("n2", schema, "n5", schema, "n4", schema)).addOutputSchema(schema, "n7").build(),
        StageSpec.builder("n7", REDUCE).addInputSchema("n6", schema).addOutputSchema(schema, "n8").build(),
        StageSpec.builder("n8", NODE).addInputSchema("n7", schema).addOutputSchema(schema, "n9").build(),
        StageSpec.builder("n9", REDUCE).addInputSchema("n8", schema).addOutputSchema(schema, "n10", "n11").build(),
        StageSpec.builder("n10", NODE).addInputSchema("n9", schema).build(),
        StageSpec.builder("n11", NODE).addInputSchema("n9", schema).build());
    Set<Connection> connections = ImmutableSet.of(
        new Connection("n1", "n2"), new Connection("n1", "n3"), new Connection("n1", "n4"),
        new Connection("n2", "n6"), new Connection("n3", "n5"), new Connection("n4", "n6"),
        new Connection("n5", "n6"), new Connection("n6", "n7"), new Connection("n7", "n8"),
        new Connection("n8", "n9"), new Connection("n9", "n10"), new Connection("n9", "n11"));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE);
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> emptySet = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, emptySet, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    Map<String, PipelinePhase> phases = new HashMap<>();
    /*
        n1 --> n1.out.connector
     */
    PipelinePhase phase1 = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n1", NODE).addOutputSchema(schema, "n2", "n3", "n4").build())
        .addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnections("n1", ImmutableSet.of("n1.out.connector"))
        .build();
    String phase1Name = PipelinePlanner.getPhaseName(phase1.getDag());
    phases.put(phase1Name, phase1);
    /*
        phase2:
        n1.out.connector --- n2(r) --- n6 --- n7.connector
     */
    PipelinePhase phase2 = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n2", REDUCE).addInputSchema("n1", schema).addOutputSchema(schema, "n6").build())
        .addStage(StageSpec.builder("n6", NODE).addInputSchema("n2", schema).addInputSchema("n4", schema).addInputSchema("n5", schema).addOutputSchema(schema, "n7").build())
        .addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n1.out.connector", "n2")
        .addConnection("n2", "n6")
        .addConnection("n6", "n7.connector")
        .build();
    String phase2Name = PipelinePlanner.getPhaseName(phase2.getDag());
    phases.put(phase2Name, phase2);
    /*
        phase3:
        n1.out.connector --- n3(r) --- n5 --- n6 --- n7.connector
     */
    PipelinePhase phase3 = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n5", NODE).addInputSchema("n3", schema).addOutputSchema(schema, "n6").build())
        .addStage(StageSpec.builder("n6", NODE).addInputSchema("n2", schema).addInputSchema("n4", schema).addInputSchema("n5", schema).addOutputSchema(schema, "n7").build())
        .addStage(StageSpec.builder("n3", REDUCE).addInputSchema("n1", schema).addOutputSchema(schema, "n5").build())
        .addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n1.out.connector", "n3")
        .addConnection("n3", "n5")
        .addConnection("n5", "n6")
        .addConnection("n6", "n7.connector")
        .build();
    String phase3Name = PipelinePlanner.getPhaseName(phase3.getDag());
    phases.put(phase3Name, phase3);
    /*
        phase4:
        n1.out.connector --- n4(r) --- n6 --- n7.connector
     */
    PipelinePhase phase4 = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n4", REDUCE).addInputSchema("n1", schema).addOutputSchema(schema, "n6").build())
        .addStage(StageSpec.builder("n6", NODE).addInputSchema("n2", schema).addInputSchema("n4", schema).addInputSchema("n5", schema).addOutputSchema(schema, "n7").build())
        .addStage(StageSpec.builder("n1.out.connector", connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n1.out.connector", "n4")
        .addConnection("n4", "n6")
        .addConnection("n6", "n7.connector")
        .build();
    String phase4Name = PipelinePlanner.getPhaseName(phase4.getDag());
    phases.put(phase4Name, phase4);
    /*
        phase5:
        n7.connector --- n7(r) --- n8 --- n9.connector
     */
    PipelinePhase phase5 = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n8", NODE).addInputSchema("n7", schema).addOutputSchema(schema, "n9").build())
        .addStage(StageSpec.builder("n7", REDUCE).addInputSchema("n6", schema).addOutputSchema(schema, "n8").build())
        .addStage(StageSpec.builder("n7.connector", connectorSpec("n7", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n9.connector", connectorSpec("n9", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n7.connector", "n7")
        .addConnection("n7", "n8")
        .addConnection("n8", "n9.connector")
        .build();
    String phase5Name = PipelinePlanner.getPhaseName(phase5.getDag());
    phases.put(phase5Name, phase5);
    /*
        phase6:
                                 |-- n10
        n9.connector --- n9(r) --|
                                 |-- n11
     */
    PipelinePhase phase6 = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n10", NODE).addInputSchema("n9", schema).build())
        .addStage(StageSpec.builder("n11", NODE).addInputSchema("n9", schema).build())
        .addStage(StageSpec.builder("n9", REDUCE).addInputSchema("n8", schema).addOutputSchema(schema, "n10", "n11").build())
        .addStage(StageSpec.builder("n9.connector", connectorSpec("n9", Constants.Connector.SOURCE_TYPE)).build())
        .addConnection("n9.connector", "n9")
        .addConnection("n9", "n10")
        .addConnection("n9", "n11")
        .build();
    String phase6Name = PipelinePlanner.getPhaseName(phase6.getDag());
    phases.put(phase6Name, phase6);
    Set<Connection> phaseConnections = new HashSet<>();
    phaseConnections.add(new Connection(phase1Name, phase2Name));
    phaseConnections.add(new Connection(phase1Name, phase3Name));
    phaseConnections.add(new Connection(phase1Name, phase4Name));
    phaseConnections.add(new Connection(phase2Name, phase5Name));
    phaseConnections.add(new Connection(phase3Name, phase5Name));
    phaseConnections.add(new Connection(phase4Name, phase5Name));
    phaseConnections.add(new Connection(phase5Name, phase6Name));
    PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used: HashMap(java.util.HashMap) Schema(co.cask.cdap.api.data.schema.Schema) Connection(co.cask.cdap.etl.proto.Connection) PipelineSpec(co.cask.cdap.etl.spec.PipelineSpec) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)
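To see the PipelinePhase builder in isolation, here is the smallest phase buildable with the same calls the test exercises: one source wired straight to one sink, with no reducers and no connectors. NODE is the test class's plugin-spec fixture, so this sketch only compiles inside that test.

// Minimal phase using only builder calls that appear in testGeneratePlan above.
PipelinePhase simplePhase = PipelinePhase.builder(ImmutableSet.of(NODE.getType()))
    .addStage(StageSpec.builder("src", NODE).build())
    .addStage(StageSpec.builder("sink", NODE).build())
    .addConnection("src", "sink")
    .build();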

Aggregations

PipelinePhase (co.cask.cdap.etl.common.PipelinePhase): 17 usages
StageSpec (co.cask.cdap.etl.spec.StageSpec): 15 usages
HashMap (java.util.HashMap): 13 usages
HashSet (java.util.HashSet): 7 usages
BatchPhaseSpec (co.cask.cdap.etl.batch.BatchPhaseSpec): 5 usages
Connection (co.cask.cdap.etl.proto.Connection): 5 usages
PipelineSpec (co.cask.cdap.etl.spec.PipelineSpec): 5 usages
WorkflowToken (co.cask.cdap.api.workflow.WorkflowToken): 4 usages
DefaultMacroEvaluator (co.cask.cdap.etl.common.DefaultMacroEvaluator): 4 usages
PipelineRuntime (co.cask.cdap.etl.common.PipelineRuntime): 4 usages
Map (java.util.Map): 4 usages
Test (org.junit.Test): 4 usages
DatasetContext (co.cask.cdap.api.data.DatasetContext): 3 usages
MacroEvaluator (co.cask.cdap.api.macro.MacroEvaluator): 3 usages
PluginContext (co.cask.cdap.api.plugin.PluginContext): 3 usages
TxRunnable (co.cask.cdap.api.TxRunnable): 2 usages
TransactionPolicy (co.cask.cdap.api.annotation.TransactionPolicy): 2 usages
BatchAggregator (co.cask.cdap.etl.api.batch.BatchAggregator): 2 usages
BatchConfigurable (co.cask.cdap.etl.api.batch.BatchConfigurable): 2 usages
BatchSourceContext (co.cask.cdap.etl.api.batch.BatchSourceContext): 2 usages