Search in sources:

Example 1 with PluginContext

Use of io.cdap.cdap.api.plugin.PluginContext in project cdap by caskdata.

The class PipelineAction, method run().

@Override
public void run() throws Exception {
    CustomActionContext context = getContext();
    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    PipelinePhase phase = phaseSpec.getPhase();
    StageSpec stageSpec = phase.iterator().next();
    PluginContext pluginContext = new PipelinePluginContext(context, metrics, phaseSpec.isStageLoggingEnabled(), phaseSpec.isProcessTimingEnabled());
    PipelineRuntime pipelineRuntime = new PipelineRuntime(context, metrics);
    Action action = pluginContext.newPluginInstance(stageSpec.getName(), new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context, context.getNamespace()));
    ActionContext actionContext = new BasicActionContext(context, pipelineRuntime, stageSpec);
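    // Only execute the action when data tracing (preview mode) is disabled for this stage.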
    if (!context.getDataTracer(stageSpec.getName()).isEnabled()) {
        action.run(actionContext);
    }
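    // Publish any arguments the action added into the workflow token so downstream stages can read them.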
    WorkflowToken token = context.getWorkflowToken();
    if (token == null) {
        throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
    }
    for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
        token.put(entry.getKey(), entry.getValue());
    }
}
Also used : Action(io.cdap.cdap.etl.api.action.Action) AbstractCustomAction(io.cdap.cdap.api.customaction.AbstractCustomAction) CustomAction(io.cdap.cdap.api.customaction.CustomAction) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) ActionContext(io.cdap.cdap.etl.api.action.ActionContext) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) HashMap(java.util.HashMap) Map(java.util.Map) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext)

Example 2 with PluginContext

Use of io.cdap.cdap.api.plugin.PluginContext in project cdap by caskdata.

The class StreamingMultiSinkFunction, method call().

@Override
public void call(JavaRDD<RecordInfo<Object>> data, Time batchTime) throws Exception {
    long logicalStartTime = batchTime.milliseconds();
    MacroEvaluator evaluator = new DefaultMacroEvaluator(new BasicArguments(sec), logicalStartTime, sec.getSecureStore(), sec.getServiceDiscoverer(), sec.getNamespace());
    PluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(), phaseSpec.isStageLoggingEnabled(), phaseSpec.isProcessTimingEnabled());
    SparkBatchSinkFactory sinkFactory = new SparkBatchSinkFactory();
    PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec, logicalStartTime);
    Map<String, SubmitterLifecycle<?>> stages = createStages(evaluator);
    // call prepareRun() on all the stages in the group
    // need to call it in an order that guarantees that inputs are called before outputs
    // this is because plugins can call getArguments().set() in the prepareRun() method,
    // which downstream stages should be able to read
    List<String> traversalOrder = new ArrayList<>(group.size());
    for (String stageName : phaseSpec.getPhase().getDag().getTopologicalOrder()) {
        if (group.contains(stageName)) {
            traversalOrder.add(stageName);
        }
    }
    for (String stageName : traversalOrder) {
        SubmitterLifecycle<?> plugin = stages.get(stageName);
        StageSpec stageSpec = phaseSpec.getPhase().getStage(stageName);
        try {
            prepareRun(pipelineRuntime, sinkFactory, stageSpec, plugin);
        } catch (Exception e) {
            LOG.error("Error preparing sink {} for the batch for time {}.", stageName, logicalStartTime, e);
            return;
        }
    }
    // run the actual transforms and sinks in this group
    boolean ranSuccessfully = true;
    try {
        MultiSinkFunction multiSinkFunction = new MultiSinkFunction(sec, phaseSpec, group, collectors);
        Set<String> outputNames = sinkFactory.writeCombinedRDD(data.flatMapToPair(multiSinkFunction), sec, sinkNames);
        sec.execute(new TxRunnable() {

            @Override
            public void run(DatasetContext context) throws Exception {
                for (String outputName : outputNames) {
                    ExternalDatasets.registerLineage(sec.getAdmin(), outputName, AccessType.WRITE, null, () -> context.getDataset(outputName));
                }
            }
        });
    } catch (Exception e) {
        LOG.error("Error writing to sinks {} for the batch for time {}.", sinkNames, logicalStartTime, e);
        ranSuccessfully = false;
    }
    // run onRunFinish() for each sink
    for (String stageName : traversalOrder) {
        SubmitterLifecycle<?> plugin = stages.get(stageName);
        StageSpec stageSpec = phaseSpec.getPhase().getStage(stageName);
        try {
            onRunFinish(pipelineRuntime, sinkFactory, stageSpec, plugin, ranSuccessfully);
        } catch (Exception e) {
            LOG.warn("Unable to execute onRunFinish for sink {}", stageName, e);
        }
    }
}
Also used : SubmitterLifecycle(io.cdap.cdap.etl.api.SubmitterLifecycle) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) SparkPipelineRuntime(io.cdap.cdap.etl.spark.SparkPipelineRuntime) SparkPipelinePluginContext(io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) SparkPipelineRuntime(io.cdap.cdap.etl.spark.SparkPipelineRuntime) ArrayList(java.util.ArrayList) MultiSinkFunction(io.cdap.cdap.etl.spark.function.MultiSinkFunction) TransactionFailureException(org.apache.tephra.TransactionFailureException) SparkPipelinePluginContext(io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext) SparkBatchSinkFactory(io.cdap.cdap.etl.spark.batch.SparkBatchSinkFactory) TxRunnable(io.cdap.cdap.api.TxRunnable) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) BasicArguments(io.cdap.cdap.etl.common.BasicArguments) DatasetContext(io.cdap.cdap.api.data.DatasetContext)
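
The group filter in the middle of call() walks the DAG's full topological order and keeps only this group's stages, so any stage that sets arguments in prepareRun() is prepared before the stages downstream of it. The same idiom in isolation, as a standalone sketch with made-up stage names (not CDAP code):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class TraversalOrderDemo {
    public static void main(String[] args) {
        // Full pipeline order, as a topological sort of the DAG would produce it.
        List<String> topologicalOrder = Arrays.asList("source", "parse", "filter", "sink1", "sink2");
        // Stages belonging to this sink group, in no particular order.
        Set<String> group = new HashSet<>(Arrays.asList("sink2", "filter", "sink1"));
        // Keep only group members, preserving topological order, so stages
        // that set arguments run before the stages that read them.
        List<String> traversalOrder = new ArrayList<>(group.size());
        for (String stageName : topologicalOrder) {
            if (group.contains(stageName)) {
                traversalOrder.add(stageName);
            }
        }
        System.out.println(traversalOrder); // prints [filter, sink1, sink2]
    }
}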

Example 3 with PluginContext

Use of io.cdap.cdap.api.plugin.PluginContext in project cdap by caskdata.

The class StreamingBatchSinkFunction, method call().

@Override
public void call(JavaRDD<T> data, Time batchTime) throws Exception {
    final long logicalStartTime = batchTime.milliseconds();
    MacroEvaluator evaluator = new DefaultMacroEvaluator(new BasicArguments(sec), logicalStartTime, sec.getSecureStore(), sec.getServiceDiscoverer(), sec.getNamespace());
    PluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(), stageSpec.isStageLoggingEnabled(), stageSpec.isProcessTimingEnabled());
    final SparkBatchSinkFactory sinkFactory = new SparkBatchSinkFactory();
    final String stageName = stageSpec.getName();
    final BatchSink<Object, Object, Object> batchSink = pluginContext.newPluginInstance(stageName, evaluator);
    final PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec, logicalStartTime);
    boolean isPrepared = false;
    boolean isDone = false;
    try {
        sec.execute(new TxRunnable() {

            @Override
            public void run(DatasetContext datasetContext) throws Exception {
                SparkBatchSinkContext sinkContext = new SparkBatchSinkContext(sinkFactory, sec, datasetContext, pipelineRuntime, stageSpec);
                batchSink.prepareRun(sinkContext);
            }
        });
        isPrepared = true;
        PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, pipelineRuntime.getArguments().asMap(), batchTime.milliseconds(), new NoopStageStatisticsCollector());
        Set<String> outputNames = sinkFactory.writeFromRDD(data.flatMapToPair(new BatchSinkFunction<T, Object, Object>(pluginFunctionContext, functionCache)), sec, stageName);
        sec.execute(new TxRunnable() {

            @Override
            public void run(DatasetContext context) throws Exception {
                for (String outputName : outputNames) {
                    ExternalDatasets.registerLineage(sec.getAdmin(), outputName, AccessType.WRITE, null, () -> context.getDataset(outputName));
                }
            }
        });
        isDone = true;
        sec.execute(new TxRunnable() {

            @Override
            public void run(DatasetContext datasetContext) throws Exception {
                SparkBatchSinkContext sinkContext = new SparkBatchSinkContext(sinkFactory, sec, datasetContext, pipelineRuntime, stageSpec);
                batchSink.onRunFinish(true, sinkContext);
            }
        });
    } catch (Exception e) {
        LOG.error("Error writing to sink {} for the batch for time {}.", stageName, logicalStartTime, e);
    } finally {
        if (isPrepared && !isDone) {
            sec.execute(new TxRunnable() {

                @Override
                public void run(DatasetContext datasetContext) throws Exception {
                    SparkBatchSinkContext sinkContext = new SparkBatchSinkContext(sinkFactory, sec, datasetContext, pipelineRuntime, stageSpec);
                    batchSink.onRunFinish(false, sinkContext);
                }
            });
        }
    }
}
Also used : NoopStageStatisticsCollector(io.cdap.cdap.etl.common.NoopStageStatisticsCollector) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) SparkPipelineRuntime(io.cdap.cdap.etl.spark.SparkPipelineRuntime) SparkPipelinePluginContext(io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) SparkPipelineRuntime(io.cdap.cdap.etl.spark.SparkPipelineRuntime) SparkBatchSinkContext(io.cdap.cdap.etl.spark.batch.SparkBatchSinkContext) BatchSinkFunction(io.cdap.cdap.etl.spark.function.BatchSinkFunction) SparkPipelinePluginContext(io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext) PluginFunctionContext(io.cdap.cdap.etl.spark.function.PluginFunctionContext) SparkBatchSinkFactory(io.cdap.cdap.etl.spark.batch.SparkBatchSinkFactory) TxRunnable(io.cdap.cdap.api.TxRunnable) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) BasicArguments(io.cdap.cdap.etl.common.BasicArguments) DatasetContext(io.cdap.cdap.api.data.DatasetContext)
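
The isPrepared/isDone flags form a small state machine: onRunFinish(true, ...) runs only after a successful write, and the finally block fires onRunFinish(false, ...) only when prepareRun() succeeded but the run itself did not complete. The pattern in isolation, as a standalone sketch (the Lifecycle interface here is illustrative, not the CDAP API):

public class LifecycleGuardDemo {
    interface Lifecycle {
        void prepareRun() throws Exception;
        void run() throws Exception;
        void onRunFinish(boolean succeeded);
    }

    static void execute(Lifecycle plugin) {
        boolean isPrepared = false;
        boolean isDone = false;
        try {
            plugin.prepareRun();
            isPrepared = true;
            plugin.run();
            isDone = true;
            plugin.onRunFinish(true); // normal completion
        } catch (Exception e) {
            System.err.println("Error during run: " + e.getMessage());
        } finally {
            if (isPrepared && !isDone) {
                plugin.onRunFinish(false); // clean up a prepared-but-failed run
            }
        }
    }

    public static void main(String[] args) {
        execute(new Lifecycle() {
            @Override public void prepareRun() { System.out.println("prepareRun"); }
            @Override public void run() throws Exception { throw new Exception("simulated failure"); }
            @Override public void onRunFinish(boolean ok) { System.out.println("onRunFinish(" + ok + ")"); }
        });
        // prints: prepareRun, Error during run: simulated failure, onRunFinish(false)
    }
}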

Example 4 with PluginContext

Use of io.cdap.cdap.api.plugin.PluginContext in project cdap by caskdata.

The class StreamingSparkSinkFunction, method call().

@Override
public void call(JavaRDD<T> data, Time batchTime) throws Exception {
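    // Nothing to write for an empty micro-batch; skip all setup.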
    if (data.isEmpty()) {
        return;
    }
    final long logicalStartTime = batchTime.milliseconds();
    MacroEvaluator evaluator = new DefaultMacroEvaluator(new BasicArguments(sec), logicalStartTime, sec.getSecureStore(), sec.getServiceDiscoverer(), sec.getNamespace());
    final PluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(), stageSpec.isStageLoggingEnabled(), stageSpec.isProcessTimingEnabled());
    final PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec, batchTime.milliseconds());
    final String stageName = stageSpec.getName();
    final SparkSink<T> sparkSink = pluginContext.newPluginInstance(stageName, evaluator);
    boolean isPrepared = false;
    boolean isDone = false;
    try {
        sec.execute(new TxRunnable() {

            @Override
            public void run(DatasetContext datasetContext) throws Exception {
                SparkPluginContext context = new BasicSparkPluginContext(null, pipelineRuntime, stageSpec, datasetContext, sec.getAdmin());
                sparkSink.prepareRun(context);
            }
        });
        isPrepared = true;
        final SparkExecutionPluginContext sparkExecutionPluginContext = new SparkStreamingExecutionContext(sec, JavaSparkContext.fromSparkContext(data.rdd().context()), logicalStartTime, stageSpec);
        final JavaRDD<T> countedRDD = data.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null)).cache();
        sec.execute(new TxRunnable() {

            @Override
            public void run(DatasetContext context) throws Exception {
                sparkSink.run(sparkExecutionPluginContext, countedRDD);
            }
        });
        isDone = true;
        sec.execute(new TxRunnable() {

            @Override
            public void run(DatasetContext datasetContext) throws Exception {
                SparkPluginContext context = new BasicSparkPluginContext(null, pipelineRuntime, stageSpec, datasetContext, sec.getAdmin());
                sparkSink.onRunFinish(true, context);
            }
        });
    } catch (Exception e) {
        LOG.error("Error while executing sink {} for the batch for time {}.", stageName, logicalStartTime, e);
    } finally {
        if (isPrepared && !isDone) {
            sec.execute(new TxRunnable() {

                @Override
                public void run(DatasetContext datasetContext) throws Exception {
                    SparkPluginContext context = new BasicSparkPluginContext(null, pipelineRuntime, stageSpec, datasetContext, sec.getAdmin());
                    sparkSink.onRunFinish(false, context);
                }
            });
        }
    }
}
Also used : DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) SparkPipelineRuntime(io.cdap.cdap.etl.spark.SparkPipelineRuntime) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) SparkPipelinePluginContext(io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) BasicSparkPluginContext(io.cdap.cdap.etl.spark.batch.BasicSparkPluginContext) SparkExecutionPluginContext(io.cdap.cdap.etl.api.batch.SparkExecutionPluginContext) SparkPluginContext(io.cdap.cdap.etl.api.batch.SparkPluginContext) SparkPipelineRuntime(io.cdap.cdap.etl.spark.SparkPipelineRuntime) SparkStreamingExecutionContext(io.cdap.cdap.etl.spark.streaming.SparkStreamingExecutionContext) CountingFunction(io.cdap.cdap.etl.spark.function.CountingFunction) SparkPipelinePluginContext(io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext) SparkExecutionPluginContext(io.cdap.cdap.etl.api.batch.SparkExecutionPluginContext) TxRunnable(io.cdap.cdap.api.TxRunnable) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) BasicArguments(io.cdap.cdap.etl.common.BasicArguments) DatasetContext(io.cdap.cdap.api.data.DatasetContext) BasicSparkPluginContext(io.cdap.cdap.etl.spark.batch.BasicSparkPluginContext) SparkPluginContext(io.cdap.cdap.etl.api.batch.SparkPluginContext) BasicSparkPluginContext(io.cdap.cdap.etl.spark.batch.BasicSparkPluginContext)
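
CountingFunction is a pass-through map that increments a records.in metric for each element before the sink consumes the cached RDD. The same wrapper idea with plain Java streams and a counter standing in for CDAP metrics (an illustrative sketch, not the CDAP class):

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.stream.Collectors;

public class CountingFunctionDemo {
    // Pass-through transform: returns each record unchanged, counting it as a side effect.
    static <T> Function<T, T> counting(AtomicLong counter) {
        return record -> {
            counter.incrementAndGet();
            return record;
        };
    }

    public static void main(String[] args) {
        AtomicLong recordsIn = new AtomicLong();
        List<String> batch = Arrays.asList("a", "b", "c");
        List<String> out = batch.stream().map(counting(recordsIn)).collect(Collectors.toList());
        System.out.println(out + " records.in=" + recordsIn.get()); // [a, b, c] records.in=3
    }
}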

Example 5 with PluginContext

Use of io.cdap.cdap.api.plugin.PluginContext in project cdap by caskdata.

The class ArtifactRepositoryTest, method testMacroPlugin().

@Test
public void testMacroPlugin() throws Exception {
    File pluginDir = TMP_FOLDER.newFolder();
    addPluginArtifact();
    SortedMap<ArtifactDescriptor, Set<PluginClass>> plugins = getPlugins();
    copyArtifacts(pluginDir, plugins);
    // set up test macro evaluator's substitutions
    Map<String, String> propertySubstitutions = ImmutableMap.<String, String>builder()
        .put("expansiveHostname", "${hostname}/${path}:${port}")
        .put("hostname", "${one}")
        .put("path", "${two}")
        .put("port", "${three}")
        .put("one", "${host${hostScopeMacro}}")
        .put("hostScopeMacro", "-local")
        .put("host-local", "${l}${o}${c}${a}${l}${hostSuffix}")
        .put("l", "l").put("o", "o").put("c", "c").put("a", "a")
        .put("hostSuffix", "host")
        .put("two", "${filename${fileTypeMacro}}")
        .put("three", "${firstPortDigit}${secondPortDigit}")
        .put("filename", "index")
        .put("fileTypeMacro", "-html")
        .put("filename-html", "index.html")
        .put("filename-php", "index.php")
        .put("firstPortDigit", "8")
        .put("secondPortDigit", "0")
        .put("aBoolean", "true")
        .put("aByte", "101")
        .put("aChar", "k")
        .put("aDouble", "64.0")
        .put("aFloat", "52.0")
        .put("anInt", "42")
        .put("aLong", "32")
        .put("aShort", "81")
        .put("authInfo", new Gson().toJson(new TestPlugin.AuthInfo("token", "id")))
        .build();
    // Instantiate the plugins and execute them
    try (PluginInstantiator instantiator = new PluginInstantiator(cConf, appClassLoader, pluginDir)) {
        for (Map.Entry<ArtifactDescriptor, Set<PluginClass>> entry : plugins.entrySet()) {
            for (PluginClass pluginClass : entry.getValue()) {
                Plugin pluginInfo = new Plugin(new ArrayList<>(), entry.getKey().getArtifactId(), pluginClass,
                    PluginProperties.builder()
                        .add("class.name", TEST_EMPTY_CLASS)
                        .add("nullableLongFlag", "10")
                        .add("host", "${expansiveHostname}")
                        .add("aBoolean", "${aBoolean}")
                        .add("aByte", "${aByte}")
                        .add("aChar", "${aChar}")
                        .add("aDouble", "${aDouble}")
                        .add("anInt", "${anInt}")
                        .add("aFloat", "${aFloat}")
                        .add("aLong", "${aLong}")
                        .add("aShort", "${aShort}")
                        .add("authInfo", "${authInfo}")
                        .build());
                TestMacroEvaluator testMacroEvaluator = new TestMacroEvaluator(propertySubstitutions, new HashMap<>());
                Callable<String> plugin = instantiator.newInstance(pluginInfo, testMacroEvaluator);
                Assert.assertEquals("localhost/index.html:80,true,101,k,64.0,52.0,42,32,81,AuthInfo{token='token', id='id'}", plugin.call());
                String pluginId = "5";
                PluginContext pluginContext = new DefaultPluginContext(instantiator, NamespaceId.DEFAULT.app("abc").worker("w"), ImmutableMap.of(pluginId, pluginInfo), new FeatureFlagsProvider() {
                });
                PluginProperties resolvedProperties = pluginContext.getPluginProperties(pluginId, testMacroEvaluator);
                Map<String, String> expected = new HashMap<>();
                expected.put("class.name", TEST_EMPTY_CLASS);
                expected.put("nullableLongFlag", "10");
                expected.put("host", "localhost/index.html:80");
                expected.put("aBoolean", "true");
                expected.put("aByte", "101");
                expected.put("aChar", "k");
                expected.put("aDouble", "64.0");
                expected.put("anInt", "42");
                expected.put("aFloat", "52.0");
                expected.put("aLong", "32");
                expected.put("aShort", "81");
                expected.put("authInfo", propertySubstitutions.get("authInfo"));
                Assert.assertEquals(expected, resolvedProperties.getProperties());
            }
        }
    }
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) TestMacroEvaluator(io.cdap.cdap.internal.app.runtime.plugin.TestMacroEvaluator) PluginContext(io.cdap.cdap.api.plugin.PluginContext) DefaultPluginContext(io.cdap.cdap.internal.app.runtime.DefaultPluginContext) HashMap(java.util.HashMap) Gson(com.google.gson.Gson) FeatureFlagsProvider(io.cdap.cdap.api.feature.FeatureFlagsProvider) TestPlugin(io.cdap.cdap.internal.app.plugins.test.TestPlugin) PluginInstantiator(io.cdap.cdap.internal.app.runtime.plugin.PluginInstantiator) PluginClass(io.cdap.cdap.api.plugin.PluginClass) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) PluginProperties(io.cdap.cdap.api.plugin.PluginProperties) DefaultPluginContext(io.cdap.cdap.internal.app.runtime.DefaultPluginContext) TestPlugin(io.cdap.cdap.internal.app.plugins.test.TestPlugin) NestedConfigPlugin(io.cdap.cdap.internal.app.runtime.artifact.plugin.nested.NestedConfigPlugin) Plugin(io.cdap.cdap.api.plugin.Plugin) Test(org.junit.Test)
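
The expected host value, localhost/index.html:80, falls out of recursive macro expansion over the substitution map above. Tracing ${expansiveHostname} by hand:

${expansiveHostname} -> ${hostname}/${path}:${port}
  hostname: ${one} -> ${host${hostScopeMacro}} -> ${host-local}
            -> ${l}${o}${c}${a}${l}${hostSuffix} -> "local" + "host" -> localhost
  path:     ${two} -> ${filename${fileTypeMacro}} -> ${filename-html} -> index.html
  port:     ${three} -> ${firstPortDigit}${secondPortDigit} -> 80
  result:   localhost/index.html:80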

Aggregations

PluginContext (io.cdap.cdap.api.plugin.PluginContext): 10 uses
MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator): 7 uses
DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator): 7 uses
PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime): 7 uses
SparkPipelinePluginContext (io.cdap.cdap.etl.spark.plugin.SparkPipelinePluginContext): 6 uses
BasicArguments (io.cdap.cdap.etl.common.BasicArguments): 5 uses
StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec): 4 uses
SparkPipelineRuntime (io.cdap.cdap.etl.spark.SparkPipelineRuntime): 4 uses
HashMap (java.util.HashMap): 4 uses
TxRunnable (io.cdap.cdap.api.TxRunnable): 3 uses
DatasetContext (io.cdap.cdap.api.data.DatasetContext): 3 uses
PipelinePluginContext (io.cdap.cdap.etl.common.plugin.PipelinePluginContext): 3 uses
Map (java.util.Map): 3 uses
WorkflowToken (io.cdap.cdap.api.workflow.WorkflowToken): 2 uses
AlertPublisher (io.cdap.cdap.etl.api.AlertPublisher): 2 uses
SubmitterLifecycle (io.cdap.cdap.etl.api.SubmitterLifecycle): 2 uses
BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec): 2 uses
PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase): 2 uses
SparkBatchSinkFactory (io.cdap.cdap.etl.spark.batch.SparkBatchSinkFactory): 2 uses
PluginFunctionContext (io.cdap.cdap.etl.spark.function.PluginFunctionContext): 2 uses