use of co.cask.cdap.etl.common.plugin.PipelinePluginContext in project cdap by caskdata.
the class SparkStreamingPipelineDriver method run.
private JavaStreamingContext run(final DataStreamsPipelineSpec pipelineSpec, final PipelinePhase pipelinePhase,
                                 final JavaSparkExecutionContext sec,
                                 @Nullable final String checkpointDir) throws Exception {
  Function0<JavaStreamingContext> contextFunction = new Function0<JavaStreamingContext>() {
    @Override
    public JavaStreamingContext call() throws Exception {
      JavaStreamingContext jssc = new JavaStreamingContext(
        new JavaSparkContext(), Durations.milliseconds(pipelineSpec.getBatchIntervalMillis()));
      SparkStreamingPipelineRunner runner = new SparkStreamingPipelineRunner(sec, jssc, pipelineSpec, false);
      PipelinePluginContext pluginContext = new PipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                                      pipelineSpec.isStageLoggingEnabled(),
                                                                      pipelineSpec.isProcessTimingEnabled());
      // these seem like they should be set at configure time instead of runtime, but that requires an API change
      try {
        runner.runPipeline(pipelinePhase, StreamingSource.PLUGIN_TYPE, sec, new HashMap<String, Integer>(),
                           pluginContext, new HashMap<String, StageStatisticsCollector>());
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
      // enable checkpointing so the DStream graph can be recovered on restart
      if (checkpointDir != null) {
        jssc.checkpoint(checkpointDir);
      }
      return jssc;
    }
  };
  // if a checkpoint directory is configured, try to recover an existing context from it
  // instead of always building a new one
  return checkpointDir == null ? contextFunction.call() : StreamingCompat.getOrCreate(checkpointDir, contextFunction);
}
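
The final line is the standard Spark Streaming recovery pattern: restore a checkpointed context if one exists, otherwise build a fresh one. StreamingCompat wraps this to smooth over Spark version differences; below is a minimal sketch of the same pattern using only Spark's own JavaStreamingContext.getOrCreate. The class name, app name, and local master are illustrative assumptions.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.Function0;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class CheckpointedContextSketch {
  public static JavaStreamingContext getOrCreate(final String checkpointDir) {
    Function0<JavaStreamingContext> factory = new Function0<JavaStreamingContext>() {
      @Override
      public JavaStreamingContext call() throws Exception {
        SparkConf conf = new SparkConf().setAppName("sketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
        // build the DStream graph here, then enable checkpointing
        jssc.checkpoint(checkpointDir);
        return jssc;
      }
    };
    // restores the context (and its DStream graph) from the checkpoint if one exists,
    // otherwise invokes the factory to build a fresh context
    return JavaStreamingContext.getOrCreate(checkpointDir, factory);
  }
}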
use of co.cask.cdap.etl.common.plugin.PipelinePluginContext in project cdap by caskdata.
the class JavaSparkMainWrapper method run.
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
  String stageName = sec.getSpecification().getProperty(ExternalSparkProgram.STAGE_NAME);
  BatchPhaseSpec batchPhaseSpec = GSON.fromJson(sec.getSpecification().getProperty(Constants.PIPELINEID),
                                                BatchPhaseSpec.class);
  PipelinePluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                                       batchPhaseSpec.isStageLoggingEnabled(),
                                                                       batchPhaseSpec.isProcessTimingEnabled());
  Class<?> mainClass = pluginContext.loadPluginClass(stageName);
  // if it's a CDAP JavaSparkMain, instantiate it and call the run method
  if (JavaSparkMain.class.isAssignableFrom(mainClass)) {
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(new BasicArguments(sec), sec.getLogicalStartTime(),
                                                              sec.getSecureStore(), sec.getNamespace());
    JavaSparkMain javaSparkMain = pluginContext.newPluginInstance(stageName, macroEvaluator);
    javaSparkMain.run(sec);
  } else {
    // otherwise, assume there is a 'main' method and call it
    String programArgs = getProgramArgs(sec, stageName);
    String[] args = programArgs == null
      ? RuntimeArguments.toPosixArray(sec.getRuntimeArguments())
      : programArgs.split(" ");
    final Method mainMethod = mainClass.getMethod("main", String[].class);
    // wrap the String[] in an Object[] so reflection passes it as a single argument
    // instead of expanding it as varargs
    final Object[] methodArgs = new Object[1];
    methodArgs[0] = args;
    Caller caller = pluginContext.getCaller(stageName);
    caller.call(new Callable<Void>() {
      @Override
      public Void call() throws Exception {
        mainMethod.invoke(null, methodArgs);
        return null;
      }
    });
  }
}
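
The Object[] wrapping before invoke matters: Method.invoke takes varargs, so passing a String[] directly would be expanded into individual arguments. A standalone sketch of the pitfall, with a hypothetical Target class:

import java.lang.reflect.Method;

public class MainInvokeSketch {
  public static class Target {
    public static void main(String[] args) {
      System.out.println(String.join(",", args));
    }
  }

  public static void main(String[] ignored) throws Exception {
    Method mainMethod = Target.class.getMethod("main", String[].class);
    String[] args = {"a", "b"};
    // wrong: invoke would expand the String[] into two separate arguments and
    // throw IllegalArgumentException ("wrong number of arguments")
    // mainMethod.invoke(null, args);
    // right: wrap the array so it is passed as the single String[] parameter
    mainMethod.invoke(null, new Object[] { args });  // prints "a,b"
  }
}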
use of co.cask.cdap.etl.common.plugin.PipelinePluginContext in project cdap by caskdata.
the class PipelineCondition method apply.
@Override
public boolean apply(@Nullable WorkflowContext input) {
  if (input == null) {
    // should not happen
    throw new IllegalStateException("WorkflowContext for the Condition cannot be null.");
  }
  Map<String, String> properties = input.getConditionSpecification().getProperties();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  PipelinePhase phase = phaseSpec.getPhase();
  // the phase of a condition node contains exactly one stage: the condition plugin itself
  StageSpec stageSpec = phase.iterator().next();
  PluginContext pluginContext = new PipelinePluginContext(input, metrics, phaseSpec.isStageLoggingEnabled(),
                                                          phaseSpec.isProcessTimingEnabled());
  MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(
    new BasicArguments(input.getToken(), input.getRuntimeArguments()),
    input.getLogicalStartTime(), input, input.getNamespace());
  try {
    Condition condition = pluginContext.newPluginInstance(stageSpec.getName(), macroEvaluator);
    PipelineRuntime pipelineRuntime = new PipelineRuntime(input, metrics);
    ConditionContext conditionContext = new BasicConditionContext(input, pipelineRuntime, stageSpec);
    boolean result = condition.apply(conditionContext);
    WorkflowToken token = input.getToken();
    if (token == null) {
      throw new IllegalStateException("WorkflowToken cannot be null when Condition is executed through Workflow.");
    }
    // propagate any arguments added by the condition to the rest of the workflow via the token
    for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
      token.put(entry.getKey(), entry.getValue());
    }
    return result;
  } catch (Exception e) {
    String msg = String.format("Error executing condition '%s' in the pipeline.", stageSpec.getName());
    throw new RuntimeException(msg, e);
  }
}
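
For reference, a condition plugin of the kind instantiated above only needs to implement apply(ConditionContext). The sketch below is hypothetical: only Condition.apply(ConditionContext) is confirmed by the snippet; the annotations, Condition.PLUGIN_TYPE, the package names, and context.getArguments() follow common CDAP plugin conventions and are assumptions here.

import co.cask.cdap.api.annotation.Name;
import co.cask.cdap.api.annotation.Plugin;
import co.cask.cdap.etl.api.condition.Condition;
import co.cask.cdap.etl.api.condition.ConditionContext;

// routes the workflow to the 'true' branch only when a runtime argument is set
@Plugin(type = Condition.PLUGIN_TYPE)
@Name("ArgumentThreshold")
public class ArgumentThresholdCondition extends Condition {
  @Override
  public boolean apply(ConditionContext context) throws Exception {
    // hypothetical logic: read a runtime argument and branch on its value
    String raw = context.getArguments().get("records.processed");
    return raw != null && Long.parseLong(raw) > 0;
  }
}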
use of co.cask.cdap.etl.common.plugin.PipelinePluginContext in project cdap by caskdata.
the class PipelineAction method run.
@Override
public void run() throws Exception {
  CustomActionContext context = getContext();
  Map<String, String> properties = context.getSpecification().getProperties();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  PipelinePhase phase = phaseSpec.getPhase();
  // the phase of an action node contains exactly one stage: the action plugin itself
  StageSpec stageSpec = phase.iterator().next();
  PluginContext pluginContext = new PipelinePluginContext(context, metrics, phaseSpec.isStageLoggingEnabled(),
                                                          phaseSpec.isProcessTimingEnabled());
  PipelineRuntime pipelineRuntime = new PipelineRuntime(context, metrics);
  Action action = pluginContext.newPluginInstance(stageSpec.getName(),
                                                  new DefaultMacroEvaluator(pipelineRuntime.getArguments(),
                                                                            context.getLogicalStartTime(), context,
                                                                            context.getNamespace()));
  ActionContext actionContext = new BasicActionContext(context, pipelineRuntime, stageSpec);
  // skip the action when the data tracer for this stage is enabled (e.g. preview runs)
  if (!context.getDataTracer(stageSpec.getName()).isEnabled()) {
    action.run(actionContext);
  }
  WorkflowToken token = context.getWorkflowToken();
  if (token == null) {
    throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
  }
  // propagate any arguments added by the action to the rest of the workflow via the token
  for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
    token.put(entry.getKey(), entry.getValue());
  }
}
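
The added-arguments loop is what lets an action hand values to later stages: anything the plugin adds during run is copied into the WorkflowToken. A hypothetical action sketch; Action.run(ActionContext) is taken from the snippet, while the annotations, package names, and the getArguments().set(...) call follow the CDAP ETL action API and are assumptions:

import co.cask.cdap.api.annotation.Name;
import co.cask.cdap.api.annotation.Plugin;
import co.cask.cdap.etl.api.action.Action;
import co.cask.cdap.etl.api.action.ActionContext;

// records a marker argument that downstream workflow nodes can read from the token
@Plugin(type = Action.PLUGIN_TYPE)
@Name("SetMarker")
public class SetMarkerAction extends Action {
  @Override
  public void run(ActionContext context) throws Exception {
    // PipelineAction copies arguments added here into the WorkflowToken after run() returns
    context.getArguments().set("marker.timestamp", String.valueOf(System.currentTimeMillis()));
  }
}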
use of co.cask.cdap.etl.common.plugin.PipelinePluginContext in project cdap by caskdata.
the class BatchSparkPipelineDriver method run.
@Override
public void run(DatasetContext context) throws Exception {
  BatchPhaseSpec phaseSpec = GSON.fromJson(sec.getSpecification().getProperty(Constants.PIPELINEID),
                                           BatchPhaseSpec.class);
  // deserialize the source/sink factories from the config file localized with the program
  Path configFile = sec.getLocalizationContext().getLocalFile("HydratorSpark.config").toPath();
  try (BufferedReader reader = Files.newBufferedReader(configFile, StandardCharsets.UTF_8)) {
    String json = reader.readLine();
    SparkBatchSourceSinkFactoryInfo sourceSinkInfo = GSON.fromJson(json, SparkBatchSourceSinkFactoryInfo.class);
    sourceFactory = sourceSinkInfo.getSparkBatchSourceFactory();
    sinkFactory = sourceSinkInfo.getSparkBatchSinkFactory();
    stagePartitions = sourceSinkInfo.getStagePartitions();
  }
  datasetContext = context;
  numOfRecordsPreview = phaseSpec.getNumOfRecordsPreview();
  PipelinePluginContext pluginContext = new PipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                                  phaseSpec.isStageLoggingEnabled(),
                                                                  phaseSpec.isProcessTimingEnabled());
  // per-stage statistics are only collected when the pipeline contains a condition,
  // since conditions are the consumers of those statistics
  Map<String, StageStatisticsCollector> collectors = new HashMap<>();
  if (phaseSpec.pipelineContainsCondition()) {
    Iterator<StageSpec> iterator = phaseSpec.getPhase().iterator();
    while (iterator.hasNext()) {
      StageSpec spec = iterator.next();
      collectors.put(spec.getName(), new SparkStageStatisticsCollector(jsc));
    }
  }
  try {
    PipelinePluginInstantiator pluginInstantiator =
      new PipelinePluginInstantiator(pluginContext, sec.getMetrics(), phaseSpec, new SingleConnectorFactory());
    runPipeline(phaseSpec.getPhase(), BatchSource.PLUGIN_TYPE, sec, stagePartitions, pluginInstantiator, collectors);
  } finally {
    updateWorkflowToken(sec.getWorkflowToken(), collectors);
  }
}
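
The read of HydratorSpark.config illustrates a general pattern: serialize driver-side configuration as a single JSON line, ship it with the program as a localized file, and rehydrate it with Gson at runtime. A self-contained sketch of just that pattern; the class name and the generic spec type are placeholders:

import com.google.gson.Gson;
import java.io.BufferedReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

public class LocalizedSpecReader {
  private static final Gson GSON = new Gson();

  // reads a spec that was serialized as a single JSON line into a localized file
  public static <T> T readSpec(Path specFile, Class<T> type) throws Exception {
    try (BufferedReader reader = Files.newBufferedReader(specFile, StandardCharsets.UTF_8)) {
      return GSON.fromJson(reader.readLine(), type);
    }
  }
}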