Use of co.cask.cdap.etl.spark.SparkPipelineRuntime in project cdap by caskdata.
From class RDDCollection, method compute.
/**
 * Applies a {@link SparkCompute} plugin to this collection's RDD.
 *
 * <p>The plugin is initialized with a plugin context built for the given stage, then its
 * {@code transform} is applied to the input. Input and output records are each mapped through a
 * {@code CountingFunction} reporting under the stage name as "records.in" and "records.out";
 * the output counter is additionally given the stage's data tracer.
 *
 * @param stageSpec specification of the stage being run; its name labels the counters and tracer
 * @param compute the plugin to initialize and run
 * @param <U> element type produced by the plugin
 * @return the counted plugin output, wrapped via {@code wrap}
 * @throws Exception if plugin initialization or the transform fails
 */
@Override
public <U> SparkCollection<U> compute(StageSpec stageSpec, SparkCompute<T, U> compute) throws Exception {
  String name = stageSpec.getName();

  // Build the per-stage context and hand it to the plugin before any data flows through it.
  PipelineRuntime runtime = new SparkPipelineRuntime(sec);
  SparkExecutionPluginContext context =
    new BasicSparkExecutionPluginContext(sec, jsc, datasetContext, runtime, stageSpec);
  compute.initialize(context);

  // Count incoming records and cache the counted RDD.
  // NOTE(review): presumably cached so a plugin that evaluates its input more than once does not
  // recompute (and recount) it -- confirm.
  CountingFunction<T> inputCounter = new CountingFunction<T>(name, sec.getMetrics(), "records.in", null);
  JavaRDD<T> cachedInput = rdd.map(inputCounter).cache();

  // Run the plugin first, then attach the output counter, which also receives the data tracer.
  JavaRDD<U> transformed = compute.transform(context, cachedInput);
  CountingFunction<U> outputCounter =
    new CountingFunction<U>(name, sec.getMetrics(), "records.out", sec.getDataTracer(name));
  return wrap(transformed.map(outputCounter));
}
Use of co.cask.cdap.etl.spark.SparkPipelineRuntime in project cdap by caskdata.
From class DynamicSparkCompute, method lazyInit.
// When checkpointing is enabled and Spark is loading DStream operations from an existing checkpoint,
// delegate will be null and the initialize() method won't have been called. So we need to instantiate
// the delegate and initialize it here.
//
// The delegate is initialized inside a transaction so that it is given a DatasetContext. If anything
// fails after the delegate has been created, the field is reset to null: otherwise a later call would
// see a non-null delegate, skip initialization, and use a plugin that was never initialized.
//
// jsc: Spark context passed through to the plugin context.
// Throws whatever plugin creation, the transaction, or the plugin's initialize() throws.
private void lazyInit(final JavaSparkContext jsc) throws Exception {
  if (delegate != null) {
    // Already created and initialized; nothing to do.
    return;
  }

  PluginFunctionContext pluginFunctionContext = dynamicDriverContext.getPluginFunctionContext();
  delegate = pluginFunctionContext.createPlugin();

  boolean initialized = false;
  try {
    final StageSpec stageSpec = pluginFunctionContext.getStageSpec();
    final JavaSparkExecutionContext sec = dynamicDriverContext.getSparkExecutionContext();
    Transactionals.execute(sec, new TxRunnable() {
      @Override
      public void run(DatasetContext datasetContext) throws Exception {
        PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
        SparkExecutionPluginContext sparkPluginContext =
          new BasicSparkExecutionPluginContext(sec, jsc, datasetContext, pipelineRuntime, stageSpec);
        delegate.initialize(sparkPluginContext);
      }
    }, Exception.class);
    initialized = true;
  } finally {
    if (!initialized) {
      // Roll back so the next call retries creation and initialization from scratch.
      delegate = null;
    }
  }
}
Use of co.cask.cdap.etl.spark.SparkPipelineRuntime in project cdap by caskdata.
From class StreamingAlertPublishFunction, method call.
/**
 * Publishes the alerts of one streaming batch through the stage's {@link AlertPublisher} plugin.
 *
 * <p>A new publisher instance is created and initialized on every call, using a macro evaluator and
 * pipeline runtime built from the batch time. The alerts are materialized with {@code collect()},
 * wrapped in a {@code TrackedIterator} that reports against {@code Constants.Metrics.RECORDS_IN},
 * and handed to the publisher. The publisher is destroyed once it has been initialized, whether or
 * not publishing succeeds.
 *
 * @param data alerts emitted for this batch
 * @param batchTime time of the batch; its millisecond value is passed to the macro evaluator and
 *                  the pipeline runtime
 * @return always {@code null}
 * @throws Exception if the plugin cannot be instantiated or initialized, or if collecting or
 *                   publishing the alerts fails
 */
@Override
public Void call(JavaRDD<Alert> data, Time batchTime) throws Exception {
  MacroEvaluator evaluator = new DefaultMacroEvaluator(new BasicArguments(sec), batchTime.milliseconds(),
                                                       sec.getSecureStore(), sec.getNamespace());
  PluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                               stageSpec.isStageLoggingEnabled(),
                                                               stageSpec.isProcessTimingEnabled());
  String stageName = stageSpec.getName();
  AlertPublisher alertPublisher = pluginContext.newPluginInstance(stageName, evaluator);

  PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec, batchTime.milliseconds());
  AlertPublisherContext alertPublisherContext =
    new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, sec.getMessagingContext(), sec.getAdmin());
  alertPublisher.initialize(alertPublisherContext);

  // From here on the publisher is initialized, so destroy() must run even if collecting or
  // publishing the alerts throws; otherwise whatever initialize() acquired is never released.
  try {
    StageMetrics stageMetrics = new DefaultStageMetrics(sec.getMetrics(), stageName);
    TrackedIterator<Alert> trackedAlerts =
      new TrackedIterator<>(data.collect().iterator(), stageMetrics, Constants.Metrics.RECORDS_IN);
    alertPublisher.publish(trackedAlerts);
  } finally {
    alertPublisher.destroy();
  }
  return null;
}
Aggregations