Use of co.cask.cdap.etl.api.batch.SparkExecutionPluginContext in project cdap by caskdata.
The class RDDCollection, method store.

@Override
public void store(StageInfo stageInfo, SparkSink<T> sink) throws Exception {
  String stageName = stageInfo.getName();
  SparkExecutionPluginContext sparkPluginContext =
    new BasicSparkExecutionPluginContext(sec, jsc, datasetContext, stageInfo);
  JavaRDD<T> countedRDD =
    rdd.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null)).cache();
  sink.run(sparkPluginContext, countedRDD);
}

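The store method above builds a SparkExecutionPluginContext, counts the incoming records, and hands both to SparkSink.run. For illustration only, a minimal sink on the receiving end might look like the sketch below; the class name, String record type, and output path are hypothetical, and it assumes SparkSink can be subclassed with the prepareRun and run methods seen in these snippets.

import co.cask.cdap.etl.api.batch.SparkExecutionPluginContext;
import co.cask.cdap.etl.api.batch.SparkPluginContext;
import co.cask.cdap.etl.api.batch.SparkSink;
import org.apache.spark.api.java.JavaRDD;

// Hypothetical sink: materializes the incoming RDD and writes it out as text files.
public class ExampleTextSink extends SparkSink<String> {

  @Override
  public void prepareRun(SparkPluginContext context) {
    // Nothing to set up for this sketch.
  }

  @Override
  public void run(SparkExecutionPluginContext context, JavaRDD<String> input) {
    // 'input' is the RDD that store() wrapped in CountingFunction and cached,
    // and 'context' is the BasicSparkExecutionPluginContext built above.
    // Hypothetical output path; a real sink would more likely write to a CDAP dataset.
    input.saveAsTextFile("/tmp/example-sink-output");
  }
}
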
Use of co.cask.cdap.etl.api.batch.SparkExecutionPluginContext in project cdap by caskdata.
The class DStreamCollection, method compute.

@Override
public <U> SparkCollection<U> compute(final StageSpec stageSpec, SparkCompute<T, U> compute) throws Exception {
  final SparkCompute<T, U> wrappedCompute =
    new DynamicSparkCompute<>(new DynamicDriverContext(stageSpec, sec, new NoopStageStatisticsCollector()), compute);
  Transactionals.execute(sec, new TxRunnable() {
    @Override
    public void run(DatasetContext datasetContext) throws Exception {
      PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
      SparkExecutionPluginContext sparkPluginContext =
        new BasicSparkExecutionPluginContext(sec, JavaSparkContext.fromSparkContext(stream.context().sparkContext()),
                                             datasetContext, pipelineRuntime, stageSpec);
      wrappedCompute.initialize(sparkPluginContext);
    }
  }, Exception.class);
  return wrap(stream.transform(new ComputeTransformFunction<>(sec, stageSpec, wrappedCompute)));
}

Use of co.cask.cdap.etl.api.batch.SparkExecutionPluginContext in project cdap by caskdata.
The class StreamingSparkSinkFunction, method call.

@Override
public Void call(JavaRDD<T> data, Time batchTime) throws Exception {
  if (data.isEmpty()) {
    return null;
  }
  final long logicalStartTime = batchTime.milliseconds();
  MacroEvaluator evaluator = new DefaultMacroEvaluator(new BasicArguments(sec), logicalStartTime,
                                                       sec.getSecureStore(), sec.getNamespace());
  final PluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                                     stageSpec.isStageLoggingEnabled(),
                                                                     stageSpec.isProcessTimingEnabled());
  final PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec, batchTime.milliseconds());
  final String stageName = stageSpec.getName();
  final SparkSink<T> sparkSink = pluginContext.newPluginInstance(stageName, evaluator);
  boolean isPrepared = false;
  boolean isDone = false;
  try {
    sec.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext datasetContext) throws Exception {
        SparkPluginContext context =
          new BasicSparkPluginContext(null, pipelineRuntime, stageSpec, datasetContext, sec.getAdmin());
        sparkSink.prepareRun(context);
      }
    });
    isPrepared = true;
    final SparkExecutionPluginContext sparkExecutionPluginContext =
      new SparkStreamingExecutionContext(sec, JavaSparkContext.fromSparkContext(data.rdd().context()),
                                         logicalStartTime, stageSpec);
    final JavaRDD<T> countedRDD =
      data.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null)).cache();
    sec.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext context) throws Exception {
        sparkSink.run(sparkExecutionPluginContext, countedRDD);
      }
    });
    isDone = true;
    sec.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext datasetContext) throws Exception {
        SparkPluginContext context =
          new BasicSparkPluginContext(null, pipelineRuntime, stageSpec, datasetContext, sec.getAdmin());
        sparkSink.onRunFinish(true, context);
      }
    });
  } catch (Exception e) {
    LOG.error("Error while executing sink {} for the batch for time {}.", stageName, logicalStartTime, e);
  } finally {
    if (isPrepared && !isDone) {
      sec.execute(new TxRunnable() {
        @Override
        public void run(DatasetContext datasetContext) throws Exception {
          SparkPluginContext context =
            new BasicSparkPluginContext(null, pipelineRuntime, stageSpec, datasetContext, sec.getAdmin());
          sparkSink.onRunFinish(false, context);
        }
      });
    }
  }
  return null;
}

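The call method above drives the full sink lifecycle for each micro-batch: prepareRun in one transaction, run with a SparkStreamingExecutionContext in another, then onRunFinish with true on success or, from the finally block, false when prepareRun succeeded but run did not complete. A hypothetical sketch of a sink that reacts to that lifecycle follows; the class name and output path are invented, and it assumes the context exposes the batch's logical start time via getLogicalStartTime(), consistent with logicalStartTime being passed into SparkStreamingExecutionContext above.

import co.cask.cdap.etl.api.batch.SparkExecutionPluginContext;
import co.cask.cdap.etl.api.batch.SparkPluginContext;
import co.cask.cdap.etl.api.batch.SparkSink;
import org.apache.spark.api.java.JavaRDD;

// Hypothetical streaming-friendly sink: writes each micro-batch under a directory
// derived from the batch's logical start time.
public class BatchTimePartitionedSink extends SparkSink<String> {

  @Override
  public void prepareRun(SparkPluginContext context) {
    // Invoked once per micro-batch, inside its own transaction (first sec.execute above).
  }

  @Override
  public void run(SparkExecutionPluginContext context, JavaRDD<String> input) {
    // Assumption: the context exposes the batchTime-derived logical start time.
    long batchTime = context.getLogicalStartTime();
    // Hypothetical output location; a real sink would more likely write to a CDAP dataset.
    input.saveAsTextFile("/tmp/stream-sink-output/" + batchTime);
  }

  @Override
  public void onRunFinish(boolean succeeded, SparkPluginContext context) {
    // succeeded == true  -> run() completed (isDone above); promote or register the output.
    // succeeded == false -> prepareRun() ran but run() failed; clean up partial output here.
  }
}
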
Use of co.cask.cdap.etl.api.batch.SparkExecutionPluginContext in project cdap by caskdata.
The class RDDCollection, method compute.

@Override
public <U> SparkCollection<U> compute(StageInfo stageInfo, SparkCompute<T, U> compute) throws Exception {
  String stageName = stageInfo.getName();
  SparkExecutionPluginContext sparkPluginContext =
    new BasicSparkExecutionPluginContext(sec, jsc, datasetContext, stageInfo);
  compute.initialize(sparkPluginContext);
  JavaRDD<T> countedInput =
    rdd.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null)).cache();
  return wrap(compute.transform(sparkPluginContext, countedInput)
                .map(new CountingFunction<U>(stageName, sec.getMetrics(), "records.out",
                                             sec.getDataTracer(stageName))));
}

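Here compute initializes the plugin with the context and then calls transform on the "records.in"-counted RDD, counting whatever comes back as "records.out". A minimal sketch of a SparkCompute implementation driven this way might look like the following; the class name and filtering logic are hypothetical, and it assumes SparkCompute can be subclassed with the initialize and transform methods invoked above.

import co.cask.cdap.etl.api.batch.SparkCompute;
import co.cask.cdap.etl.api.batch.SparkExecutionPluginContext;
import org.apache.spark.api.java.JavaRDD;

// Hypothetical compute stage: drops blank lines and trims the rest.
public class TrimNonBlankCompute extends SparkCompute<String, String> {

  @Override
  public void initialize(SparkExecutionPluginContext context) {
    // Called once by RDDCollection.compute (or inside a transaction in the DStream case)
    // before any records flow through transform().
  }

  @Override
  public JavaRDD<String> transform(SparkExecutionPluginContext context, JavaRDD<String> input) {
    // 'input' is the "records.in"-counted RDD; whatever is returned here is
    // wrapped by the caller and counted as "records.out".
    return input.filter(line -> !line.trim().isEmpty())
                .map(String::trim);
  }
}
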
Use of co.cask.cdap.etl.api.batch.SparkExecutionPluginContext in project cdap by caskdata.
The class DStreamCollection, method compute.

@Override
public <U> SparkCollection<U> compute(final StageInfo stageInfo, SparkCompute<T, U> compute) throws Exception {
  final SparkCompute<T, U> wrappedCompute =
    new DynamicSparkCompute<>(new DynamicDriverContext(stageInfo, sec), compute);
  Transactionals.execute(sec, new TxRunnable() {
    @Override
    public void run(DatasetContext datasetContext) throws Exception {
      SparkExecutionPluginContext sparkPluginContext =
        new BasicSparkExecutionPluginContext(sec, JavaSparkContext.fromSparkContext(stream.context().sparkContext()),
                                             datasetContext, stageInfo);
      wrappedCompute.initialize(sparkPluginContext);
    }
  }, Exception.class);
  return wrap(stream.transform(new ComputeTransformFunction<>(sec, stageInfo, wrappedCompute)));
}