Use of co.cask.cdap.etl.spark.function.CountingFunction in project cdap by caskdata.
The class StreamingSparkSinkFunction, method call().
@Override
public Void call(JavaRDD<T> data, Time batchTime) throws Exception {
  final long logicalStartTime = batchTime.milliseconds();
  MacroEvaluator evaluator = new DefaultMacroEvaluator(sec.getWorkflowToken(), sec.getRuntimeArguments(),
                                                       logicalStartTime, sec.getSecureStore(), sec.getNamespace());
  final PluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(),
                                                                     stageInfo.isStageLoggingEnabled(),
                                                                     stageInfo.isProcessTimingEnabled());
  final String stageName = stageInfo.getName();
  final SparkSink<T> sparkSink = pluginContext.newPluginInstance(stageName, evaluator);
  boolean isPrepared = false;
  boolean isDone = false;

  try {
    // Prepare the sink inside a transaction.
    sec.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext datasetContext) throws Exception {
        SparkPluginContext context = new BasicSparkPluginContext(sec, datasetContext, stageInfo);
        sparkSink.prepareRun(context);
      }
    });
    isPrepared = true;

    final SparkExecutionPluginContext sparkExecutionPluginContext =
      new SparkStreamingExecutionContext(sec, JavaSparkContext.fromSparkContext(data.rdd().context()),
                                         logicalStartTime, stageInfo);
    // Count records flowing into the sink, then cache so the counting map is not re-executed.
    final JavaRDD<T> countedRDD =
      data.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null)).cache();
    sec.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext context) throws Exception {
        sparkSink.run(sparkExecutionPluginContext, countedRDD);
      }
    });
    isDone = true;

    // Signal successful completion to the sink.
    sec.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext datasetContext) throws Exception {
        SparkPluginContext context = new BasicSparkPluginContext(sec, datasetContext, stageInfo);
        sparkSink.onRunFinish(true, context);
      }
    });
  } catch (Exception e) {
    LOG.error("Error while executing sink {} for the batch for time {}.", stageName, logicalStartTime, e);
  } finally {
    // If prepareRun succeeded but the run did not complete, signal failure to the sink.
    if (isPrepared && !isDone) {
      sec.execute(new TxRunnable() {
        @Override
        public void run(DatasetContext datasetContext) throws Exception {
          SparkPluginContext context = new BasicSparkPluginContext(sec, datasetContext, stageInfo);
          sparkSink.onRunFinish(false, context);
        }
      });
    }
  }
  return null;
}
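In all of these usages, CountingFunction acts as a pass-through map: it emits a record-count metric for the named stage (and can hand records to a DataTracer) while returning each record unchanged. The sketch below shows that general shape only; the class name RecordCounter is made up for illustration, and a Spark 2.x LongAccumulator stands in for CDAP's Metrics, so this is not the actual CDAP implementation.

import org.apache.spark.api.java.function.Function;
import org.apache.spark.util.LongAccumulator;

// Illustrative pass-through counter in the spirit of CountingFunction (not CDAP code).
public class RecordCounter<T> implements Function<T, T> {
  private final LongAccumulator counter;  // stand-in for the stage's "records.in"/"records.out" metric

  public RecordCounter(LongAccumulator counter) {
    this.counter = counter;
  }

  @Override
  public T call(T record) {
    counter.add(1L);  // count the record; CDAP would emit a metric here instead
    return record;    // pass the record through unchanged so the pipeline data is unaffected
  }
}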
Use of co.cask.cdap.etl.spark.function.CountingFunction in project cdap by caskdata.
The class RDDCollection, method store().
@Override
public void store(StageInfo stageInfo, SparkSink<T> sink) throws Exception {
  String stageName = stageInfo.getName();
  SparkExecutionPluginContext sparkPluginContext =
    new BasicSparkExecutionPluginContext(sec, jsc, datasetContext, stageInfo);
  // Count records going into the sink, then cache the RDD before handing it to the sink.
  JavaRDD<T> countedRDD = rdd.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null)).cache();
  sink.run(sparkPluginContext, countedRDD);
}
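The .cache() on the counted RDD is worth noting: if the sink triggers more than one Spark action, an uncached RDD would re-execute the counting map and inflate the "records.in" metric. A minimal, self-contained sketch of that effect using plain Spark 2.x, with a LongAccumulator as a stand-in for CDAP metrics (all names here are illustrative):

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;

public class CountThenStoreSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local[2]", "count-then-store-sketch");
    LongAccumulator recordsIn = jsc.sc().longAccumulator("records.in");

    JavaRDD<String> input = jsc.parallelize(Arrays.asList("a", "b", "c"));

    // Count on the way in, then cache so later actions reuse the counted data
    // instead of re-running the counting map.
    JavaRDD<String> counted = input
        .map(record -> { recordsIn.add(1L); return record; })
        .cache();

    counted.count();    // first action (e.g. a job the sink runs)
    counted.collect();  // second action; without cache() this would double the count

    System.out.println("records.in = " + recordsIn.value());  // prints 3
    jsc.stop();
  }
}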
Use of co.cask.cdap.etl.spark.function.CountingFunction in project cdap by caskdata.
The class RDDCollection, method compute().
@Override
public <U> SparkCollection<U> compute(StageInfo stageInfo, SparkCompute<T, U> compute) throws Exception {
  String stageName = stageInfo.getName();
  SparkExecutionPluginContext sparkPluginContext =
    new BasicSparkExecutionPluginContext(sec, jsc, datasetContext, stageInfo);
  compute.initialize(sparkPluginContext);

  // Count records entering the compute stage, run the transform, then count (and trace) the output.
  JavaRDD<T> countedInput = rdd.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null)).cache();
  return wrap(compute.transform(sparkPluginContext, countedInput)
                .map(new CountingFunction<U>(stageName, sec.getMetrics(), "records.out", sec.getDataTracer(stageName))));
}
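The compute path wraps both sides of the transform: input records are counted as "records.in" (and cached so the count is not recomputed), the SparkCompute transform runs, and its output is counted as "records.out" with the stage's DataTracer attached to the output side. A rough, self-contained sketch of that in/out pattern, assuming plain Spark with accumulators in place of CDAP metrics and a trivial map standing in for the transform (InOutCountingSketch and countedTransform are hypothetical names):

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.util.LongAccumulator;

// Illustration only: count records entering a transform and records leaving it.
public final class InOutCountingSketch {
  private InOutCountingSketch() { }

  public static JavaRDD<String> countedTransform(JavaRDD<Integer> input,
                                                 LongAccumulator recordsIn,
                                                 LongAccumulator recordsOut) {
    // "records.in": count and cache the input, as compute() above does.
    JavaRDD<Integer> countedIn = input
        .map(record -> { recordsIn.add(1L); return record; })
        .cache();

    // Stand-in for compute.transform(...); here it just formats the records.
    JavaRDD<String> transformed = countedIn.map(record -> "value=" + record);

    // "records.out": count the transform's output (CDAP also attaches a DataTracer here).
    return transformed.map(record -> { recordsOut.add(1L); return record; });
  }
}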
Use of co.cask.cdap.etl.spark.function.CountingFunction in project cdap by caskdata.
The class ComputeTransformFunction, method call().
@Override
public JavaRDD<U> call(JavaRDD<T> data, Time batchTime) throws Exception {
  SparkExecutionPluginContext sparkPluginContext =
    new SparkStreamingExecutionContext(sec, JavaSparkContext.fromSparkContext(data.context()),
                                       batchTime.milliseconds(), stageInfo);
  String stageName = stageInfo.getName();
  // Count records entering the stage, transform them, then count records leaving the stage.
  data = data.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null));
  return compute.transform(sparkPluginContext, data)
    .map(new CountingFunction<U>(stageName, sec.getMetrics(), "records.out", sec.getDataTracer(stageName)));
}
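ComputeTransformFunction has the shape (JavaRDD<T>, Time) -> JavaRDD<U>, which is what Spark Streaming expects of a per-batch transform, so the counting above runs once per micro-batch. A small sketch of how a function of that shape is typically wired into a stream via JavaDStream.transform (illustrative names, not CDAP code):

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;

// Illustration only: apply a (JavaRDD<T>, Time) -> JavaRDD<U> function to each micro-batch.
public final class PerBatchTransformSketch {
  private PerBatchTransformSketch() { }

  public static JavaDStream<String> apply(JavaDStream<Integer> stream) {
    Function2<JavaRDD<Integer>, Time, JavaRDD<String>> perBatch = (rdd, batchTime) -> {
      long batchMillis = batchTime.milliseconds();                       // batch start time, like logicalStartTime above
      return rdd.map(record -> "batch=" + batchMillis + " value=" + record);  // stand-in for the compute transform
    };
    // transform(...) invokes perBatch once per batch, just as the pipeline invokes call(...) above.
    return stream.transform(perBatch);
  }
}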