Use of com.cloudera.dataflow.spark.streaming.SparkStreamingPipelineOptions in project spark-dataflow by Cloudera.
Class SparkPipelineRunner, method createStreamingEvaluationContext:
private EvaluationContext createStreamingEvaluationContext(JavaSparkContext jsc,
    Pipeline pipeline, Duration batchDuration) {
  // Safe cast: run() has already verified that streaming jobs carry streaming options.
  SparkStreamingPipelineOptions streamingOptions = (SparkStreamingPipelineOptions) mOptions;
  // Wrap the batch context in a streaming context keyed to the micro-batch interval.
  JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);
  return new StreamingEvaluationContext(jsc, pipeline, jssc, streamingOptions.getTimeout());
}
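For readers unfamiliar with Spark Streaming, the method above is essentially the stock Spark Streaming bootstrap: wrap the batch JavaSparkContext in a JavaStreamingContext keyed to a micro-batch duration, then later start it and wait up to the configured timeout. A minimal plain-Spark sketch of that lifecycle (standard Spark Streaming API only; the master URL, app name, and durations are illustrative):

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class StreamingBootstrapSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local[2]", "bootstrap-sketch");
    // Same wrapping as createStreamingEvaluationContext: batch context + micro-batch interval.
    JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000));
    // A real job must register at least one DStream output operation before start().
    jssc.start();                           // what run() does via getStreamingContext().start()
    jssc.awaitTerminationOrTimeout(60000);  // the role the options timeout plays (sketch)
    jssc.stop();
  }
}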
Class SparkPipelineRunner, method run:
@Override
public EvaluationResult run(Pipeline pipeline) {
  try {
    // Validate the streaming configuration: a streaming job needs the streaming-specific options.
    if (mOptions.isStreaming() && !(mOptions instanceof SparkStreamingPipelineOptions)) {
      throw new RuntimeException("A streaming job must be configured with "
          + SparkStreamingPipelineOptions.class.getSimpleName() + ", found "
          + mOptions.getClass().getSimpleName());
    }
    LOG.info("Executing pipeline using the SparkPipelineRunner.");
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(
        mOptions.getSparkMaster(), mOptions.getAppName());
    if (mOptions.isStreaming()) {
      SparkPipelineTranslator translator =
          new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
      // In streaming mode, a fixed window must be defined on all UNBOUNDED inputs;
      // the detector also derives the batch interval from the window size.
      StreamingWindowPipelineDetector streamingWindowPipelineDetector =
          new StreamingWindowPipelineDetector(translator);
      pipeline.traverseTopologically(streamingWindowPipelineDetector);
      if (!streamingWindowPipelineDetector.isWindowing()) {
        throw new IllegalStateException("Spark streaming pipeline must be windowed!");
      }
      Duration batchInterval = streamingWindowPipelineDetector.getBatchDuration();
      LOG.info("Setting Spark streaming batchInterval to {} msec", batchInterval.milliseconds());
      EvaluationContext ctxt = createStreamingEvaluationContext(jsc, pipeline, batchInterval);
      pipeline.traverseTopologically(new SparkPipelineEvaluator(ctxt, translator));
      ctxt.computeOutputs();
      LOG.info("Streaming pipeline construction complete. Starting execution...");
      ((StreamingEvaluationContext) ctxt).getStreamingContext().start();
      return ctxt;
    } else {
      EvaluationContext ctxt = new EvaluationContext(jsc, pipeline);
      SparkPipelineTranslator translator = new TransformTranslator.Translator();
      pipeline.traverseTopologically(new SparkPipelineEvaluator(ctxt, translator));
      ctxt.computeOutputs();
      LOG.info("Pipeline execution complete.");
      return ctxt;
    }
  } catch (Exception e) {
    // Find the cause: if it is a user exception (wrapped by our SparkProcessException),
    // rethrow its cause; otherwise just use the SparkException cause.
    if (e instanceof SparkException && e.getCause() != null) {
      if (e.getCause() instanceof SparkProcessContext.SparkProcessException
          && e.getCause().getCause() != null) {
        throw new RuntimeException(e.getCause().getCause());
      } else {
        throw new RuntimeException(e.getCause());
      }
    }
    // Otherwise just wrap in a RuntimeException.
    throw new RuntimeException(e);
  }
}
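Putting both methods together, a caller drives a streaming run roughly as follows. This is an illustrative sketch, not project source: it assumes the Dataflow SDK's PipelineOptionsFactory/Pipeline/Window APIs that this runner builds on, the setStreaming/setTimeout setters implied by the getters used above, and the project's CreateStream queue source, whose exact signature should be treated as an assumption. The fixed window is what satisfies the isWindowing() check and determines the Spark Streaming batch interval.

import java.util.Arrays;
import java.util.Collections;
import org.joda.time.Duration;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
import com.cloudera.dataflow.io.CreateStream;
import com.cloudera.dataflow.spark.SparkPipelineRunner;
import com.cloudera.dataflow.spark.streaming.SparkStreamingPipelineOptions;

public class StreamingRunExample {
  public static void main(String[] args) {
    SparkStreamingPipelineOptions options =
        PipelineOptionsFactory.as(SparkStreamingPipelineOptions.class);
    options.setRunner(SparkPipelineRunner.class); // pipeline.run() dispatches to this runner
    options.setStreaming(true);                   // take the streaming branch in run()
    options.setTimeout(30000L);                   // assumed setter for the timeout read above

    Pipeline pipeline = Pipeline.create(options);
    pipeline
        // Queue-backed unbounded test source from this project; signature assumed.
        .apply(CreateStream.fromQueue(Collections.singletonList(Arrays.asList("a", "b"))))
        // Without this fixed window, run() throws "Spark streaming pipeline must be windowed!".
        .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))));

    pipeline.run(); // returns the EvaluationResult produced by SparkPipelineRunner.run()
  }
}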