Use of org.apache.spark.streaming.Duration in project learning-spark by databricks.
From the class StreamingLogInput, method main:
public static void main(String[] args) throws Exception {
    String master = args[0];
    JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
    // Create a StreamingContext with a 1 second batch size
    JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
    // Create a DStream from all the input on port 7777
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
    // Filter our DStream for lines with "error"
    JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
        public Boolean call(String line) {
            return line.contains("error");
        }
    });
    // Print out the lines with errors, which causes this DStream to be evaluated
    errorLines.print();
    // Start our streaming context and wait for it to "finish"
    jssc.start();
    // Wait for 10 seconds then exit. To run forever, call without a timeout.
    jssc.awaitTermination(10000);
    // Stop the streaming context
    jssc.stop();
}
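The anonymous Function above can be replaced with a Java 8 lambda. Below is a minimal, hypothetical rewrite of the same filter, keeping the 1-second batch size, port 7777, and the 10-second timeout from the example; the local[2] master and class name are illustrative assumptions (the socket receiver needs at least two local threads). Note that newer Spark releases rename awaitTermination(timeout) to awaitTerminationOrTimeout(timeout).

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class StreamingLogInputLambda {
    public static void main(String[] args) throws Exception {
        // Two local threads: one for the socket receiver, one for processing
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("StreamingLogInputLambda");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
        JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
        // Java 8 lambda instead of the anonymous Function<String, Boolean>
        JavaDStream<String> errorLines = lines.filter(line -> line.contains("error"));
        errorLines.print();
        jssc.start();
        // Run for 10 seconds, then stop (awaitTerminationOrTimeout in newer Spark versions)
        jssc.awaitTermination(10000);
        jssc.stop();
    }
}

To feed the stream, run nc -lk 7777 in another terminal and type lines containing "error".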
Use of org.apache.spark.streaming.Duration in project deeplearning4j by deeplearning4j.
From the class JavaQueueStream, method main:
public static void main(String[] args) throws Exception {
    SparkConf sparkConf = new SparkConf().setMaster("local[*]");
    // Create the context
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));
    // Create the queue through which RDDs can be pushed to a QueueInputDStream
    Queue<JavaRDD<Integer>> rddQueue = new LinkedList<>();
    // Create and push some RDDs into the queue
    List<Integer> list = Lists.newArrayList();
    for (int i = 0; i < 1000; i++) {
        list.add(i);
    }
    for (int i = 0; i < 30; i++) {
        rddQueue.add(ssc.sparkContext().parallelize(list));
    }
    // Create the QueueInputDStream and use it to do some processing
    JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
    JavaPairDStream<Integer, Integer> mappedStream = inputStream.mapToPair(new PairFunction<Integer, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(Integer i) {
            return new Tuple2<>(i % 10, 1);
        }
    });
    JavaPairDStream<Integer, Integer> reducedStream = mappedStream.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    reducedStream.print();
    ssc.start();
    ssc.awaitTermination();
}
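With Java 8 lambdas, the two anonymous classes above collapse to one line each, and awaitTerminationOrTimeout (available on JavaStreamingContext since roughly Spark 1.3; an assumption about the version in use) bounds the run instead of blocking forever. A sketch that would replace everything from the mapToPair call onward:

// Lambda equivalents of the anonymous PairFunction and Function2 above
JavaPairDStream<Integer, Integer> reducedStream = inputStream
        .mapToPair(i -> new Tuple2<>(i % 10, 1))
        .reduceByKey(Integer::sum);
reducedStream.print();
ssc.start();
// Block for at most 30 seconds, then stop both the streaming and Spark contexts gracefully
if (!ssc.awaitTerminationOrTimeout(30000)) {
    ssc.stop(true, true); // stop(stopSparkContext, stopGracefully)
}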
Use of org.apache.spark.streaming.Duration in project spark-dataflow by cloudera.
From the class StreamingTransformTranslator, method window:
private static <T, W extends BoundedWindow> TransformEvaluator<Window.Bound<T>> window() {
    return new TransformEvaluator<Window.Bound<T>>() {
        @Override
        public void evaluate(Window.Bound<T> transform, EvaluationContext context) {
            StreamingEvaluationContext sec = (StreamingEvaluationContext) context;
            // --- first we apply windowing to the stream
            WindowFn<? super T, W> windowFn = WINDOW_FG.get("windowFn", transform);
            @SuppressWarnings("unchecked")
            JavaDStream<WindowedValue<T>> dStream = (JavaDStream<WindowedValue<T>>) sec.getStream(transform);
            if (windowFn instanceof FixedWindows) {
                Duration windowDuration = Durations.milliseconds(((FixedWindows) windowFn).getSize().getMillis());
                sec.setStream(transform, dStream.window(windowDuration));
            } else if (windowFn instanceof SlidingWindows) {
                Duration windowDuration = Durations.milliseconds(((SlidingWindows) windowFn).getSize().getMillis());
                Duration slideDuration = Durations.milliseconds(((SlidingWindows) windowFn).getPeriod().getMillis());
                sec.setStream(transform, dStream.window(windowDuration, slideDuration));
            }
            // --- then we apply windowing to the elements
            DoFn<T, T> addWindowsDoFn = new AssignWindowsDoFn<>(windowFn);
            DoFnFunction<T, T> dofn = new DoFnFunction<>(addWindowsDoFn, ((StreamingEvaluationContext) context).getRuntimeContext(), null);
            @SuppressWarnings("unchecked")
            JavaDStreamLike<WindowedValue<T>, ?, JavaRDD<WindowedValue<T>>> dstream = (JavaDStreamLike<WindowedValue<T>, ?, JavaRDD<WindowedValue<T>>>) sec.getStream(transform);
            sec.setStream(transform, dstream.mapPartitions(dofn));
        }
    };
}
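Stripped of the Dataflow plumbing, the translator maps FixedWindows to DStream.window(length) and SlidingWindows to DStream.window(length, slide). A short sketch of those two calls against the plain Spark Streaming API; the socket source, 30-second window, and 10-second slide are illustrative assumptions:

SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("WindowSketch");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);

Duration windowDuration = Durations.seconds(30);  // what the FixedWindows/SlidingWindows size becomes
Duration slideDuration = Durations.seconds(10);   // what the SlidingWindows period becomes

// FixedWindows -> window length only (the slide defaults to the batch interval)
JavaDStream<String> fixed = lines.window(windowDuration);
// SlidingWindows -> window length plus slide interval
JavaDStream<String> sliding = lines.window(windowDuration, slideDuration);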
Use of org.apache.spark.streaming.Duration in project spark-dataflow by cloudera.
From the class SparkPipelineRunner, method run:
@Override
public EvaluationResult run(Pipeline pipeline) {
    try {
        // Validate streaming configuration
        if (mOptions.isStreaming() && !(mOptions instanceof SparkStreamingPipelineOptions)) {
            throw new RuntimeException("A streaming job must be configured with " + SparkStreamingPipelineOptions.class.getSimpleName() + ", found " + mOptions.getClass().getSimpleName());
        }
        LOG.info("Executing pipeline using the SparkPipelineRunner.");
        JavaSparkContext jsc = SparkContextFactory.getSparkContext(mOptions.getSparkMaster(), mOptions.getAppName());
        if (mOptions.isStreaming()) {
            SparkPipelineTranslator translator = new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
            // If streaming, a fixed window should be defined on all UNBOUNDED inputs
            StreamingWindowPipelineDetector streamingWindowPipelineDetector = new StreamingWindowPipelineDetector(translator);
            pipeline.traverseTopologically(streamingWindowPipelineDetector);
            if (!streamingWindowPipelineDetector.isWindowing()) {
                throw new IllegalStateException("Spark streaming pipeline must be windowed!");
            }
            Duration batchInterval = streamingWindowPipelineDetector.getBatchDuration();
            LOG.info("Setting Spark streaming batchInterval to {} msec", batchInterval.milliseconds());
            EvaluationContext ctxt = createStreamingEvaluationContext(jsc, pipeline, batchInterval);
            pipeline.traverseTopologically(new SparkPipelineEvaluator(ctxt, translator));
            ctxt.computeOutputs();
            LOG.info("Streaming pipeline construction complete. Starting execution..");
            ((StreamingEvaluationContext) ctxt).getStreamingContext().start();
            return ctxt;
        } else {
            EvaluationContext ctxt = new EvaluationContext(jsc, pipeline);
            SparkPipelineTranslator translator = new TransformTranslator.Translator();
            pipeline.traverseTopologically(new SparkPipelineEvaluator(ctxt, translator));
            ctxt.computeOutputs();
            LOG.info("Pipeline execution complete.");
            return ctxt;
        }
    } catch (Exception e) {
        // Find the cause: surface the user exception (wrapped by our
        // SparkProcessException), or just use the SparkException cause.
        if (e instanceof SparkException && e.getCause() != null) {
            if (e.getCause() instanceof SparkProcessContext.SparkProcessException && e.getCause().getCause() != null) {
                throw new RuntimeException(e.getCause().getCause());
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        // Otherwise just wrap in a RuntimeException
        throw new RuntimeException(e);
    }
}
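On the streaming path, the detected batch Duration ultimately becomes the batch interval of a JavaStreamingContext. A hedged sketch of what createStreamingEvaluationContext amounts to at the Spark API level (its actual internals are not shown here, and the 1000 ms value stands in for the detector's result):

// Illustrative only: turn a detected batch interval into a streaming context.
Duration batchInterval = Durations.milliseconds(1000);  // e.g. streamingWindowPipelineDetector.getBatchDuration()
JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchInterval);  // jsc as obtained in run() above
LOG.info("Setting Spark streaming batchInterval to {} msec", batchInterval.milliseconds());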