Search in sources :

Example 1 with CreateStream

Use of org.apache.beam.runners.spark.io.CreateStream in the Apache Beam project.

The method createFromQueue of the class StreamingTransformTranslator:

/**
 * Builds a {@link TransformEvaluator} for {@link CreateStream}: the transform's pre-baked
 * batches of timestamped values are materialized as one RDD per batch and fed to Spark as a
 * queue-based input DStream, and the transform's pre-baked watermarks are registered against
 * the resulting stream's source id.
 */
private static <T> TransformEvaluator<CreateStream<T>> createFromQueue() {
    return new TransformEvaluator<CreateStream<T>>() {

        @Override
        public void evaluate(CreateStream<T> transform, EvaluationContext context) {
            final JavaStreamingContext streamingContext = context.getStreamingContext();
            final Coder<T> elementCoder = context.getOutput(transform).getCoder();
            // Elements live in the global window, so a global-window value coder suffices.
            final WindowedValue.FullWindowedValueCoder<T> windowedCoder =
                WindowedValue.FullWindowedValueCoder.of(elementCoder, GlobalWindow.Coder.INSTANCE);
            // One RDD per pre-baked batch, queued up for Spark's queueStream.
            final Queue<JavaRDD<WindowedValue<T>>> batchRdds = new LinkedBlockingQueue<>();
            for (final Iterable<TimestampedValue<T>> batch : transform.getBatches()) {
                final Iterable<WindowedValue<T>> windowedBatch =
                    Iterables.transform(
                        batch,
                        new com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>>() {

                            @Override
                            public WindowedValue<T> apply(@Nonnull TimestampedValue<T> input) {
                                // Wrap each element in the global window with no pane firing info.
                                return WindowedValue.of(
                                    input.getValue(),
                                    input.getTimestamp(),
                                    GlobalWindow.INSTANCE,
                                    PaneInfo.NO_FIRING);
                            }
                        });
                // Round-trip through bytes so RDD elements are encoded with the Beam coder
                // rather than Java serialization.
                final JavaRDD<WindowedValue<T>> batchRdd =
                    streamingContext
                        .sparkContext()
                        .parallelize(CoderHelpers.toByteArrays(windowedBatch, windowedCoder))
                        .map(CoderHelpers.fromByteFunction(windowedCoder));
                batchRdds.offer(batchRdd);
            }
            // oneAtATime = true: Spark consumes exactly one queued RDD per micro-batch.
            final JavaInputDStream<WindowedValue<T>> queueDStream =
                streamingContext.queueStream(batchRdds, true);
            final UnboundedDataset<T> dataset =
                new UnboundedDataset<T>(
                    queueDStream, Collections.singletonList(queueDStream.inputDStream().id()));
            // Register the pre-baked watermarks for the pre-baked batches against this source.
            final Queue<GlobalWatermarkHolder.SparkWatermarks> watermarks = transform.getTimes();
            GlobalWatermarkHolder.addAll(
                ImmutableMap.of(dataset.getStreamSources().get(0), watermarks));
            context.putDataset(transform, dataset);
        }

        @Override
        public String toNativeString() {
            return "streamingContext.queueStream(...)";
        }
    };
}
Also used : LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CreateStream(org.apache.beam.runners.spark.io.CreateStream) TransformEvaluator(org.apache.beam.runners.spark.translation.TransformEvaluator) JavaRDD(org.apache.spark.api.java.JavaRDD) EvaluationContext(org.apache.beam.runners.spark.translation.EvaluationContext)

Aggregations

LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)1 CreateStream (org.apache.beam.runners.spark.io.CreateStream)1 EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext)1 TransformEvaluator (org.apache.beam.runners.spark.translation.TransformEvaluator)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 TimestampedValue (org.apache.beam.sdk.values.TimestampedValue)1 JavaRDD (org.apache.spark.api.java.JavaRDD)1 JavaStreamingContext (org.apache.spark.streaming.api.java.JavaStreamingContext)1