Search in sources :

Example 76 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class SideInputInitializer method initializeBroadcastVariable.

/**
 * Groups the incoming values by each window they belong to and builds one view per window.
 *
 * @param inputValues the windowed elements the views are built from
 * @return a map from every window found on {@code inputValues} to the result of applying
 *     {@code view.getViewFn()} to the elements of that window
 */
@Override
public Map<BoundedWindow, ViewT> initializeBroadcastVariable(Iterable<WindowedValue<ElemT>> inputValues) {
    // Bucket the elements per window; an element that sits in several windows
    // is added to the bucket of each of them.
    Map<BoundedWindow, List<WindowedValue<ElemT>>> byWindow = new HashMap<>();
    for (WindowedValue<ElemT> element : inputValues) {
        for (BoundedWindow window : element.getWindows()) {
            if (!byWindow.containsKey(window)) {
                byWindow.put(window, new ArrayList<WindowedValue<ElemT>>());
            }
            byWindow.get(window).add(element);
        }
    }
    // Turn every bucket into the view value for its window.
    Map<BoundedWindow, ViewT> viewsByWindow = new HashMap<>();
    for (BoundedWindow window : byWindow.keySet()) {
        // The view function takes wildcard-typed windowed values, hence the double cast.
        @SuppressWarnings("unchecked") Iterable<WindowedValue<?>> windowContents = (List<WindowedValue<?>>) (List<?>) byWindow.get(window);
        viewsByWindow.put(window, view.getViewFn().apply(windowContents));
    }
    return viewsByWindow;
}
Also used : HashMap(java.util.HashMap) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map)

Example 77 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class SortingFlinkCombineRunner method combine.

/**
 * Combines the given elements window by window and emits one combined value per window.
 *
 * <p>Every input value is exploded into one value per window, all exploded values are buffered
 * in memory and sorted by the maximum timestamp of their window. For merging window functions
 * the windows are merged in a pre-processing step. Consecutive values with equal windows are
 * then folded into one accumulator, and the extracted output is emitted whenever the window
 * changes and once more at the end. The key of the first sorted element is used for every
 * emitted value.
 *
 * <p>If {@code elements} yields no windowed values at all, nothing is emitted.
 *
 * @param flinkCombiner creates and updates accumulators and extracts the output from them
 * @param windowingStrategy supplies the {@code WindowFn} and the {@code TimestampCombiner}
 * @param sideInputReader passed through to {@code flinkCombiner}
 * @param options passed through to {@code flinkCombiner}
 * @param elements the windowed key/value pairs to combine
 * @param out receives one windowed key/output pair per window, with {@code PaneInfo.NO_FIRING}
 * @throws Exception as declared by the signature; nothing is caught in this method
 */
@Override
public void combine(FlinkCombiner<K, InputT, AccumT, OutputT> flinkCombiner, WindowingStrategy<Object, W> windowingStrategy, SideInputReader sideInputReader, PipelineOptions options, Iterable<WindowedValue<KV<K, InputT>>> elements, Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
    @SuppressWarnings("unchecked") TimestampCombiner timestampCombiner = (TimestampCombiner) windowingStrategy.getTimestampCombiner();
    WindowFn<Object, W> windowFn = windowingStrategy.getWindowFn();
    // get all elements so that we can sort them, has to fit into
    // memory
    // this seems very imprudent, but correct, for now
    List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
    for (WindowedValue<KV<K, InputT>> inputValue : elements) {
        for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
            sortedInput.add(exploded);
        }
    }
    if (sortedInput.isEmpty()) {
        // Nothing to sort, merge or emit. Without this guard the first
        // iterator.next() below would throw NoSuchElementException.
        return;
    }
    Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {

        @Override
        public int compare(WindowedValue<KV<K, InputT>> o1, WindowedValue<KV<K, InputT>> o2) {
            // each exploded value carries exactly one window
            return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp().compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
        }
    });
    if (!windowFn.isNonMerging()) {
        // merge windows, we have to do it in an extra pre-processing step and
        // can't do it as we go since the window of early elements would not
        // be correct when calling the CombineFn
        mergeWindow(sortedInput);
    }
    // iterate over the elements that are sorted by window timestamp
    final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
    // create accumulator using the first element's key
    WindowedValue<KV<K, InputT>> currentValue = iterator.next();
    K key = currentValue.getValue().getKey();
    W currentWindow = (W) Iterables.getOnlyElement(currentValue.getWindows());
    InputT firstValue = currentValue.getValue().getValue();
    AccumT accumulator = flinkCombiner.firstInput(key, firstValue, options, sideInputReader, currentValue.getWindows());
    // we use this to keep track of the timestamps assigned by the TimestampCombiner
    Instant windowTimestamp = timestampCombiner.assign(currentWindow, windowFn.getOutputTime(currentValue.getTimestamp(), currentWindow));
    while (iterator.hasNext()) {
        WindowedValue<KV<K, InputT>> nextValue = iterator.next();
        W nextWindow = (W) Iterables.getOnlyElement(nextValue.getWindows());
        if (currentWindow.equals(nextWindow)) {
            // same window: keep accumulating and fold the element's output time
            // into the timestamp tracked for this window
            InputT value = nextValue.getValue().getValue();
            accumulator = flinkCombiner.addInput(key, accumulator, value, options, sideInputReader, currentValue.getWindows());
            windowTimestamp = timestampCombiner.combine(windowTimestamp, timestampCombiner.assign(currentWindow, windowFn.getOutputTime(nextValue.getTimestamp(), currentWindow)));
        } else {
            // window changed: emit the value that we currently have ...
            out.collect(WindowedValue.of(KV.of(key, flinkCombiner.extractOutput(key, accumulator, options, sideInputReader, currentValue.getWindows())), windowTimestamp, currentWindow, PaneInfo.NO_FIRING));
            // ... and start a fresh accumulator for the new window
            currentWindow = nextWindow;
            currentValue = nextValue;
            InputT value = nextValue.getValue().getValue();
            accumulator = flinkCombiner.firstInput(key, value, options, sideInputReader, currentValue.getWindows());
            windowTimestamp = timestampCombiner.assign(currentWindow, windowFn.getOutputTime(nextValue.getTimestamp(), currentWindow));
        }
    }
    // emit the final accumulator
    out.collect(WindowedValue.of(KV.of(key, flinkCombiner.extractOutput(key, accumulator, options, sideInputReader, currentValue.getWindows())), windowTimestamp, currentWindow, PaneInfo.NO_FIRING));
}
Also used : TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 78 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class SideInputContainer method updatePCollectionViewWindowValues.

/**
 * Stores {@code windowValues} as the contents of the {@link PCollectionView} in the given
 * {@link BoundedWindow}, unless the contents already held for that
 * {@link PCollectionViewWindow} come from a pane whose index is not lower than the pane of
 * {@code windowValues}.
 */
private void updatePCollectionViewWindowValues(PCollectionView<?> view, BoundedWindow window, Collection<WindowedValue<?>> windowValues) {
    PCollectionViewWindow<?> viewWindow = PCollectionViewWindow.of(view, window);
    AtomicReference<Iterable<? extends WindowedValue<?>>> holder = viewByWindows.getUnchecked(viewWindow);
    if (holder.compareAndSet(null, windowValues)) {
        // nothing had been stored for this view window yet; the new values are in place.
        return;
    }
    PaneInfo incomingPane = windowValues.iterator().next().getPane();
    while (true) {
        Iterable<? extends WindowedValue<?>> current = holder.get();
        long currentPaneIndex;
        if (Iterables.isEmpty(current)) {
            // empty contents rank below every real pane index
            currentPaneIndex = -1L;
        } else {
            currentPaneIndex = current.iterator().next().getPane().getIndex();
        }
        if (incomingPane.getIndex() <= currentPaneIndex) {
            // the stored contents are at least as recent; keep them.
            return;
        }
        if (holder.compareAndSet(current, windowValues)) {
            // swapped in the newer pane.
            return;
        }
        // the reference changed between get() and compareAndSet(); re-read and retry.
    }
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo)

Example 79 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class BoundedReadEvaluatorFactoryTest method boundedSourceInMemoryTransformEvaluatorShardsOfSource.

/**
 * Splits the source into shards, runs every shard through one evaluator and verifies that one
 * output bundle is produced per split and that the bundles together contain exactly the
 * values 0 through 9 as wrapped by {@code gw(...)}.
 */
@Test
public void boundedSourceInMemoryTransformEvaluatorShardsOfSource() throws Exception {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    // Ask for bundles of half the estimated size of the source.
    List<? extends BoundedSource<Long>> sourceSplits = source.split(source.getEstimatedSizeBytes(pipelineOptions) / 2, pipelineOptions);

    // Wrap each split in a shard and commit them all as the evaluator's input bundle.
    UncommittedBundle<BoundedSourceShard<Long>> pendingShards = bundleFactory.createRootBundle();
    for (BoundedSource<Long> sourceSplit : sourceSplits) {
        pendingShards.add(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(sourceSplit)));
    }
    CommittedBundle<BoundedSourceShard<Long>> committedShards = pendingShards.commit(Instant.now());

    TransformEvaluator<BoundedSourceShard<Long>> shardEvaluator = factory.forApplication(longsProducer, committedShards);
    for (WindowedValue<BoundedSourceShard<Long>> shardElement : committedShards.getElements()) {
        // Stub a fresh output bundle before each shard is processed.
        UncommittedBundle<Long> freshOutput = bundleFactory.createBundle(longs);
        when(context.createBundle(longs)).thenReturn(freshOutput);
        shardEvaluator.processElement(shardElement);
    }
    TransformResult<?> evaluation = shardEvaluator.finishBundle();

    // One output bundle per split.
    assertThat(Iterables.size(evaluation.getOutputBundles()), equalTo(sourceSplits.size()));

    // Gather the elements of all output bundles.
    List<WindowedValue<?>> collected = new ArrayList<>();
    for (UncommittedBundle<?> uncommitted : evaluation.getOutputBundles()) {
        CommittedBundle<?> committed = uncommitted.commit(Instant.now());
        for (WindowedValue<?> element : committed.getElements()) {
            collected.add(element);
        }
    }
    // The matcher ignores order, so the expected values are simply listed ascending.
    assertThat(collected, Matchers.<WindowedValue<?>>containsInAnyOrder(gw(0L), gw(1L), gw(2L), gw(3L), gw(4L), gw(5L), gw(6L), gw(7L), gw(8L), gw(9L)));
}
Also used : ArrayList(java.util.ArrayList) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedSourceShard(org.apache.beam.runners.direct.BoundedReadEvaluatorFactory.BoundedSourceShard) Test(org.junit.Test)

Example 80 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class StreamingTransformTranslator method createFromQueue.

/**
 * Creates the evaluator for {@link CreateStream}: each pre-built batch of timestamped values
 * becomes one RDD of globally-windowed values, the RDDs are fed to a queue-backed input
 * stream, and the transform's watermark times are registered for that stream.
 */
private static <T> TransformEvaluator<CreateStream<T>> createFromQueue() {
    return new TransformEvaluator<CreateStream<T>>() {

        @Override
        public String toNativeString() {
            return "streamingContext.queueStream(...)";
        }

        @Override
        public void evaluate(CreateStream<T> transform, EvaluationContext context) {
            Coder<T> elementCoder = context.getOutput(transform).getCoder();
            JavaStreamingContext streamingContext = context.getStreamingContext();
            Queue<Iterable<TimestampedValue<T>>> batches = transform.getBatches();
            WindowedValue.FullWindowedValueCoder<T> windowedCoder = WindowedValue.FullWindowedValueCoder.of(elementCoder, GlobalWindow.Coder.INSTANCE);

            // Stateless conversion, shared by all batches: place each timestamped value
            // in the global window with its own timestamp.
            final com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>> intoGlobalWindow = new com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>>() {

                @Override
                public WindowedValue<T> apply(@Nonnull TimestampedValue<T> timestampedValue) {
                    return WindowedValue.of(timestampedValue.getValue(), timestampedValue.getTimestamp(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
                }
            };

            // Build one RDD per batch; the queue backs the DStream created below.
            Queue<JavaRDD<WindowedValue<T>>> batchRdds = new LinkedBlockingQueue<>();
            for (Iterable<TimestampedValue<T>> batch : batches) {
                Iterable<WindowedValue<T>> windowedBatch = Iterables.transform(batch, intoGlobalWindow);
                // Elements are encoded to bytes before parallelize and decoded again in the map step.
                JavaRDD<WindowedValue<T>> batchRdd = streamingContext.sparkContext().parallelize(CoderHelpers.toByteArrays(windowedBatch, windowedCoder)).map(CoderHelpers.fromByteFunction(windowedCoder));
                batchRdds.offer(batchRdd);
            }

            // NOTE(review): the boolean is presumably Spark's oneAtATime flag — confirm against queueStream docs.
            JavaInputDStream<WindowedValue<T>> queueStream = streamingContext.queueStream(batchRdds, true);
            UnboundedDataset<T> dataset = new UnboundedDataset<T>(queueStream, Collections.singletonList(queueStream.inputDStream().id()));

            // Register the pre-baked watermarks for the pre-baked batches under the stream's source id.
            Queue<GlobalWatermarkHolder.SparkWatermarks> watermarkTimes = transform.getTimes();
            GlobalWatermarkHolder.addAll(ImmutableMap.of(dataset.getStreamSources().get(0), watermarkTimes));
            context.putDataset(transform, dataset);
        }
    };
}
Also used : LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CreateStream(org.apache.beam.runners.spark.io.CreateStream) TransformEvaluator(org.apache.beam.runners.spark.translation.TransformEvaluator) JavaRDD(org.apache.spark.api.java.JavaRDD) EvaluationContext(org.apache.beam.runners.spark.translation.EvaluationContext)

Aggregations

WindowedValue (org.apache.beam.sdk.util.WindowedValue)89 Test (org.junit.Test)53 Instant (org.joda.time.Instant)47 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)36 KV (org.apache.beam.sdk.values.KV)19 ArrayList (java.util.ArrayList)17 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)17 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)17 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)17 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)16 TupleTag (org.apache.beam.sdk.values.TupleTag)13 JavaRDD (org.apache.spark.api.java.JavaRDD)8 ByteString (com.google.protobuf.ByteString)7 BeamFnApi (org.apache.beam.fn.v1.BeamFnApi)7 ThrowingConsumer (org.apache.beam.fn.harness.fn.ThrowingConsumer)6 IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord)6 TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner)6 CloseableThrowingConsumer (org.apache.beam.fn.harness.fn.CloseableThrowingConsumer)5 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)5 EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext)5