Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class SideInputInitializer, method initializeBroadcastVariable.
@Override
public Map<BoundedWindow, ViewT> initializeBroadcastVariable(
    Iterable<WindowedValue<ElemT>> inputValues) {

  // First partition the elements into their windows.
  Map<BoundedWindow, List<WindowedValue<ElemT>>> partitionedElements = new HashMap<>();
  for (WindowedValue<ElemT> value : inputValues) {
    for (BoundedWindow window : value.getWindows()) {
      List<WindowedValue<ElemT>> windowedValues = partitionedElements.get(window);
      if (windowedValues == null) {
        windowedValues = new ArrayList<>();
        partitionedElements.put(window, windowedValues);
      }
      windowedValues.add(value);
    }
  }

  // Then apply the view's ViewFn to the elements of each window.
  Map<BoundedWindow, ViewT> resultMap = new HashMap<>();
  for (Map.Entry<BoundedWindow, List<WindowedValue<ElemT>>> elements :
      partitionedElements.entrySet()) {
    @SuppressWarnings("unchecked")
    Iterable<WindowedValue<?>> elementsIterable =
        (List<WindowedValue<?>>) (List<?>) elements.getValue();
    resultMap.put(elements.getKey(), view.getViewFn().apply(elementsIterable));
  }
  return resultMap;
}
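The null-check-and-put idiom in the partitioning loop predates Java 8; on Java 8+ the same grouping can be written with Map.computeIfAbsent. A minimal standalone sketch of just that step, reusing the names from the snippet above (it assumes the same inputValues and type parameters):

// Hypothetical rewrite of the partitioning loop using computeIfAbsent;
// the behavior is identical to the explicit null check above.
Map<BoundedWindow, List<WindowedValue<ElemT>>> partitionedElements = new HashMap<>();
for (WindowedValue<ElemT> value : inputValues) {
  for (BoundedWindow window : value.getWindows()) {
    partitionedElements.computeIfAbsent(window, w -> new ArrayList<>()).add(value);
  }
}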
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class SortingFlinkCombineRunner, method combine.
@Override
public void combine(
    FlinkCombiner<K, InputT, AccumT, OutputT> flinkCombiner,
    WindowingStrategy<Object, W> windowingStrategy,
    SideInputReader sideInputReader,
    PipelineOptions options,
    Iterable<WindowedValue<KV<K, InputT>>> elements,
    Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {

  @SuppressWarnings("unchecked")
  TimestampCombiner timestampCombiner =
      (TimestampCombiner) windowingStrategy.getTimestampCombiner();
  WindowFn<Object, W> windowFn = windowingStrategy.getWindowFn();

  // Get all elements so that we can sort them; they have to fit into
  // memory. This seems rather imprudent, but it is correct, for now.
  List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
  for (WindowedValue<KV<K, InputT>> inputValue : elements) {
    for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
      sortedInput.add(exploded);
    }
  }
  Collections.sort(
      sortedInput,
      new Comparator<WindowedValue<KV<K, InputT>>>() {
        @Override
        public int compare(
            WindowedValue<KV<K, InputT>> o1, WindowedValue<KV<K, InputT>> o2) {
          return Iterables.getOnlyElement(o1.getWindows())
              .maxTimestamp()
              .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
        }
      });

  if (!windowingStrategy.getWindowFn().isNonMerging()) {
    // Merge windows. We have to do it in an extra pre-processing step and
    // can't do it as we go, since the window of early elements would not
    // be correct when calling the CombineFn.
    mergeWindow(sortedInput);
  }

  // Iterate over the elements that are sorted by window timestamp.
  final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();

  // Create an accumulator using the first element's key.
  WindowedValue<KV<K, InputT>> currentValue = iterator.next();
  K key = currentValue.getValue().getKey();
  W currentWindow = (W) Iterables.getOnlyElement(currentValue.getWindows());
  InputT firstValue = currentValue.getValue().getValue();
  AccumT accumulator =
      flinkCombiner.firstInput(
          key, firstValue, options, sideInputReader, currentValue.getWindows());

  // We use this to keep track of the timestamps assigned by the TimestampCombiner.
  Instant windowTimestamp =
      timestampCombiner.assign(
          currentWindow,
          windowFn.getOutputTime(currentValue.getTimestamp(), currentWindow));

  while (iterator.hasNext()) {
    WindowedValue<KV<K, InputT>> nextValue = iterator.next();
    W nextWindow = (W) Iterables.getOnlyElement(nextValue.getWindows());
    if (currentWindow.equals(nextWindow)) {
      // Same (possibly merged) window: continue accumulating.
      InputT value = nextValue.getValue().getValue();
      accumulator =
          flinkCombiner.addInput(
              key, accumulator, value, options, sideInputReader, currentValue.getWindows());
      windowTimestamp =
          timestampCombiner.combine(
              windowTimestamp,
              timestampCombiner.assign(
                  currentWindow,
                  windowFn.getOutputTime(nextValue.getTimestamp(), currentWindow)));
    } else {
      // Emit the value that we currently have, then start a fresh
      // accumulator for the next window.
      out.collect(
          WindowedValue.of(
              KV.of(
                  key,
                  flinkCombiner.extractOutput(
                      key, accumulator, options, sideInputReader, currentValue.getWindows())),
              windowTimestamp,
              currentWindow,
              PaneInfo.NO_FIRING));
      currentWindow = nextWindow;
      currentValue = nextValue;
      InputT value = nextValue.getValue().getValue();
      accumulator =
          flinkCombiner.firstInput(
              key, value, options, sideInputReader, currentValue.getWindows());
      windowTimestamp =
          timestampCombiner.assign(
              currentWindow,
              windowFn.getOutputTime(nextValue.getTimestamp(), currentWindow));
    }
  }

  // Emit the final accumulator.
  out.collect(
      WindowedValue.of(
          KV.of(
              key,
              flinkCombiner.extractOutput(
                  key, accumulator, options, sideInputReader, currentValue.getWindows())),
          windowTimestamp,
          currentWindow,
          PaneInfo.NO_FIRING));
}
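The anonymous Comparator above orders the exploded values by the max timestamp of their single window (explodeWindows() yields one value per window, so getOnlyElement is safe). On Java 8+ the same ordering can be expressed more compactly; a minimal sketch under that assumption:

// Hypothetical Java 8 equivalent of the Collections.sort call above.
sortedInput.sort(
    Comparator.comparing(
        (WindowedValue<KV<K, InputT>> wv) ->
            Iterables.getOnlyElement(wv.getWindows()).maxTimestamp()));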
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class SideInputContainer, method updatePCollectionViewWindowValues.
/**
 * Set the value of the {@link PCollectionView} in the {@link BoundedWindow} to be based on the
 * specified values, if the values are part of a later pane than currently exists within the
 * {@link PCollectionViewWindow}.
 */
private void updatePCollectionViewWindowValues(
    PCollectionView<?> view, BoundedWindow window, Collection<WindowedValue<?>> windowValues) {
  PCollectionViewWindow<?> windowedView = PCollectionViewWindow.of(view, window);
  AtomicReference<Iterable<? extends WindowedValue<?>>> contents =
      viewByWindows.getUnchecked(windowedView);
  if (contents.compareAndSet(null, windowValues)) {
    // The value had never been set, so we set it and are done.
    return;
  }
  PaneInfo newPane = windowValues.iterator().next().getPane();
  Iterable<? extends WindowedValue<?>> existingValues;
  long existingPane;
  do {
    existingValues = contents.get();
    existingPane =
        Iterables.isEmpty(existingValues)
            ? -1L
            : existingValues.iterator().next().getPane().getIndex();
  } while (newPane.getIndex() > existingPane
      && !contents.compareAndSet(existingValues, windowValues));
}
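The do/while loop is a classic compare-and-set retry: the new values replace the current contents only if they carry a later pane index, and the CAS retries whenever a concurrent writer changes the reference between the read and the swap. The same monotonic-update pattern in isolation, as a hedged sketch using java.util.concurrent.atomic.AtomicReference and java.util.function.ToLongFunction (updateIfNewer and its version function are hypothetical names, not Beam API):

// Hypothetical generic form of the retry loop above: install candidate
// only while it is strictly newer than what is currently stored.
static <T> void updateIfNewer(
    AtomicReference<T> ref, T candidate, ToLongFunction<T> version) {
  T existing;
  do {
    existing = ref.get();
  } while (version.applyAsLong(candidate) > version.applyAsLong(existing)
      && !ref.compareAndSet(existing, candidate));
}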
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class BoundedReadEvaluatorFactoryTest, method boundedSourceInMemoryTransformEvaluatorShardsOfSource.
@Test
public void boundedSourceInMemoryTransformEvaluatorShardsOfSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<? extends BoundedSource<Long>> splits =
      source.split(source.getEstimatedSizeBytes(options) / 2, options);

  UncommittedBundle<BoundedSourceShard<Long>> rootBundle = bundleFactory.createRootBundle();
  for (BoundedSource<Long> split : splits) {
    BoundedSourceShard<Long> shard = BoundedSourceShard.of(split);
    rootBundle.add(WindowedValue.valueInGlobalWindow(shard));
  }
  CommittedBundle<BoundedSourceShard<Long>> shards = rootBundle.commit(Instant.now());

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(longsProducer, shards);
  for (WindowedValue<BoundedSourceShard<Long>> shard : shards.getElements()) {
    UncommittedBundle<Long> outputBundle = bundleFactory.createBundle(longs);
    when(context.createBundle(longs)).thenReturn(outputBundle);
    evaluator.processElement(shard);
  }
  TransformResult<?> result = evaluator.finishBundle();

  assertThat(Iterables.size(result.getOutputBundles()), equalTo(splits.size()));
  List<WindowedValue<?>> outputElems = new ArrayList<>();
  for (UncommittedBundle<?> outputBundle : result.getOutputBundles()) {
    CommittedBundle<?> outputs = outputBundle.commit(Instant.now());
    for (WindowedValue<?> outputElem : outputs.getElements()) {
      outputElems.add(outputElem);
    }
  }
  assertThat(
      outputElems,
      Matchers.<WindowedValue<?>>containsInAnyOrder(
          gw(1L), gw(2L), gw(4L), gw(8L), gw(9L), gw(7L), gw(6L), gw(5L), gw(3L), gw(0L)));
}
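The gw(...) calls in the final assertion refer to a small helper defined elsewhere in the test class; judging by the types involved, it presumably wraps a Long in the global window, along these lines (only the name gw comes from the snippet; the body here is an assumption):

// Assumed shape of the test's gw helper: a Long in the global window.
private static WindowedValue<Long> gw(Long elem) {
  return WindowedValue.valueInGlobalWindow(elem);
}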
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class StreamingTransformTranslator, method createFromQueue.
private static <T> TransformEvaluator<CreateStream<T>> createFromQueue() {
  return new TransformEvaluator<CreateStream<T>>() {

    @Override
    public void evaluate(CreateStream<T> transform, EvaluationContext context) {
      Coder<T> coder = context.getOutput(transform).getCoder();
      JavaStreamingContext jssc = context.getStreamingContext();
      Queue<Iterable<TimestampedValue<T>>> values = transform.getBatches();
      WindowedValue.FullWindowedValueCoder<T> windowCoder =
          WindowedValue.FullWindowedValueCoder.of(coder, GlobalWindow.Coder.INSTANCE);

      // Create the DStream from the queue of batches.
      Queue<JavaRDD<WindowedValue<T>>> rddQueue = new LinkedBlockingQueue<>();
      for (Iterable<TimestampedValue<T>> tv : values) {
        Iterable<WindowedValue<T>> windowedValues =
            Iterables.transform(
                tv,
                new com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>>() {
                  @Override
                  public WindowedValue<T> apply(@Nonnull TimestampedValue<T> timestampedValue) {
                    return WindowedValue.of(
                        timestampedValue.getValue(),
                        timestampedValue.getTimestamp(),
                        GlobalWindow.INSTANCE,
                        PaneInfo.NO_FIRING);
                  }
                });
        JavaRDD<WindowedValue<T>> rdd =
            jssc.sparkContext()
                .parallelize(CoderHelpers.toByteArrays(windowedValues, windowCoder))
                .map(CoderHelpers.fromByteFunction(windowCoder));
        rddQueue.offer(rdd);
      }
      JavaInputDStream<WindowedValue<T>> inputDStream = jssc.queueStream(rddQueue, true);
      UnboundedDataset<T> unboundedDataset =
          new UnboundedDataset<T>(
              inputDStream, Collections.singletonList(inputDStream.inputDStream().id()));

      // Add pre-baked watermarks for the pre-baked batches.
      Queue<GlobalWatermarkHolder.SparkWatermarks> times = transform.getTimes();
      GlobalWatermarkHolder.addAll(
          ImmutableMap.of(unboundedDataset.getStreamSources().get(0), times));
      context.putDataset(transform, unboundedDataset);
    }

    @Override
    public String toNativeString() {
      return "streamingContext.queueStream(...)";
    }
  };
}
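Note the round trip in the parallelize call: the windowed values are encoded to byte arrays with the FullWindowedValueCoder before being handed to Spark and decoded again inside the RDD, so Spark only ever ships plain byte[] rather than Java-serializing WindowedValue objects. A minimal sketch of that coder round trip on its own, assuming Beam's CoderUtils and a String payload instead of the snippet's generic T:

// Encode a windowed value to bytes and back with a Beam coder; this is
// the same round trip CoderHelpers performs for Spark above.
WindowedValue.FullWindowedValueCoder<String> windowCoder =
    WindowedValue.FullWindowedValueCoder.of(
        StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
byte[] bytes =
    CoderUtils.encodeToByteArray(windowCoder, WindowedValue.valueInGlobalWindow("hello"));
WindowedValue<String> roundTripped = CoderUtils.decodeFromByteArray(windowCoder, bytes);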