use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class ViewEvaluatorFactory method createEvaluator.
/**
 * Creates the evaluator for a {@code WriteView} application.
 *
 * <p>The returned evaluator buffers every element contained in the iterables it is given
 * (each one re-wrapped through {@code WindowedValue#withValue} on the enclosing value) and,
 * when the bundle finishes, passes the whole buffer to a view writer obtained from the
 * evaluation context.
 *
 * @param application the applied transform; its only input and only output are used
 * @return an evaluator that collects the elements and writes them on {@code finishBundle}
 */
private <InT, OuT> TransformEvaluator<Iterable<InT>> createEvaluator(final AppliedPTransform<PCollection<Iterable<InT>>, PCollectionView<OuT>, WriteView<InT, OuT>> application) {
    PCollection<Iterable<InT>> sourceCollection =
        (PCollection<Iterable<InT>>) Iterables.getOnlyElement(application.getInputs().values());
    PCollectionView<OuT> targetView =
        (PCollectionView<OuT>) Iterables.getOnlyElement(application.getOutputs().values());
    final PCollectionViewWriter<InT, OuT> writer =
        context.createPCollectionViewWriter(sourceCollection, targetView);

    return new TransformEvaluator<Iterable<InT>>() {
        // Everything seen during this bundle, flattened out of the incoming iterables.
        private final List<WindowedValue<InT>> buffered = new ArrayList<>();

        @Override
        public void processElement(WindowedValue<Iterable<InT>> element) {
            for (InT item : element.getValue()) {
                buffered.add(element.withValue(item));
            }
        }

        @Override
        public TransformResult<Iterable<InT>> finishBundle() {
            // The writer is always invoked, even when nothing was buffered.
            writer.add(buffered);
            Builder result = StepTransformResult.withoutHold(application);
            if (buffered.isEmpty()) {
                return result.build();
            }
            // Only report the view as an additional output when something was written.
            return result.withAdditionalOutput(OutputType.PCOLLECTION_VIEW).build();
        }
    };
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class SideInputContainer method indexValuesByWindow.
/**
 * Groups the provided values by each {@link BoundedWindow window} they belong to.
 *
 * <p>A value that reports several windows is listed once under every one of them.
 *
 * @param values the windowed values to index
 * @return a new mutable map from window to the values that appear in that window
 */
private Map<BoundedWindow, Collection<WindowedValue<?>>> indexValuesByWindow(Iterable<? extends WindowedValue<?>> values) {
    Map<BoundedWindow, Collection<WindowedValue<?>>> index = new HashMap<>();
    for (WindowedValue<?> windowedValue : values) {
        for (BoundedWindow window : windowedValue.getWindows()) {
            // Lazily create the bucket the first time a window is encountered.
            if (!index.containsKey(window)) {
                index.put(window, new ArrayList<WindowedValue<?>>());
            }
            index.get(window).add(windowedValue);
        }
    }
    return index;
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class EvaluationContextTest method writeToViewWriterThenReadReads.
@Test
public void writeToViewWriterThenReadReads() {
    PCollectionViewWriter<Integer, Iterable<Integer>> viewWriter =
        context.createPCollectionViewWriter(
            PCollection.<Iterable<Integer>>createPrimitiveOutputInternal(
                p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED),
            view);

    BoundedWindow firstWindow = new TestBoundedWindow(new Instant(1024L));
    BoundedWindow secondWindow = new TestBoundedWindow(new Instant(899999L));

    // Write one value into each of the two windows.
    WindowedValue<Integer> firstValue =
        WindowedValue.of(1, new Instant(1222), firstWindow, PaneInfo.ON_TIME_AND_ONLY_FIRING);
    WindowedValue<Integer> secondValue =
        WindowedValue.of(
            2,
            new Instant(8766L),
            secondWindow,
            PaneInfo.createPane(true, false, Timing.ON_TIME, 0, 0));
    Iterable<WindowedValue<Integer>> initialContents = ImmutableList.of(firstValue, secondValue);
    viewWriter.add(initialContents);

    SideInputReader originalReader =
        context.createSideInputReader(ImmutableList.<PCollectionView<?>>of(view));
    assertThat(originalReader.get(view, firstWindow), containsInAnyOrder(1));
    assertThat(originalReader.get(view, secondWindow), containsInAnyOrder(2));

    // Write a later pane for the second window.
    WindowedValue<Integer> overwrittenSecondValue =
        WindowedValue.of(
            4444,
            new Instant(8677L),
            secondWindow,
            PaneInfo.createPane(false, true, Timing.LATE, 1, 1));
    viewWriter.add(Collections.singleton(overwrittenSecondValue));

    // The reader created earlier keeps serving the value it has already read.
    assertThat(originalReader.get(view, secondWindow), containsInAnyOrder(2));

    // A reader created after the second write observes the new contents.
    SideInputReader refreshedReader =
        context.createSideInputReader(ImmutableList.<PCollectionView<?>>of(view));
    assertThat(refreshedReader.get(view, secondWindow), containsInAnyOrder(4444));
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class ImmutableListBundleFactoryTest method afterCommitGetElementsShouldHaveAddedElements.
/**
 * Adds every element of {@code elems} to a fresh root bundle, commits it at the current
 * time, and asserts that the committed bundle exposes exactly those elements, the smallest
 * element timestamp as its minimum timestamp, and the commit time as its synchronized
 * processing output watermark.
 *
 * @param elems the elements to add before committing
 * @return the committed bundle, for further assertions by the caller
 */
private <T> CommittedBundle<T> afterCommitGetElementsShouldHaveAddedElements(Iterable<WindowedValue<T>> elems) {
    UncommittedBundle<T> uncommitted = bundleFactory.createRootBundle();
    Collection<Matcher<? super WindowedValue<T>>> elementMatchers = new ArrayList<>();

    // Track the earliest timestamp among the added elements, starting from the maximum.
    Instant earliestTimestamp = BoundedWindow.TIMESTAMP_MAX_VALUE;
    for (WindowedValue<T> added : elems) {
        uncommitted.add(added);
        elementMatchers.add(equalTo(added));
        Instant addedTimestamp = added.getTimestamp();
        if (addedTimestamp.isBefore(earliestTimestamp)) {
            earliestTimestamp = addedTimestamp;
        }
    }
    Matcher<Iterable<? extends WindowedValue<T>>> matchesAllAdded =
        Matchers.<WindowedValue<T>>containsInAnyOrder(elementMatchers);

    Instant committedAt = Instant.now();
    CommittedBundle<T> result = uncommitted.commit(committedAt);

    assertThat(result.getElements(), matchesAllAdded);
    // Sanity check: the two timestamps asserted below must differ for the test to mean anything.
    assertThat(earliestTimestamp, not(equalTo(committedAt)));
    assertThat(result.getMinTimestamp(), equalTo(earliestTimestamp));
    assertThat(result.getSynchronizedProcessingOutputWatermark(), equalTo(committedAt));
    return result;
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class SparkGlobalCombineFn method createAccumulator.
/**
 * Builds the initial accumulators for a single input value.
 *
 * <p>The input is exploded into one value per window, the exploded values are ordered with
 * {@code sortByWindows}, and then they are folded left to right: consecutive values that
 * share a window (or, for a merging window fn, whose interval windows intersect) are added
 * to the same accumulator; any other value closes the current accumulator and starts a new
 * one. Each emitted accumulator carries the timestamp produced by the windowing strategy's
 * {@link TimestampCombiner} and {@code PaneInfo.NO_FIRING}.
 *
 * @param input the windowed input to accumulate
 * @return one windowed accumulator per resulting window, in the order they were closed
 */
private Iterable<WindowedValue<AccumT>> createAccumulator(WindowedValue<InputT> input) {
    // Explode per window and order the exploded inputs.
    Iterable<WindowedValue<InputT>> ordered = sortByWindows(input.explodeWindows());
    TimestampCombiner timestampCombiner = windowingStrategy.getTimestampCombiner();
    WindowFn<?, BoundedWindow> windowFn = windowingStrategy.getWindowFn();

    // Walk the exploded inputs in window order; the first one seeds the state.
    final Iterator<WindowedValue<InputT>> remaining = ordered.iterator();
    WindowedValue<InputT> seed = remaining.next();
    BoundedWindow activeWindow = Iterables.getFirst(seed.getWindows(), null);

    // Create the first accumulator and feed it the seed value.
    AccumT running = combineFn.createAccumulator(ctxtForInput(seed));
    running = combineFn.addInput(running, seed.getValue(), ctxtForInput(seed));

    // Timestamp assigned by the TimestampCombiner for the window being accumulated.
    Instant activeTimestamp =
        timestampCombiner.assign(
            activeWindow,
            windowingStrategy.getWindowFn().getOutputTime(seed.getTimestamp(), activeWindow));

    List<WindowedValue<AccumT>> results = Lists.newArrayList();

    // With a merging window fn (e.g. Sessions), intersecting windows are merged.
    final boolean mergingWindows = !windowingStrategy.getWindowFn().isNonMerging();

    while (remaining.hasNext()) {
        WindowedValue<InputT> candidate = remaining.next();
        BoundedWindow candidateWindow = Iterables.getOnlyElement(candidate.getWindows());
        boolean overlapsWhileMerging =
            mergingWindows
                && isIntersecting((IntervalWindow) activeWindow, (IntervalWindow) candidateWindow);

        if (!overlapsWhileMerging && !candidateWindow.equals(activeWindow)) {
            // A different window starts here: emit what has been accumulated so far ...
            results.add(
                WindowedValue.of(running, activeTimestamp, activeWindow, PaneInfo.NO_FIRING));
            // ... then restart the accumulator, window and timestamp from this value.
            running = combineFn.createAccumulator(ctxtForInput(candidate));
            running = combineFn.addInput(running, candidate.getValue(), ctxtForInput(candidate));
            activeWindow = candidateWindow;
            activeTimestamp =
                timestampCombiner.assign(
                    activeWindow, windowFn.getOutputTime(candidate.getTimestamp(), activeWindow));
            continue;
        }

        if (overlapsWhileMerging) {
            // Grow the active window so that it covers the intersecting one.
            activeWindow = merge((IntervalWindow) activeWindow, (IntervalWindow) candidateWindow);
        }
        // Same (possibly merged) window: keep accumulating.
        running = combineFn.addInput(running, candidate.getValue(), ctxtForInput(candidate));
        activeTimestamp =
            timestampCombiner.merge(
                activeWindow,
                activeTimestamp,
                windowingStrategy.getWindowFn().getOutputTime(candidate.getTimestamp(), activeWindow));
    }

    // Emit the accumulator that was still open when the inputs ran out.
    results.add(WindowedValue.of(running, activeTimestamp, activeWindow, PaneInfo.NO_FIRING));
    return results;
}
Aggregations