use of org.apache.beam.sdk.transforms.windowing.TimestampCombiner in project beam by apache.
the class WindowingStrategyTranslation method fromProto.
/**
 * Builds the SDK's {@link WindowingStrategy} from its {@link RunnerApi.WindowingStrategy} proto
 * form.
 *
 * <p>The window function, timestamp combiner, accumulation mode, trigger, closing behavior and
 * allowed lateness are each read from the proto and translated on their own, then assembled into
 * a single strategy. The allowed lateness stored in the proto is interpreted as milliseconds.
 *
 * @param proto the windowing strategy message to convert
 * @param components the components accompanying the proto. NOTE(review): this method body does
 *     not reference it; confirm whether identifiers are expected to be resolved against it here.
 * @return the SDK windowing strategy assembled from the translated parts
 * @throws InvalidProtocolBufferException declared by this method; presumably raised while
 *     decoding the window function spec (confirm against {@code windowFnFromProto})
 */
public static WindowingStrategy<?, ?> fromProto(RunnerApi.WindowingStrategy proto, Components components) throws InvalidProtocolBufferException {
  // Translate every part first, in the same order the proto fields were originally read.
  WindowFn<?, ?> translatedWindowFn = windowFnFromProto(proto.getWindowFn());
  TimestampCombiner combiner = timestampCombinerFromProto(proto.getOutputTime());
  AccumulationMode mode = fromProto(proto.getAccumulationMode());
  Trigger translatedTrigger = TriggerTranslation.fromProto(proto.getTrigger());
  ClosingBehavior onClose = fromProto(proto.getClosingBehavior());
  Duration lateness = Duration.millis(proto.getAllowedLateness());

  // Assemble the strategy from the translated parts.
  return WindowingStrategy.of(translatedWindowFn)
      .withAllowedLateness(lateness)
      .withMode(mode)
      .withTrigger(translatedTrigger)
      .withTimestampCombiner(combiner)
      .withClosingBehavior(onClose);
}
use of org.apache.beam.sdk.transforms.windowing.TimestampCombiner in project beam by apache.
the class CopyOnAccessInMemoryStateInternalsTest method testWatermarkHoldStateWithUnderlying.
/**
 * Verifies that watermark hold state read through a copy-on-access layer starts out with the
 * value held by the underlying internals, and that holds added through the copy do not change
 * what the underlying internals report.
 */
@Test
public void testWatermarkHoldStateWithUnderlying() {
  final Instant underlyingHold = new Instant(250L);
  final Instant earlierHold = new Instant(100L);
  final Instant laterHold = new Instant(500L);

  CopyOnAccessInMemoryStateInternals<String> base =
      CopyOnAccessInMemoryStateInternals.withUnderlying(key, null);
  StateNamespace ns = new StateNamespaceForTest("foo");
  StateTag<WatermarkHoldState> holdTag =
      StateTags.watermarkStateInternal("wmstate", TimestampCombiner.EARLIEST);

  // The underlying state starts empty and then holds the single added instant.
  WatermarkHoldState baseHold = base.state(ns, holdTag);
  assertThat(baseHold.read(), nullValue());
  baseHold.add(underlyingHold);
  assertThat(baseHold.read(), equalTo(underlyingHold));

  // A layer on top of the underlying internals initially sees the underlying hold.
  CopyOnAccessInMemoryStateInternals<String> layered =
      CopyOnAccessInMemoryStateInternals.withUnderlying(key, base);
  WatermarkHoldState layeredHold = layered.state(ns, holdTag);
  assertThat(layeredHold.read(), equalTo(underlyingHold));

  // An earlier hold added to the layer wins there, but leaves the underlying value untouched.
  layeredHold.add(earlierHold);
  assertThat(layeredHold.read(), equalTo(earlierHold));
  assertThat(baseHold.read(), equalTo(underlyingHold));

  // A later hold does not displace the earliest one.
  layeredHold.add(laterHold);
  assertThat(layeredHold.read(), equalTo(earlierHold));

  // Fetching the state from the underlying internals again yields the same reading.
  WatermarkHoldState baseHoldAgain = base.state(ns, holdTag);
  assertThat(baseHold.read(), equalTo(baseHoldAgain.read()));
}
use of org.apache.beam.sdk.transforms.windowing.TimestampCombiner in project beam by apache.
the class SparkGlobalCombineFn method createAccumulator.
/**
 * Builds the initial accumulators for one input element.
 *
 * <p>The input is exploded into one value per window and sorted by window. Values that fall into
 * the same window (or, for a merging window function, into intersecting windows) are folded into
 * a single accumulator; every other window starts a new accumulator. Each emitted accumulator
 * carries the timestamp produced by the strategy's {@link TimestampCombiner} and
 * {@code PaneInfo.NO_FIRING}.
 *
 * <p>For merging window functions the windows are cast to {@link IntervalWindow}.
 *
 * @param input the element to accumulate
 * @return the accumulators, one per resulting window, in sorted window order
 */
private Iterable<WindowedValue<AccumT>> createAccumulator(WindowedValue<InputT> input) {
  // Explode the input into per-window values and order them by window.
  Iterable<WindowedValue<InputT>> explodedAndSorted = sortByWindows(input.explodeWindows());
  final TimestampCombiner timestampCombiner = windowingStrategy.getTimestampCombiner();
  final WindowFn<?, BoundedWindow> windowFn = windowingStrategy.getWindowFn();
  final Iterator<WindowedValue<InputT>> remaining = explodedAndSorted.iterator();

  // Seed the running state from the first value.
  WindowedValue<InputT> head = remaining.next();
  BoundedWindow activeWindow = Iterables.getFirst(head.getWindows(), null);
  AccumT running = combineFn.createAccumulator(ctxtForInput(head));
  running = combineFn.addInput(running, head.getValue(), ctxtForInput(head));
  Instant activeTimestamp =
      timestampCombiner.assign(
          activeWindow, windowFn.getOutputTime(head.getTimestamp(), activeWindow));

  List<WindowedValue<AccumT>> results = Lists.newArrayList();
  // Merging window functions (e.g. Sessions) fold intersecting windows together.
  final boolean merging = !windowFn.isNonMerging();

  while (remaining.hasNext()) {
    WindowedValue<InputT> element = remaining.next();
    BoundedWindow elementWindow = Iterables.getOnlyElement(element.getWindows());
    boolean overlapsActive =
        merging && isIntersecting((IntervalWindow) activeWindow, (IntervalWindow) elementWindow);

    if (!overlapsActive && !elementWindow.equals(activeWindow)) {
      // A new window begins: emit what was accumulated so far and start over from this element.
      results.add(WindowedValue.of(running, activeTimestamp, activeWindow, PaneInfo.NO_FIRING));
      running = combineFn.createAccumulator(ctxtForInput(element));
      running = combineFn.addInput(running, element.getValue(), ctxtForInput(element));
      activeWindow = elementWindow;
      activeTimestamp =
          timestampCombiner.assign(
              activeWindow, windowFn.getOutputTime(element.getTimestamp(), activeWindow));
      continue;
    }

    if (overlapsActive) {
      // Grow the active window so it covers the intersecting one as well.
      activeWindow = merge((IntervalWindow) activeWindow, (IntervalWindow) elementWindow);
    }
    // Same (possibly merged) window: keep folding into the running accumulator.
    running = combineFn.addInput(running, element.getValue(), ctxtForInput(element));
    activeTimestamp =
        timestampCombiner.merge(
            activeWindow,
            activeTimestamp,
            windowFn.getOutputTime(element.getTimestamp(), activeWindow));
  }

  // Emit the accumulator of the last window.
  results.add(WindowedValue.of(running, activeTimestamp, activeWindow, PaneInfo.NO_FIRING));
  return results;
}
use of org.apache.beam.sdk.transforms.windowing.TimestampCombiner in project beam by apache.
the class SparkGlobalCombineFn method combOp.
/**
 * Implements Spark's combOp function in:
 * <p>
 * {@link org.apache.spark.api.java.JavaRDD#aggregate}.
 * </p>
 *
 * <p>Both accumulator collections are concatenated and sorted by window. Accumulators sharing a
 * window (or, for a merging window function, sitting in intersecting windows) are merged through
 * the combine function into one accumulator per resulting window. The timestamp of each result
 * is obtained by merging the timestamps of the accumulators that went into it.
 *
 * <p>For merging window functions the windows are cast to {@link IntervalWindow}.
 *
 * @param a1 the first collection of windowed accumulators
 * @param a2 the second collection of windowed accumulators
 * @return one merged accumulator per resulting window, or an empty list when both inputs are
 *     empty
 */
Iterable<WindowedValue<AccumT>> combOp(Iterable<WindowedValue<AccumT>> a1, Iterable<WindowedValue<AccumT>> a2) {
  Iterable<WindowedValue<AccumT>> combined = Iterables.concat(a1, a2);
  // Nothing to merge: hand back an empty accumulators collection.
  if (!combined.iterator().hasNext()) {
    return Lists.newArrayList();
  }

  // Sorting is enough here; the values are expected to be single-window already.
  Iterable<WindowedValue<AccumT>> byWindow = sortByWindows(combined);
  TimestampCombiner timestampCombiner = windowingStrategy.getTimestampCombiner();
  final Iterator<WindowedValue<AccumT>> remaining = byWindow.iterator();

  // Seed the group for the first window with the first accumulator.
  WindowedValue<AccumT> head = remaining.next();
  BoundedWindow activeWindow = Iterables.getFirst(head.getWindows(), null);
  List<AccumT> group = Lists.newArrayList();
  group.add(head.getValue());
  // Incoming timestamps were already assigned per element; here they only get merged.
  List<Instant> groupTimestamps = Lists.newArrayList();
  groupTimestamps.add(head.getTimestamp());

  List<WindowedValue<AccumT>> results = Lists.newArrayList();
  // Merging window functions (e.g. Sessions) fold intersecting windows together.
  final boolean merging = !windowingStrategy.getWindowFn().isNonMerging();

  while (remaining.hasNext()) {
    WindowedValue<AccumT> candidate = remaining.next();
    BoundedWindow candidateWindow = Iterables.getOnlyElement(candidate.getWindows());
    boolean overlapsActive =
        merging && isIntersecting((IntervalWindow) activeWindow, (IntervalWindow) candidateWindow);

    if (!overlapsActive && !candidateWindow.equals(activeWindow)) {
      // A new window begins: merge and emit the finished group first.
      Instant groupTimestamp = timestampCombiner.merge(activeWindow, groupTimestamps);
      Iterable<AccumT> groupView = Iterables.unmodifiableIterable(group);
      WindowedValue<Iterable<AccumT>> pending =
          WindowedValue.of(groupView, groupTimestamp, activeWindow, PaneInfo.NO_FIRING);
      AccumT merged = combineFn.mergeAccumulators(groupView, ctxtForInput(pending));
      results.add(pending.withValue(merged));

      // Restart the group, window and timestamps from the candidate.
      group.clear();
      group.add(candidate.getValue());
      activeWindow = candidateWindow;
      groupTimestamps.clear();
      groupTimestamps.add(candidate.getTimestamp());
      continue;
    }

    if (overlapsActive) {
      // Grow the active window so it covers the intersecting one as well.
      activeWindow = merge((IntervalWindow) activeWindow, (IntervalWindow) candidateWindow);
    }
    // Same (possibly merged) window: the candidate joins the current group.
    group.add(candidate.getValue());
    groupTimestamps.add(candidate.getTimestamp());
  }

  // Merge and emit the group of the last window.
  Instant lastTimestamp = timestampCombiner.merge(activeWindow, groupTimestamps);
  Iterable<AccumT> lastView = Iterables.unmodifiableIterable(group);
  WindowedValue<Iterable<AccumT>> lastPending =
      WindowedValue.of(lastView, lastTimestamp, activeWindow, PaneInfo.NO_FIRING);
  AccumT lastMerged = combineFn.mergeAccumulators(lastView, ctxtForInput(lastPending));
  results.add(lastPending.withValue(lastMerged));
  return results;
}
use of org.apache.beam.sdk.transforms.windowing.TimestampCombiner in project beam by apache.
the class SparkKeyedCombineFn method mergeCombiners.
/**
 * Implements Spark's mergeCombiners function in:
 * <p>
 * {@link org.apache.spark.rdd.PairRDDFunctions#combineByKey}.
 * </p>
 *
 * <p>Both accumulator collections are concatenated and sorted by window. Accumulators sharing a
 * window (or, for a merging window function, sitting in intersecting windows) are merged through
 * the combine function into one accumulator per resulting window. The timestamp of each result
 * is obtained by merging the timestamps of the accumulators that went into it.
 *
 * <p>The key of the first accumulator (in sorted order) is attached to every result. For merging
 * window functions the windows are cast to {@link IntervalWindow}.
 *
 * <p>There is no guard for empty input: the first accumulator is fetched unconditionally, so at
 * least one of the two collections has to be non-empty.
 *
 * @param a1 the first collection of windowed, keyed accumulators
 * @param a2 the second collection of windowed, keyed accumulators
 * @return one merged, keyed accumulator per resulting window
 */
Iterable<WindowedValue<KV<K, AccumT>>> mergeCombiners(Iterable<WindowedValue<KV<K, AccumT>>> a1, Iterable<WindowedValue<KV<K, AccumT>>> a2) {
  Iterable<WindowedValue<KV<K, AccumT>>> combined = Iterables.concat(a1, a2);
  // Sorting is enough here; the values are expected to be single-window already.
  Iterable<WindowedValue<KV<K, AccumT>>> byWindow = sortByWindows(combined);
  TimestampCombiner timestampCombiner = windowingStrategy.getTimestampCombiner();
  final Iterator<WindowedValue<KV<K, AccumT>>> remaining = byWindow.iterator();

  // Seed the group for the first window with the first accumulator; its key is reused throughout.
  WindowedValue<KV<K, AccumT>> head = remaining.next();
  K key = head.getValue().getKey();
  BoundedWindow activeWindow = Iterables.getFirst(head.getWindows(), null);
  List<AccumT> group = Lists.newArrayList();
  group.add(head.getValue().getValue());
  // Incoming timestamps were already assigned per element; here they only get merged.
  List<Instant> groupTimestamps = Lists.newArrayList();
  groupTimestamps.add(head.getTimestamp());

  List<WindowedValue<KV<K, AccumT>>> results = Lists.newArrayList();
  // Merging window functions (e.g. Sessions) fold intersecting windows together.
  final boolean merging = !windowingStrategy.getWindowFn().isNonMerging();

  while (remaining.hasNext()) {
    WindowedValue<KV<K, AccumT>> candidate = remaining.next();
    BoundedWindow candidateWindow = Iterables.getOnlyElement(candidate.getWindows());
    boolean overlapsActive =
        merging && isIntersecting((IntervalWindow) activeWindow, (IntervalWindow) candidateWindow);

    if (!overlapsActive && !candidateWindow.equals(activeWindow)) {
      // A new window begins: merge and emit the finished group first.
      Instant groupTimestamp = timestampCombiner.merge(activeWindow, groupTimestamps);
      Iterable<AccumT> groupView = Iterables.unmodifiableIterable(group);
      WindowedValue<KV<K, Iterable<AccumT>>> pending =
          WindowedValue.of(KV.of(key, groupView), groupTimestamp, activeWindow, PaneInfo.NO_FIRING);
      AccumT merged = combineFn.mergeAccumulators(groupView, ctxtForInput(pending));
      results.add(pending.withValue(KV.of(key, merged)));

      // Restart the group, window and timestamps from the candidate.
      group.clear();
      group.add(candidate.getValue().getValue());
      activeWindow = candidateWindow;
      groupTimestamps.clear();
      groupTimestamps.add(candidate.getTimestamp());
      continue;
    }

    if (overlapsActive) {
      // Grow the active window so it covers the intersecting one as well.
      activeWindow = merge((IntervalWindow) activeWindow, (IntervalWindow) candidateWindow);
    }
    // Same (possibly merged) window: the candidate joins the current group.
    group.add(candidate.getValue().getValue());
    groupTimestamps.add(candidate.getTimestamp());
  }

  // Merge and emit the group of the last window.
  Instant lastTimestamp = timestampCombiner.merge(activeWindow, groupTimestamps);
  Iterable<AccumT> lastView = Iterables.unmodifiableIterable(group);
  WindowedValue<KV<K, Iterable<AccumT>>> lastPending =
      WindowedValue.of(KV.of(key, lastView), lastTimestamp, activeWindow, PaneInfo.NO_FIRING);
  AccumT lastMerged = combineFn.mergeAccumulators(lastView, ctxtForInput(lastPending));
  results.add(lastPending.withValue(KV.of(key, lastMerged)));
  return results;
}
Aggregations