Use of org.apache.beam.sdk.values.WindowingStrategy in the Apache Beam project.
Class SparkSideInputReader, method get.
/**
 * Resolves the value of a side input for the given main-input window.
 *
 * <p>The broadcast registered under the view's tag holds windowed values for all windows. This
 * method maps {@code window} to the matching side-input window via the view's window mapping
 * function, keeps only the values assigned to that window, and hands them to the view's {@link
 * ViewFn} in the form required by its materialization.
 *
 * @param view the side-input view to read; must not be null
 * @param window the main-input window for which the side input is requested
 * @return the result of applying the view's {@link ViewFn} to the values in the mapped window
 * @throws IllegalStateException if the view requests a materialization other than iterable or
 *     multimap
 */
@Override
@Nullable
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
  // --- validate sideInput.
  checkNotNull(view, "The PCollectionView passed to sideInput cannot be null ");
  KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>> windowedBroadcastHelper =
      sideInputs.get(view.getTagInternal());
  checkNotNull(windowedBroadcastHelper, "SideInput for view " + view + " is not available.");

  // --- sideInput window
  final BoundedWindow sideInputWindow = view.getWindowMappingFn().getSideInputWindow(window);

  // --- match the appropriate sideInput window.
  // A tag points to all matching sideInputs, that is, to all windows. Now that the appropriate
  // sideInputWindow is known, all that is left is to filter by it.
  Iterable<WindowedValue<?>> availableSideInputs =
      (Iterable<WindowedValue<?>>) windowedBroadcastHelper.getValue().getValue();
  // Filter and unwrap in one pass; no intermediate list is needed between the two steps.
  Iterable<?> sideInputForWindow =
      StreamSupport.stream(availableSideInputs.spliterator(), false)
          .filter(
              sideInputCandidate ->
                  sideInputCandidate != null
                      && Iterables.contains(sideInputCandidate.getWindows(), sideInputWindow))
          .map(WindowedValue::getValue)
          .collect(Collectors.toList());

  // Resolve the URN once; it selects the dispatch branch and is reported on failure.
  final String materializationUrn = view.getViewFn().getMaterialization().getUrn();
  switch (materializationUrn) {
    case Materializations.ITERABLE_MATERIALIZATION_URN:
      {
        ViewFn<IterableView, T> viewFn = (ViewFn<IterableView, T>) view.getViewFn();
        return viewFn.apply(() -> sideInputForWindow);
      }
    case Materializations.MULTIMAP_MATERIALIZATION_URN:
      {
        ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
        // The view's coder is cast to a KvCoder here; its key coder is used to build the
        // in-memory multimap.
        Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
        return viewFn.apply(
            InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) sideInputForWindow));
      }
    default:
      throw new IllegalStateException(
          String.format(
              "Unknown side input materialization format requested '%s'", materializationUrn));
  }
}
Use of org.apache.beam.sdk.values.WindowingStrategy in the Apache Beam project.
Class SparkCombineFnTest, method testSlidingCombineFnNonMerging.
/**
 * Combines three elements of a single key under 3000 ms windows sliding every 1000 ms, using a
 * non-merging accumulator, and checks the seven windowed outputs and their timestamps.
 */
@Test
public void testSlidingCombineFnNonMerging() throws Exception {
  WindowingStrategy<Object, IntervalWindow> slidingStrategy =
      WindowingStrategy.of(
          SlidingWindows.of(Duration.millis(3000)).every(Duration.millis(1000)));
  SparkCombineFn<KV<String, Integer>, Integer, Long, Long> combiner =
      SparkCombineFn.keyed(
          combineFn,
          opts,
          Collections.emptyMap(),
          slidingStrategy,
          SparkCombineFn.WindowedAccumulator.Type.NON_MERGING);

  // Three inputs for the same key, placed at 5000 ms, 1500 ms and 500 ms after the epoch.
  Instant epoch = Instant.ofEpochMilli(0);
  WindowedValue<KV<String, Integer>> valueAt5000 =
      input("key", 1, epoch.plus(Duration.millis(5000)), slidingStrategy.getWindowFn());
  WindowedValue<KV<String, Integer>> valueAt1500 =
      input("key", 2, epoch.plus(Duration.millis(1500)), slidingStrategy.getWindowFn());
  WindowedValue<KV<String, Integer>> valueAt500 =
      input("key", 3, epoch.plus(Duration.millis(500)), slidingStrategy.getWindowFn());

  // Build two separate accumulators, extend the first one, then merge both.
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> leftAccumulator =
      combiner.createCombiner(valueAt5000);
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> rightAccumulator =
      combiner.createCombiner(valueAt500);
  combiner.mergeValue(leftAccumulator, valueAt1500);
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> mergedAccumulator =
      combiner.mergeCombiners(leftAccumulator, rightAccumulator);

  Iterable<WindowedValue<Long>> results = combiner.extractOutput(mergedAccumulator);
  assertEquals(7, Iterables.size(results));

  // Render each output as "<value>:<timestamp millis>" so it can be compared order-independently.
  List<String> rendered = Lists.newArrayList();
  for (WindowedValue<Long> result : results) {
    rendered.add(result.getValue() + ":" + result.getTimestamp().getMillis());
  }
  assertUnorderedEquals(
      Lists.newArrayList("3:999", "5:1999", "5:2999", "2:3999", "1:5999", "1:6999", "1:7999"),
      rendered);
}
Use of org.apache.beam.sdk.values.WindowingStrategy in the Apache Beam project.
Class ReshuffleTest, method testReshufflePreservesTimestamps.
/**
 * Checks that a {@link Reshuffle} applied under the default {@link WindowingStrategy} leaves every
 * element's timestamp unchanged.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshufflePreservesTimestamps() {
  // Source elements cover the minimum timestamp, two ordinary instants, and the global window's
  // maximum timestamp.
  PCollection<String> timestampedWords =
      pipeline.apply(
          Create.timestamped(
                  TimestampedValue.of("foo", BoundedWindow.TIMESTAMP_MIN_VALUE),
                  TimestampedValue.of("foo", new Instant(0)),
                  TimestampedValue.of("bar", new Instant(33)),
                  TimestampedValue.of("bar", GlobalWindow.INSTANCE.maxTimestamp()))
              .withCoder(StringUtf8Coder.of()));
  PCollection<KV<String, String>> keyedWords =
      timestampedWords.apply(
          WithKeys.<String, String>of(word -> word).withKeyType(TypeDescriptors.strings()));
  PCollection<KV<String, TimestampedValue<String>>> input =
      keyedWords.apply("ReifyOriginalTimestamps", Reify.timestampsInValue());

  // The outer TimestampedValue carries the timestamp reified after the reshuffle; the inner
  // TimestampedValue carries the timestamp reified before it.
  PCollection<KV<String, TimestampedValue<String>>> reshuffled = input.apply(Reshuffle.of());
  PCollection<TimestampedValue<TimestampedValue<String>>> output =
      reshuffled
          .apply("ReifyReshuffledTimestamps", Reify.timestampsInValue())
          .apply(Values.create());

  PAssert.that(output)
      .satisfies(
          reified -> {
            for (TimestampedValue<TimestampedValue<String>> pair : reified) {
              assertThat(
                  "Reshuffle must preserve element timestamps",
                  pair.getTimestamp(),
                  equalTo(pair.getValue().getTimestamp()));
            }
            return null;
          });

  pipeline.run();
}
Use of org.apache.beam.sdk.values.WindowingStrategy in the Apache Beam project.
Class BeamFnMapTaskExecutorFactory, method createOperationTransformForFetchAndFilterStreamingSideInputNodes.
/**
 * Builds a node function whose {@code typedApply} turns a {@link
 * FetchAndFilterStreamingSideInputsNode} into an {@link OperationNode} wrapping a {@link
 * FetchAndFilterStreamingSideInputsOperation}.
 *
 * <p>The operation writes to the receiver of the node's single successor in {@code network} and
 * runs under an operation context named from {@code stageName} and the node's name context.
 */
private Function<Node, Node> createOperationTransformForFetchAndFilterStreamingSideInputNodes(MutableNetwork<Node, Edge> network, IdGenerator idGenerator, InstructionRequestHandler instructionRequestHandler, FnDataService beamFnDataService, Endpoints.ApiServiceDescriptor dataApiServiceDescriptor, DataflowExecutionContext executionContext, String stageName) {
  return new TypeSafeNodeFunction<FetchAndFilterStreamingSideInputsNode>(
      FetchAndFilterStreamingSideInputsNode.class) {
    @Override
    public Node typedApply(FetchAndFilterStreamingSideInputsNode input) {
      // The node is expected to have exactly one successor, and it must be an output receiver.
      OutputReceiverNode receiverNode =
          (OutputReceiverNode) Iterables.getOnlyElement(network.successors(input));

      DataflowOperationContext context =
          executionContext.createOperationContext(
              NameContext.create(
                  stageName,
                  input.getNameContext().originalName(),
                  input.getNameContext().systemName(),
                  input.getNameContext().userName()));

      // Constructor arguments, computed in the same order the constructor receives them.
      OutputReceiver[] receivers = new OutputReceiver[] {receiverNode.getOutputReceiver()};
      Coder<WindowedValue<Object>> receiverCoder =
          (Coder<WindowedValue<Object>>) receiverNode.getCoder();
      WindowingStrategy<?, BoundedWindow> nodeWindowing =
          (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();

      return OperationNode.create(
          new FetchAndFilterStreamingSideInputsOperation<>(
              receivers,
              context,
              instructionRequestHandler,
              beamFnDataService,
              dataApiServiceDescriptor,
              idGenerator,
              receiverCoder,
              nodeWindowing,
              executionContext.getStepContext(context),
              input.getPCollectionViewsToWindowMappingFns()));
    }
  };
}
Use of org.apache.beam.sdk.values.WindowingStrategy in the Apache Beam project.
Class BatchGroupAlsoByWindowReshuffleDoFnTest, method makeRunner.
/**
 * Creates a {@link GroupAlsoByWindowFnRunner} for the function that {@code fnFactory} produces for
 * {@code windowingStrategy}.
 *
 * <p>The runner uses freshly created pipeline options, an empty side-input reader, and a {@link
 * TestStepContext} named {@code STEP_NAME}. Every key is served the state internals of that one
 * step context.
 *
 * @param fnFactory factory that yields the group-also-by-window function under test
 * @param windowingStrategy windowing strategy handed to the factory
 * @param outputTag tag under which the runner emits its output
 * @param outputManager receiver for the runner's output
 * @return a runner wrapping the produced function
 */
private static <K, InputT, OutputT, W extends BoundedWindow> DoFnRunner<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>> makeRunner(GroupAlsoByWindowDoFnFactory<K, InputT, OutputT> fnFactory, WindowingStrategy<?, W> windowingStrategy, TupleTag<KV<K, OutputT>> outputTag, DoFnRunners.OutputManager outputManager) {
  final StepContext testContext = new TestStepContext(STEP_NAME);

  // The key is ignored: all keys resolve to the step context's state internals.
  StateInternalsFactory<K> sharedStateFactory = ignoredKey -> testContext.stateInternals();

  BatchGroupAlsoByWindowFn<K, InputT, OutputT> groupingFn =
      fnFactory.forStrategy(windowingStrategy, sharedStateFactory);

  return new GroupAlsoByWindowFnRunner<>(
      PipelineOptionsFactory.create(),
      groupingFn,
      NullSideInputReader.empty(),
      outputManager,
      outputTag,
      testContext);
}
Aggregations