use of org.apache.beam.sdk.transforms.windowing.WindowFn in project beam by apache.
the class StreamingTransformTranslator method groupByKey.
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
return new TransformEvaluator<GroupByKey<K, V>>() {
@Override
public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
@SuppressWarnings("unchecked") UnboundedDataset<KV<K, V>> inputDataset = (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
List<Integer> streamSources = inputDataset.getStreamSources();
JavaDStream<WindowedValue<KV<K, V>>> dStream = inputDataset.getDStream();
@SuppressWarnings("unchecked") final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
final SparkRuntimeContext runtimeContext = context.getRuntimeContext();
@SuppressWarnings("unchecked") final WindowingStrategy<?, W> windowingStrategy = (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
@SuppressWarnings("unchecked") final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) windowingStrategy.getWindowFn();
//--- coders.
final WindowedValue.WindowedValueCoder<V> wvCoder = WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
//--- group by key only.
JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> groupedByKeyStream = dStream.transform(new Function<JavaRDD<WindowedValue<KV<K, V>>>, JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>>>() {
@Override
public JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> call(JavaRDD<WindowedValue<KV<K, V>>> rdd) throws Exception {
return GroupCombineFunctions.groupByKeyOnly(rdd, coder.getKeyCoder(), wvCoder);
}
});
//--- now group also by window.
JavaDStream<WindowedValue<KV<K, Iterable<V>>>> outStream = SparkGroupAlsoByWindowViaWindowSet.groupAlsoByWindow(groupedByKeyStream, coder.getKeyCoder(), wvCoder, windowingStrategy, runtimeContext, streamSources);
context.putDataset(transform, new UnboundedDataset<>(outStream, streamSources));
}
@Override
public String toNativeString() {
return "groupByKey()";
}
};
}
use of org.apache.beam.sdk.transforms.windowing.WindowFn in project beam by apache.
the class WindowIntoTranslationTest method testToFromProto.
@Test
public void testToFromProto() throws InvalidProtocolBufferException {
pipeline.apply(GenerateSequence.from(0)).apply(Window.<Long>into((WindowFn) windowFn));
final AtomicReference<AppliedPTransform<?, ?, Assign<?>>> assign = new AtomicReference<>(null);
pipeline.traverseTopologically(new PipelineVisitor.Defaults() {
@Override
public void visitPrimitiveTransform(Node node) {
if (node.getTransform() instanceof Window.Assign) {
checkState(assign.get() == null);
assign.set((AppliedPTransform<?, ?, Assign<?>>) node.toAppliedPTransform(getPipeline()));
}
}
});
checkState(assign.get() != null);
SdkComponents components = SdkComponents.create();
WindowIntoPayload payload = WindowIntoTranslation.toProto(assign.get().getTransform(), components);
assertEquals(windowFn, WindowIntoTranslation.getWindowFn(payload));
}
use of org.apache.beam.sdk.transforms.windowing.WindowFn in project beam by apache.
the class SparkAssignWindowFn method call.
@Override
@SuppressWarnings("unchecked")
public WindowedValue<T> call(WindowedValue<T> windowedValue) throws Exception {
final BoundedWindow boundedWindow = Iterables.getOnlyElement(windowedValue.getWindows());
final T element = windowedValue.getValue();
final Instant timestamp = windowedValue.getTimestamp();
Collection<W> windows = ((WindowFn<T, W>) fn).assignWindows(((WindowFn<T, W>) fn).new AssignContext() {
@Override
public T element() {
return element;
}
@Override
public Instant timestamp() {
return timestamp;
}
@Override
public BoundedWindow window() {
return boundedWindow;
}
});
return WindowedValue.of(element, timestamp, windows, PaneInfo.NO_FIRING);
}
Aggregations