use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class SdkComponentsTest method translatePipeline.
/**
 * Builds a small pipeline (sequence, global count, fixed windows with early firings, keying and a
 * group-by-key), translates it with {@code SdkComponents.translatePipeline}, and checks that the
 * number of transforms, PCollections, coders and windowing strategies in the resulting proto
 * components equals the number of distinct ones seen while traversing the pipeline.
 */
@Test
public void translatePipeline() {
  BigEndianLongCoder customCoder = BigEndianLongCoder.of();
  PCollection<Long> elems = pipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> counted = elems.apply(Count.<Long>globally()).setCoder(customCoder);
  PCollection<Long> windowed =
      counted.apply(
          Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(AfterPane.elementCountAtLeast(19)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(3L)));
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.<String, Long>of("foo"));
  // The grouped output is never read by this test; the transform only has to be applied.
  keyed.apply(GroupByKey.<String, Long>create());

  final RunnerApi.Pipeline pipelineProto = SdkComponents.translatePipeline(pipeline);

  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        // Everything encountered during the traversal, de-duplicated by the sets.
        private final Set<Node> transforms = new HashSet<>();
        private final Set<PCollection<?>> pcollections = new HashSet<>();
        // Coders are compared by identity, not by equals().
        private final Set<Equivalence.Wrapper<? extends Coder<?>>> coders = new HashSet<>();
        private final Set<WindowingStrategy<?, ?>> windowingStrategies = new HashSet<>();

        @Override
        public void leaveCompositeTransform(Node node) {
          if (node.isRootNode()) {
            // Leaving the root: the whole pipeline has been visited, so compare the totals.
            assertThat(
                "Unexpected number of PTransforms",
                pipelineProto.getComponents().getTransformsCount(),
                equalTo(transforms.size()));
            assertThat(
                "Unexpected number of PCollections",
                pipelineProto.getComponents().getPcollectionsCount(),
                equalTo(pcollections.size()));
            assertThat(
                "Unexpected number of Coders",
                pipelineProto.getComponents().getCodersCount(),
                equalTo(coders.size()));
            assertThat(
                "Unexpected number of Windowing Strategies",
                pipelineProto.getComponents().getWindowingStrategiesCount(),
                equalTo(windowingStrategies.size()));
          } else {
            transforms.add(node);
          }
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
          transforms.add(node);
        }

        @Override
        public void visitValue(PValue value, Node producer) {
          if (value instanceof PCollection) {
            PCollection<?> pc = (PCollection<?>) value;
            pcollections.add(pc);
            addCoders(pc.getCoder());
            windowingStrategies.add(pc.getWindowingStrategy());
            addCoders(pc.getWindowingStrategy().getWindowFn().windowCoder());
          }
        }

        /** Records {@code coder} and, for structured coders, all of its component coders. */
        private void addCoders(Coder<?> coder) {
          coders.add(Equivalence.<Coder<?>>identity().wrap(coder));
          if (coder instanceof StructuredCoder) {
            for (Coder<?> component : ((StructuredCoder<?>) coder).getComponents()) {
              addCoders(component);
            }
          }
        }
      });
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class StreamingTransformTranslator method groupByKey.
/**
 * Creates the evaluator for {@link GroupByKey} on an unbounded dataset. Evaluation first groups
 * each RDD of the input stream by key only, then groups the result by window, and registers the
 * resulting stream as the dataset of the transform.
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
  return new TransformEvaluator<GroupByKey<K, V>>() {
    @Override
    public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, V>> source =
          (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
      List<Integer> sourceIds = source.getStreamSources();
      JavaDStream<WindowedValue<KV<K, V>>> inputStream = source.getDStream();

      @SuppressWarnings("unchecked")
      final KvCoder<K, V> kvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      final SparkRuntimeContext sparkRuntimeContext = context.getRuntimeContext();

      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> strategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> fn = (WindowFn<Object, W>) strategy.getWindowFn();

      // Coder for the windowed values carried through both grouping steps.
      final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
          WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), fn.windowCoder());

      // Step 1: group by key only, one RDD at a time.
      Function<
              JavaRDD<WindowedValue<KV<K, V>>>,
              JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>>>
          groupByKeyOnlyFn =
              new Function<
                  JavaRDD<WindowedValue<KV<K, V>>>,
                  JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>>>() {
                @Override
                public JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> call(
                    JavaRDD<WindowedValue<KV<K, V>>> rdd) throws Exception {
                  return GroupCombineFunctions.groupByKeyOnly(
                      rdd, kvCoder.getKeyCoder(), windowedValueCoder);
                }
              };
      JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> keyedStream =
          inputStream.transform(groupByKeyOnlyFn);

      // Step 2: group the keyed stream by window as well.
      JavaDStream<WindowedValue<KV<K, Iterable<V>>>> groupedStream =
          SparkGroupAlsoByWindowViaWindowSet.groupAlsoByWindow(
              keyedStream,
              kvCoder.getKeyCoder(),
              windowedValueCoder,
              strategy,
              sparkRuntimeContext,
              sourceIds);

      context.putDataset(transform, new UnboundedDataset<>(groupedStream, sourceIds));
    }

    @Override
    public String toNativeString() {
      return "groupByKey()";
    }
  };
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class AssignWindowTranslatorBatch method translateNode.
/**
 * Translates a {@code Window.Assign} transform: reads the input data set from the context, runs an
 * {@code AssignWindowsFunction} built from the output's window function over it, and registers
 * the result as the transform's output data set.
 */
@Override
public void translateNode(Window.Assign<T> transform, Twister2BatchTranslationContext context) {
  BatchTSetImpl<WindowedValue<T>> source =
      context.getInputDataSet(context.getInput(transform));

  // The window function is taken from the OUTPUT collection's windowing strategy.
  final WindowingStrategy<T, BoundedWindow> outputStrategy =
      (WindowingStrategy<T, BoundedWindow>) context.getOutput(transform).getWindowingStrategy();
  WindowFn<T, BoundedWindow> assigner = outputStrategy.getWindowFn();

  ComputeTSet<WindowedValue<T>, Iterator<WindowedValue<T>>> assigned =
      source.direct().compute(new AssignWindowsFunction(assigner, context.getOptions()));

  context.setOutputDataSet(context.getOutput(transform), assigned);
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class GroupByKeyTranslatorBatch method translateNode.
/**
 * Translates a {@code GroupByKey} transform: maps the input to a keyed set of byte arrays using
 * the key coder and a windowed-value coder, gathers by key, converts the bytes back, and finally
 * groups by window before registering the result as the transform's output data set.
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> inputCollection = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTSet = context.getInputDataSet(inputCollection);

  final KvCoder<K, V> kvCoder = (KvCoder<K, V>) inputCollection.getCoder();
  Coder<K> keyCoder = kvCoder.getKeyCoder();
  Coder<V> valueCoder = kvCoder.getValueCoder();

  WindowingStrategy windowingStrategy = inputCollection.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(valueCoder, windowFn.windowCoder());

  // Encode keys and windowed values as byte arrays for the keyed gather.
  KeyedTSet<byte[], byte[]> encoded =
      inputTSet.mapToTuple(new MapToTupleFunction<K, V>(keyCoder, windowedValueCoder));

  // TODO: add support for a partition function to be specified; this would use the
  // keyedPartition function instead of keyedGather.
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedByKey =
          encoded.keyedGather().map(new ByteToWindowFunction(keyCoder, windowedValueCoder));

  // Group by window on top of the per-key grouping.
  SystemReduceFnBuffering buffering = new SystemReduceFnBuffering(valueCoder);
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      groupedByWindow =
          groupedByKey
              .direct()
              .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
                  new GroupByWindowFunction(windowingStrategy, buffering, context.getOptions()));

  PCollection outputCollection = context.getOutput(transform);
  context.setOutputDataSet(outputCollection, groupedByWindow);
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class DoFnFunction method prepareSerialization.
/**
 * Prepares the DoFnFunction class so it can be serialized properly. This involves using various
 * protobufs and byte arrays which are later converted back into the proper classes during
 * deserialization.
 *
 * <p>The following state is produced: the serialized pipeline options, the translated DoFn and
 * its payload bytes, the serialized input and output coders, the main windowing strategy proto
 * and its bytes, the bytes of every side-input windowing strategy keyed by tag id, the ids of the
 * side outputs, and the output map keyed by tag id.
 */
private void prepareSerialization() {
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(
          pipelineOptions.as(PortablePipelineOptions.class)));
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();

  doFnwithEx =
      ParDoTranslation.translateDoFn(
          this.doFn, mainOutput, sideInputMapping, doFnSchemaInformation, components);
  doFnwithExBytes = doFnwithEx.getPayload().toByteArray();

  outputCodersBytes = new HashMap<>();
  try {
    coderBytes = SerializableUtils.serializeToByteArray(inputCoder);

    // Main windowing strategy: keep both the proto and its byte form.
    windowStrategyProto = WindowingStrategyTranslation.toMessageProto(windowingStrategy, components);
    windowBytes = windowStrategyProto.toByteArray();

    for (Map.Entry<TupleTag<?>, Coder<?>> entry : outputCoders.entrySet()) {
      outputCodersBytes.put(
          entry.getKey().getId(), SerializableUtils.serializeToByteArray(entry.getValue()));
    }

    sideInputBytes = new HashMap<>();
    for (Map.Entry<TupleTag<?>, WindowingStrategy<?, ?>> entry : sideInputs.entrySet()) {
      // Serialize each side-input strategy directly instead of going through the
      // windowStrategyProto field, so that field keeps holding the main windowing strategy
      // rather than being overwritten by the last side input.
      sideInputBytes.put(
          entry.getKey().getId(),
          WindowingStrategyTranslation.toMessageProto(entry.getValue(), components).toByteArray());
    }

    serializedSideOutputs = new ArrayList<>();
    for (TupleTag<?> sideOutput : sideOutputs) {
      serializedSideOutputs.add(sideOutput.getId());
    }

    serializedOutputMap = new HashMap<>();
    for (Map.Entry<TupleTag<?>, Integer> entry : outputMap.entrySet()) {
      serializedOutputMap.put(entry.getKey().getId(), entry.getValue());
    }
  } catch (IOException e) {
    // NOTE(review): the failure is only logged at info level and without the stack trace, and
    // any state assigned after the failing statement is left unset. Consider logging the
    // exception itself or failing fast - confirm what callers expect before changing this.
    LOG.info(e.getMessage());
  }
}
Aggregations