Search in sources:

Example 1 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

The method translatePipeline of the class SdkComponentsTest.

/**
 * Translates a small windowed and grouped pipeline with {@code SdkComponents.translatePipeline}
 * and checks that the resulting proto holds exactly as many transforms, PCollections, coders and
 * windowing strategies as a traversal of the same pipeline collects.
 */
@Test
public void translatePipeline() {
    BigEndianLongCoder customCoder = BigEndianLongCoder.of();
    PCollection<Long> elems = pipeline.apply(GenerateSequence.from(0L).to(207L));
    PCollection<Long> counted = elems.apply(Count.<Long>globally()).setCoder(customCoder);
    PCollection<Long> windowed = counted.apply(
            Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
                    .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(19)))
                    .accumulatingFiredPanes()
                    .withAllowedLateness(Duration.standardMinutes(3L)));
    PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.<String, Long>of("foo"));
    // The grouped output is never read; the GroupByKey only has to be applied to the pipeline.
    keyed.apply(GroupByKey.<String, Long>create());
    final RunnerApi.Pipeline pipelineProto = SdkComponents.translatePipeline(pipeline);
    pipeline.traverseTopologically(new PipelineVisitor.Defaults() {

        // Everything seen during the traversal; the sizes are compared with the proto counts.
        private final Set<Node> transforms = new HashSet<>();

        private final Set<PCollection<?>> pcollections = new HashSet<>();

        // Coders are wrapped with Equivalence.identity() before being added to the set
        // (presumably so that equal-but-distinct coder instances are counted separately).
        private final Set<Equivalence.Wrapper<? extends Coder<?>>> coders = new HashSet<>();

        private final Set<WindowingStrategy<?, ?>> windowingStrategies = new HashSet<>();

        /**
         * Records every non-root composite; on the root node compares the collected totals with
         * the component counts of the translated proto.
         */
        @Override
        public void leaveCompositeTransform(Node node) {
            if (node.isRootNode()) {
                assertThat("Unexpected number of PTransforms", pipelineProto.getComponents().getTransformsCount(), equalTo(transforms.size()));
                assertThat("Unexpected number of PCollections", pipelineProto.getComponents().getPcollectionsCount(), equalTo(pcollections.size()));
                assertThat("Unexpected number of Coders", pipelineProto.getComponents().getCodersCount(), equalTo(coders.size()));
                assertThat("Unexpected number of Windowing Strategies", pipelineProto.getComponents().getWindowingStrategiesCount(), equalTo(windowingStrategies.size()));
            } else {
                transforms.add(node);
            }
        }

        /** Records a primitive transform. */
        @Override
        public void visitPrimitiveTransform(Node node) {
            transforms.add(node);
        }

        /**
         * For each PCollection, records the collection itself, its coder tree, its windowing
         * strategy and the coder tree of its window function.
         */
        @Override
        public void visitValue(PValue value, Node producer) {
            if (value instanceof PCollection) {
                PCollection<?> pc = (PCollection<?>) value;
                pcollections.add(pc);
                addCoders(pc.getCoder());
                windowingStrategies.add(pc.getWindowingStrategy());
                addCoders(pc.getWindowingStrategy().getWindowFn().windowCoder());
            }
        }

        /** Records {@code coder} and, for structured coders, recursively all of its components. */
        private void addCoders(Coder<?> coder) {
            coders.add(Equivalence.<Coder<?>>identity().wrap(coder));
            if (coder instanceof StructuredCoder) {
                for (Coder<?> component : ((StructuredCoder<?>) coder).getComponents()) {
                    addCoders(component);
                }
            }
        }
    });
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) RunnerApi(org.apache.beam.sdk.common.runner.v1.RunnerApi) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) HashSet(java.util.HashSet) Coder(org.apache.beam.sdk.coders.Coder) SetCoder(org.apache.beam.sdk.coders.SetCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) KV(org.apache.beam.sdk.values.KV) PValue(org.apache.beam.sdk.values.PValue) PCollection(org.apache.beam.sdk.values.PCollection) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) Test(org.junit.Test)

Example 2 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

The method groupByKey of the class StreamingTransformTranslator.

/**
 * Creates the evaluator for {@link GroupByKey} on a streaming dataset: each RDD of the input
 * stream is first grouped by key only, and the result is then grouped by window as well.
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
    return new TransformEvaluator<GroupByKey<K, V>>() {

        @Override
        public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
            // Input dataset together with the stream sources it carries.
            @SuppressWarnings("unchecked")
            UnboundedDataset<KV<K, V>> source = (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
            List<Integer> sourceIds = source.getStreamSources();
            JavaDStream<WindowedValue<KV<K, V>>> inputStream = source.getDStream();

            @SuppressWarnings("unchecked")
            final KvCoder<K, V> kvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
            final SparkRuntimeContext sparkRuntime = context.getRuntimeContext();

            @SuppressWarnings("unchecked")
            final WindowingStrategy<?, W> strategy = (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
            @SuppressWarnings("unchecked")
            final WindowFn<Object, W> fn = (WindowFn<Object, W>) strategy.getWindowFn();

            // Coder for the windowed values, built from the value coder and the window coder.
            final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
                    WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), fn.windowCoder());

            // Step 1: group by key only, applied to every RDD of the stream.
            Function<JavaRDD<WindowedValue<KV<K, V>>>, JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>>> groupOnlyByKey =
                    new Function<JavaRDD<WindowedValue<KV<K, V>>>, JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>>>() {

                        @Override
                        public JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> call(JavaRDD<WindowedValue<KV<K, V>>> batch) throws Exception {
                            return GroupCombineFunctions.groupByKeyOnly(batch, kvCoder.getKeyCoder(), windowedValueCoder);
                        }
                    };
            JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> keyGrouped = inputStream.transform(groupOnlyByKey);

            // Step 2: group by window as well, then publish the result for this transform.
            JavaDStream<WindowedValue<KV<K, Iterable<V>>>> result =
                    SparkGroupAlsoByWindowViaWindowSet.groupAlsoByWindow(
                            keyGrouped, kvCoder.getKeyCoder(), windowedValueCoder, strategy, sparkRuntime, sourceIds);
            context.putDataset(transform, new UnboundedDataset<>(result, sourceIds));
        }

        @Override
        public String toNativeString() {
            return "groupByKey()";
        }
    };
}
Also used : GroupByKey(org.apache.beam.sdk.transforms.GroupByKey) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) SparkRuntimeContext(org.apache.beam.runners.spark.translation.SparkRuntimeContext) WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) SparkAssignWindowFn(org.apache.beam.runners.spark.translation.SparkAssignWindowFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) TransformEvaluator(org.apache.beam.runners.spark.translation.TransformEvaluator) JavaRDD(org.apache.spark.api.java.JavaRDD) EvaluationContext(org.apache.beam.runners.spark.translation.EvaluationContext)

Example 3 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

The method translateNode of the class AssignWindowTranslatorBatch.

/**
 * Translates a {@code Window.Assign} transform: reads the window function from the windowing
 * strategy of the transform's output, runs the input data set through an
 * {@code AssignWindowsFunction} built from it, and registers the result as the output data set.
 *
 * @param transform the window-assignment transform being translated
 * @param context translation context supplying the inputs, outputs, data sets and options
 */
@Override
public void translateNode(Window.Assign<T> transform, Twister2BatchTranslationContext context) {
    BatchTSetImpl<WindowedValue<T>> source =
            context.getInputDataSet(context.getInput(transform));

    // The strategy is taken from the transform's output, not from its input.
    final WindowingStrategy<T, BoundedWindow> outputStrategy =
            (WindowingStrategy<T, BoundedWindow>) context.getOutput(transform).getWindowingStrategy();
    WindowFn<T, BoundedWindow> assigner = outputStrategy.getWindowFn();

    ComputeTSet<WindowedValue<T>, Iterator<WindowedValue<T>>> assigned =
            source.direct().compute(new AssignWindowsFunction(assigner, context.getOptions()));

    context.setOutputDataSet(context.getOutput(transform), assigned);
}
Also used : AssignWindowsFunction(org.apache.beam.runners.twister2.translators.functions.AssignWindowsFunction) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Iterator(java.util.Iterator) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy)

Example 4 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

The method translateNode of the class GroupByKeyTranslatorBatch.

/**
 * Translates a {@link GroupByKey}: maps the input to byte-array tuples, gathers them by key,
 * converts the gathered bytes with {@code ByteToWindowFunction}, and finally applies
 * {@code GroupByWindowFunction} using the input's windowing strategy.
 *
 * @param transform the GroupByKey transform being translated
 * @param context translation context supplying the inputs, outputs, data sets and options
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
    PCollection<KV<K, V>> input = context.getInput(transform);
    BatchTSetImpl<WindowedValue<KV<K, V>>> source = context.getInputDataSet(input);

    // Coders for the key, the value and the windowed value.
    final KvCoder<K, V> kvCoder = (KvCoder<K, V>) input.getCoder();
    Coder<K> keyCoder = kvCoder.getKeyCoder();
    Coder<V> valueCoder = kvCoder.getValueCoder();
    WindowingStrategy windowingStrategy = input.getWindowingStrategy();
    WindowFn<KV<K, V>, BoundedWindow> windowFn =
            (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
    final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
            WindowedValue.FullWindowedValueCoder.of(valueCoder, windowFn.windowCoder());

    KeyedTSet<byte[], byte[]> byteTuples =
            source.mapToTuple(new MapToTupleFunction<K, V>(keyCoder, windowedValueCoder));

    // todo add support for a partition function to be specified, this would use
    // todo keyedPartition function instead of KeyedGather
    ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>> keyGrouped =
            byteTuples.keyedGather().map(new ByteToWindowFunction(keyCoder, windowedValueCoder));

    // Second stage: group by window as well.
    SystemReduceFnBuffering buffering = new SystemReduceFnBuffering(valueCoder);
    ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>> windowGrouped =
            keyGrouped.direct().<WindowedValue<KV<K, Iterable<V>>>>flatmap(
                    new GroupByWindowFunction(windowingStrategy, buffering, context.getOptions()));

    PCollection output = context.getOutput(transform);
    context.setOutputDataSet(output, windowGrouped);
}
Also used : WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) SystemReduceFnBuffering(org.apache.beam.runners.twister2.translators.functions.internal.SystemReduceFnBuffering) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) PCollection(org.apache.beam.sdk.values.PCollection) ByteToWindowFunction(org.apache.beam.runners.twister2.translators.functions.ByteToWindowFunction) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) Iterator(java.util.Iterator) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) GroupByWindowFunction(org.apache.beam.runners.twister2.translators.functions.GroupByWindowFunction)

Example 5 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

The method prepareSerialization of the class DoFnFunction.

/**
 * Fills the serializable fields of this DoFnFunction: the pipeline options as a string, the
 * translated DoFn payload, the input and output coders, the main and side-input windowing
 * strategies as protobuf bytes, and the ids of the side outputs and of the output map keys.
 * These are the values that are later turned back into the proper classes during
 * deserialization.
 *
 * <p>An {@link IOException} raised while encoding is not propagated; only its message is logged
 * at info level, and fields that would have been assigned after the failing statement are left
 * untouched.
 *
 * <p>NOTE(review): the {@code windowStrategyProto} field is reused inside the side-input loop, so
 * after this method it refers to the last side input's strategy rather than the main one —
 * confirm that no later reader depends on it.
 */
private void prepareSerialization() {
    SdkComponents components = SdkComponents.create();
    PortablePipelineOptions portableOptions = pipelineOptions.as(PortablePipelineOptions.class);
    components.registerEnvironment(Environments.createOrGetDefaultEnvironment(portableOptions));

    serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();
    doFnwithEx = ParDoTranslation.translateDoFn(doFn, mainOutput, sideInputMapping, doFnSchemaInformation, components);
    doFnwithExBytes = doFnwithEx.getPayload().toByteArray();
    outputCodersBytes = new HashMap<>();

    try {
        // Input coder and main windowing strategy.
        coderBytes = SerializableUtils.serializeToByteArray(inputCoder);
        windowStrategyProto = WindowingStrategyTranslation.toMessageProto(windowingStrategy, components);
        windowBytes = windowStrategyProto.toByteArray();

        // Output coders, keyed by tag id.
        for (Map.Entry<TupleTag<?>, Coder<?>> coderByTag : outputCoders.entrySet()) {
            TupleTag<?> tag = coderByTag.getKey();
            Coder<?> outputCoder = coderByTag.getValue();
            outputCodersBytes.put(tag.getId(), SerializableUtils.serializeToByteArray(outputCoder));
        }

        // Side-input windowing strategies, keyed by tag id.
        sideInputBytes = new HashMap<>();
        for (Map.Entry<TupleTag<?>, WindowingStrategy<?, ?>> strategyByTag : sideInputs.entrySet()) {
            TupleTag<?> tag = strategyByTag.getKey();
            windowStrategyProto = WindowingStrategyTranslation.toMessageProto(strategyByTag.getValue(), components);
            sideInputBytes.put(tag.getId(), windowStrategyProto.toByteArray());
        }

        // Side outputs are kept by id only.
        serializedSideOutputs = new ArrayList<>();
        for (TupleTag<?> tag : sideOutputs) {
            serializedSideOutputs.add(tag.getId());
        }

        // Output map with its tag keys replaced by their ids.
        serializedOutputMap = new HashMap<>();
        for (Map.Entry<TupleTag<?>, Integer> indexByTag : outputMap.entrySet()) {
            TupleTag<?> tag = indexByTag.getKey();
            serializedOutputMap.put(tag.getId(), indexByTag.getValue());
        }
    } catch (IOException e) {
        LOG.info(e.getMessage());
    }
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) TupleTag(org.apache.beam.sdk.values.TupleTag) IOException(java.io.IOException) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy): 36, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 25, BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 21, KV (org.apache.beam.sdk.values.KV): 19, KvCoder (org.apache.beam.sdk.coders.KvCoder): 17, Coder (org.apache.beam.sdk.coders.Coder): 16, List (java.util.List): 15, TupleTag (org.apache.beam.sdk.values.TupleTag): 14, Instant (org.joda.time.Instant): 13, Test (org.junit.Test): 13, PCollection (org.apache.beam.sdk.values.PCollection): 11, ArrayList (java.util.ArrayList): 10, HashMap (java.util.HashMap): 9, Map (java.util.Map): 9, SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions): 9, IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 9, Duration (org.joda.time.Duration): 9, IOException (java.io.IOException): 8, Collectors (java.util.stream.Collectors): 8, StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder): 8