Search in sources :

Example 1 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class StatefulDoFnRunnerTest method testGarbageCollect.

/**
 * Checks that state written for a window is garbage collected once the input watermark moves
 * past that window's cleanup horizon, while the state of a later window is left untouched until
 * its own horizon is passed.
 */
@Test
public void testGarbageCollect() throws Exception {
    timerInternals.advanceInputWatermark(new Instant(1L));

    MyDoFn doFn = new MyDoFn();
    StateTag<ValueState<Integer>> intStateTag = StateTags.tagForSpec(doFn.stateId, doFn.intState);

    DoFnRunner<KV<String, Integer>, Integer> runner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn,
            getDoFnRunner(doFn),
            WINDOWING_STRATEGY,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
            new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));

    Instant elementTime = new Instant(1);
    Instant laterElementTime = elementTime.plus(WINDOW_SIZE);

    // One element for key "hello" in WINDOW_1.
    runner.processElement(WindowedValue.of(KV.of("hello", 1), elementTime, WINDOW_1, PaneInfo.NO_FIRING));
    assertEquals(1, (int) stateInternals.state(windowNamespace(WINDOW_1), intStateTag).read());

    // Two elements for key "hello" in WINDOW_2.
    for (int i = 0; i < 2; i++) {
        runner.processElement(WindowedValue.of(KV.of("hello", 1), laterElementTime, WINDOW_2, PaneInfo.NO_FIRING));
    }
    assertEquals(2, (int) stateInternals.state(windowNamespace(WINDOW_2), intStateTag).read());

    // The cleanup timer is set to window.maxTimestamp() + allowed lateness + 1 to ensure that
    // state is still available when a user timer for window.maxTimestamp() fires. The extra
    // millisecond added here puts the watermark past the GC horizon, not on it.
    Instant pastWindow1GcHorizon =
        WINDOW_1.maxTimestamp()
            .plus(ALLOWED_LATENESS)
            .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
            .plus(1);
    advanceInputWatermark(timerInternals, pastWindow1GcHorizon, runner);

    // WINDOW_1 state is gone, WINDOW_2 state is still there.
    assertTrue(stateInternals.isEmptyForTesting(stateInternals.state(windowNamespace(WINDOW_1), intStateTag)));
    assertEquals(2, (int) stateInternals.state(windowNamespace(WINDOW_2), intStateTag).read());

    // Same again for WINDOW_2: move the watermark just past its GC horizon.
    Instant pastWindow2GcHorizon =
        WINDOW_2.maxTimestamp()
            .plus(ALLOWED_LATENESS)
            .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
            .plus(1);
    advanceInputWatermark(timerInternals, pastWindow2GcHorizon, runner);

    assertTrue(stateInternals.isEmptyForTesting(stateInternals.state(windowNamespace(WINDOW_2), intStateTag)));
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) ValueState(org.apache.beam.sdk.state.ValueState) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 2 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class StatefulDoFnRunnerTest method testLateDropping.

/**
 * Checks that an element arriving after the watermarks have been advanced to
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE} is counted by the
 * {@code DROPPED_DUE_TO_LATENESS_COUNTER} metric exactly once.
 */
@Test
public void testLateDropping() throws Exception {
    MetricsContainerImpl metrics = new MetricsContainerImpl("any");
    MetricsEnvironment.setCurrentContainer(metrics);

    // Push both watermarks to the maximum timestamp before any element is processed.
    timerInternals.advanceInputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
    timerInternals.advanceOutputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));

    DoFn<KV<String, Integer>, Integer> doFn = new MyDoFn();
    DoFnRunner<KV<String, Integer>, Integer> runner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn,
            getDoFnRunner(doFn),
            WINDOWING_STRATEGY,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
            new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));

    runner.startBundle();

    Instant windowStart = new Instant(0);
    Instant windowEnd = new Instant(0L + WINDOW_SIZE);
    IntervalWindow window = new IntervalWindow(windowStart, windowEnd);
    Instant elementTimestamp = new Instant(0);
    runner.processElement(WindowedValue.of(KV.of("hello", 1), elementTimestamp, window, PaneInfo.NO_FIRING));

    // Read the counter before finishing the bundle, as the original ordering does.
    MetricName droppedCounterName = MetricName.named(StatefulDoFnRunner.class, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER);
    long droppedCount = metrics.getCounter(droppedCounterName).getCumulative().longValue();
    assertEquals(1L, droppedCount);

    runner.finishBundle();
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 3 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class Create method getDefaultCreateCoder.

/**
 * Picks a default coder for the elements of a {@code Create} transform.
 *
 * <p>The runtime class shared by all non-null elements is determined first; if that class has no
 * type parameters, the registry is asked for a coder for it. When the registry cannot provide
 * one, or the class is parameterized, the coder is inferred from the element objects instead.
 *
 * @param registry registry used to look up coders
 * @param elems the elements to be encoded; must not be empty
 * @return a coder for the elements
 * @throws CannotProvideCoderException if the non-null elements are not all of the same class
 * @throws IllegalArgumentException presumably, via {@code checkArgument}, if {@code elems} is
 *     empty
 */
private static <T> Coder<T> getDefaultCreateCoder(CoderRegistry registry, Iterable<T> elems) throws CannotProvideCoderException {
    checkArgument(
        !Iterables.isEmpty(elems),
        "Can not determine a default Coder for a 'Create' PTransform that "
            + "has no elements.  Either add elements, call Create.empty(Coder),"
            + " Create.empty(TypeDescriptor), or call 'withCoder(Coder)' or "
            + "'withType(TypeDescriptor)' on the PTransform.");

    // Step 1: find the single runtime class shared by every non-null element.
    // Void.class doubles as the "nothing seen yet" marker.
    Class<?> commonClass = Void.class;
    for (T candidate : elems) {
        if (candidate != null) {
            Class<?> candidateClass = candidate.getClass();
            if (Void.class.equals(commonClass)) {
                commonClass = candidateClass;
            } else if (!commonClass.equals(candidateClass)) {
                // Mixed element classes: only a user-specified coder can handle this.
                throw new CannotProvideCoderException(String.format("Cannot provide coder for %s: The elements are not all of the same class.", Create.class.getSimpleName()));
            }
        }
    }

    // Step 2: for a class without type parameters, ask the registry directly.
    boolean parameterized = commonClass.getTypeParameters().length != 0;
    if (!parameterized) {
        try {
            // commonClass is a Class<?>, so the resulting coder has to be cast to Coder<T>.
            @SuppressWarnings("unchecked") Coder<T> registryCoder = (Coder<T>) registry.getCoder(TypeDescriptor.of(commonClass));
            return registryCoder;
        } catch (CannotProvideCoderException ignored) {
            // No coder available from the element class alone; fall through to inference
            // from the element objects.
        }
    }

    // Step 3: deduce a coder from the elements themselves.
    @SuppressWarnings("unchecked") Coder<T> inferredCoder = (Coder<T>) inferCoderFromObjects(registry, elems);
    return inferredCoder;
}
Also used : TimestampedValueCoder(org.apache.beam.sdk.values.TimestampedValue.TimestampedValueCoder) Coder(org.apache.beam.sdk.coders.Coder) ListCoder(org.apache.beam.sdk.coders.ListCoder) SetCoder(org.apache.beam.sdk.coders.SetCoder) MapCoder(org.apache.beam.sdk.coders.MapCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) CollectionCoder(org.apache.beam.sdk.coders.CollectionCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) CannotProvideCoderException(org.apache.beam.sdk.coders.CannotProvideCoderException)

Example 4 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class SdkComponentsTest method translatePipeline.

/**
 * Builds a small pipeline, translates it with {@code SdkComponents.translatePipeline}, and then
 * walks the pipeline to count its transforms, PCollections, coders and windowing strategies,
 * asserting that each count matches the corresponding count in the translated components.
 */
@Test
public void translatePipeline() {
    BigEndianLongCoder customCoder = BigEndianLongCoder.of();
    PCollection<Long> elems = pipeline.apply(GenerateSequence.from(0L).to(207L));
    PCollection<Long> counted = elems.apply(Count.<Long>globally()).setCoder(customCoder);
    PCollection<Long> windowed =
        counted.apply(
            Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
                .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(19)))
                .accumulatingFiredPanes()
                .withAllowedLateness(Duration.standardMinutes(3L)));
    final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
    PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.<String, Long>of("foo"));
    PCollection<KV<String, Iterable<Long>>> grouped = keyed.apply(GroupByKey.<String, Long>create());

    final RunnerApi.Pipeline pipelineProto = SdkComponents.translatePipeline(pipeline);

    pipeline.traverseTopologically(new PipelineVisitor.Defaults() {

        // Everything encountered during the traversal, de-duplicated.
        Set<Node> seenTransforms = new HashSet<>();

        Set<PCollection<?>> seenPCollections = new HashSet<>();

        // Coders are wrapped with identity equivalence before being added to the set.
        Set<Equivalence.Wrapper<? extends Coder<?>>> seenCoders = new HashSet<>();

        Set<WindowingStrategy<?, ?>> seenWindowingStrategies = new HashSet<>();

        @Override
        public void leaveCompositeTransform(Node node) {
            if (!node.isRootNode()) {
                seenTransforms.add(node);
                return;
            }
            // Leaving the root node: compare the collected counts with the proto.
            assertThat("Unexpected number of PTransforms", pipelineProto.getComponents().getTransformsCount(), equalTo(seenTransforms.size()));
            assertThat("Unexpected number of PCollections", pipelineProto.getComponents().getPcollectionsCount(), equalTo(seenPCollections.size()));
            assertThat("Unexpected number of Coders", pipelineProto.getComponents().getCodersCount(), equalTo(seenCoders.size()));
            assertThat("Unexpected number of Windowing Strategies", pipelineProto.getComponents().getWindowingStrategiesCount(), equalTo(seenWindowingStrategies.size()));
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            seenTransforms.add(node);
        }

        @Override
        public void visitValue(PValue value, Node producer) {
            if (!(value instanceof PCollection)) {
                return;
            }
            PCollection<?> collection = (PCollection<?>) value;
            seenPCollections.add(collection);
            addCoders(collection.getCoder());
            seenWindowingStrategies.add(collection.getWindowingStrategy());
            addCoders(collection.getWindowingStrategy().getWindowFn().windowCoder());
        }

        // Records the coder and, for structured coders, each of its components recursively.
        private void addCoders(Coder<?> coder) {
            seenCoders.add(Equivalence.<Coder<?>>identity().wrap(coder));
            if (!(coder instanceof StructuredCoder)) {
                return;
            }
            StructuredCoder<?> structured = (StructuredCoder<?>) coder;
            for (Coder<?> part : structured.getComponents()) {
                addCoders(part);
            }
        }
    });
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) RunnerApi(org.apache.beam.sdk.common.runner.v1.RunnerApi) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) HashSet(java.util.HashSet) Coder(org.apache.beam.sdk.coders.Coder) SetCoder(org.apache.beam.sdk.coders.SetCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) KV(org.apache.beam.sdk.values.KV) PValue(org.apache.beam.sdk.values.PValue) PCollection(org.apache.beam.sdk.values.PCollection) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) Test(org.junit.Test)

Example 5 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class CoderTranslation method fromKnownCoder.

/**
 * Reconstructs a {@link Coder} from a proto coder by resolving its component coders and handing
 * them to the translator registered for the coder's URN.
 *
 * @param coder the proto coder to translate
 * @param components the components used to look up component coders by id
 * @return the coder produced by the translator for this URN
 * @throws IOException declared by this method; presumably propagated from {@code fromProto}
 * @throws IllegalArgumentException presumably, via {@code checkArgument}, when no translator is
 *     registered for the URN
 */
private static Coder<?> fromKnownCoder(RunnerApi.Coder coder, Components components) throws IOException {
    String urn = coder.getSpec().getSpec().getUrn();

    // Translate each referenced component coder, in the order the ids are listed.
    List<Coder<?>> componentCoders = new LinkedList<>();
    for (String id : coder.getComponentCoderIdsList()) {
        componentCoders.add(fromProto(components.getCodersOrThrow(id), components));
    }

    // URN -> coder class -> translator; the translator is null when either lookup misses.
    Class<? extends StructuredCoder> knownType = KNOWN_CODER_URNS.inverse().get(urn);
    CoderTranslator<?> translator = KNOWN_TRANSLATORS.get(knownType);
    checkArgument(translator != null, "Unknown Coder URN %s. Known URNs: %s", urn, KNOWN_CODER_URNS.values());

    return translator.fromComponents(componentCoders);
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) IntervalWindowCoder(org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder) LengthPrefixCoder(org.apache.beam.sdk.coders.LengthPrefixCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) ByteString(com.google.protobuf.ByteString) LinkedList(java.util.LinkedList)

Aggregations

Coder (org.apache.beam.sdk.coders.Coder)117 KvCoder (org.apache.beam.sdk.coders.KvCoder)74 WindowedValue (org.apache.beam.sdk.util.WindowedValue)53 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)44 Test (org.junit.Test)43 HashMap (java.util.HashMap)40 ArrayList (java.util.ArrayList)36 Map (java.util.Map)34 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)34 List (java.util.List)31 KV (org.apache.beam.sdk.values.KV)29 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)28 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)28 PCollection (org.apache.beam.sdk.values.PCollection)28 TupleTag (org.apache.beam.sdk.values.TupleTag)23 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)23 IOException (java.io.IOException)21 PCollectionView (org.apache.beam.sdk.values.PCollectionView)21 Instant (org.joda.time.Instant)21 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)20