Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
Class StatefulDoFnRunnerTest, method testGarbageCollect.
/**
 * Checks that per-window state is cleared once the input watermark moves past a window's
 * garbage-collection horizon, while state of a window that has not yet expired is kept.
 */
@Test
public void testGarbageCollect() throws Exception {
  // Move the input watermark to 1 before any element is processed.
  timerInternals.advanceInputWatermark(new Instant(1L));

  MyDoFn doFn = new MyDoFn();
  StateTag<ValueState<Integer>> intStateTag = StateTags.tagForSpec(doFn.stateId, doFn.intState);
  DoFnRunner<KV<String, Integer>, Integer> runner =
      DoFnRunners.defaultStatefulDoFnRunner(
          doFn,
          getDoFnRunner(doFn),
          WINDOWING_STRATEGY,
          new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
          new StatefulDoFnRunner.StateInternalsStateCleaner<>(
              doFn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));

  Instant firstWindowTime = new Instant(1);
  Instant secondWindowTime = firstWindowTime.plus(WINDOW_SIZE);

  // One element for key "hello" in WINDOW_1.
  runner.processElement(
      WindowedValue.of(KV.of("hello", 1), firstWindowTime, WINDOW_1, PaneInfo.NO_FIRING));
  assertEquals(1, (int) stateInternals.state(windowNamespace(WINDOW_1), intStateTag).read());

  // Two elements for key "hello" in WINDOW_2.
  for (int i = 0; i < 2; i++) {
    runner.processElement(
        WindowedValue.of(KV.of("hello", 1), secondWindowTime, WINDOW_2, PaneInfo.NO_FIRING));
  }
  assertEquals(2, (int) stateInternals.state(windowNamespace(WINDOW_2), intStateTag).read());

  // Advance the watermark past the end of WINDOW_1 + allowed lateness.
  // The cleanup timer is set to window.maxTimestamp() + allowed lateness + 1 to ensure that
  // state is still available when a user timer for window.maxTimestamp() fires. The final
  // plus(1) puts the watermark past the GC horizon, not on it.
  Instant pastFirstWindowGc =
      WINDOW_1
          .maxTimestamp()
          .plus(ALLOWED_LATENESS)
          .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
          .plus(1);
  advanceInputWatermark(timerInternals, pastFirstWindowGc, runner);
  assertTrue(
      stateInternals.isEmptyForTesting(
          stateInternals.state(windowNamespace(WINDOW_1), intStateTag)));
  // WINDOW_2 has not expired yet, so its state must be untouched.
  assertEquals(2, (int) stateInternals.state(windowNamespace(WINDOW_2), intStateTag).read());

  // Advance the watermark past the end of WINDOW_2 + allowed lateness (again one past the
  // GC horizon) and expect its state to be cleared as well.
  Instant pastSecondWindowGc =
      WINDOW_2
          .maxTimestamp()
          .plus(ALLOWED_LATENESS)
          .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
          .plus(1);
  advanceInputWatermark(timerInternals, pastSecondWindowGc, runner);
  assertTrue(
      stateInternals.isEmptyForTesting(
          stateInternals.state(windowNamespace(WINDOW_2), intStateTag)));
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
Class StatefulDoFnRunnerTest, method testLateDropping.
/**
 * Checks that an element processed after both watermarks have been advanced to
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE} is counted in the dropped-due-to-lateness counter.
 */
@Test
public void testLateDropping() throws Exception {
  // Install a fresh metrics container so the counter can be read back below.
  MetricsContainerImpl metrics = new MetricsContainerImpl("any");
  MetricsEnvironment.setCurrentContainer(metrics);

  // Push both watermarks to the maximum timestamp before the runner sees any element.
  Instant maxTimestamp = new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE);
  timerInternals.advanceInputWatermark(maxTimestamp);
  timerInternals.advanceOutputWatermark(maxTimestamp);

  DoFn<KV<String, Integer>, Integer> doFn = new MyDoFn();
  DoFnRunner<KV<String, Integer>, Integer> runner =
      DoFnRunners.defaultStatefulDoFnRunner(
          doFn,
          getDoFnRunner(doFn),
          WINDOWING_STRATEGY,
          new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
          new StatefulDoFnRunner.StateInternalsStateCleaner<>(
              doFn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));

  runner.startBundle();

  // A single element at timestamp 0 in the window [0, WINDOW_SIZE).
  IntervalWindow firstWindow =
      new IntervalWindow(new Instant(0), new Instant(0L + WINDOW_SIZE));
  Instant elementTimestamp = new Instant(0);
  runner.processElement(
      WindowedValue.of(KV.of("hello", 1), elementTimestamp, firstWindow, PaneInfo.NO_FIRING));

  // Exactly one element must have been recorded as dropped.
  MetricName droppedCounterName =
      MetricName.named(
          StatefulDoFnRunner.class, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER);
  long droppedCount = metrics.getCounter(droppedCounterName).getCumulative().longValue();
  assertEquals(1L, droppedCount);

  runner.finishBundle();
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
Class Create, method getDefaultCreateCoder.
/**
 * Chooses a default {@link Coder} for the elements handed to a {@code Create} transform.
 *
 * <p>The non-null elements must all have exactly the same runtime class. If that class declares
 * no type parameters, the registry is asked for a coder for it; if the registry cannot provide
 * one, or the class is generic, the coder is inferred from the element objects instead.
 *
 * @param registry the registry used to look up coders
 * @param elems the elements to encode; must not be empty
 * @return a coder for the elements
 * @throws CannotProvideCoderException if the non-null elements are not all of the same class, or
 *     if it is propagated from inferring a coder from the elements
 */
private static <T> Coder<T> getDefaultCreateCoder(CoderRegistry registry, Iterable<T> elems) throws CannotProvideCoderException {
  checkArgument(!Iterables.isEmpty(elems), "Can not determine a default Coder for a 'Create' PTransform that " + "has no elements. Either add elements, call Create.empty(Coder)," + " Create.empty(TypeDescriptor), or call 'withCoder(Coder)' or " + "'withType(TypeDescriptor)' on the PTransform.");

  // Step 1: find the single runtime class shared by all non-null elements.
  // Void.class doubles as the "no class seen yet" marker.
  Class<?> sharedClass = Void.class;
  for (T candidate : elems) {
    if (candidate != null) {
      Class<?> candidateClass = candidate.getClass();
      boolean nothingSeenYet = sharedClass.equals(Void.class);
      if (!nothingSeenYet && !sharedClass.equals(candidateClass)) {
        // Mixed element classes: the user has to specify a coder explicitly.
        throw new CannotProvideCoderException(String.format("Cannot provide coder for %s: The elements are not all of the same class.", Create.class.getSimpleName()));
      }
      sharedClass = candidateClass;
    }
  }

  // Step 2: for a class without type parameters, ask the registry directly.
  boolean isNonGenericClass = sharedClass.getTypeParameters().length == 0;
  if (isNonGenericClass) {
    try {
      // The cast is unchecked because sharedClass is only known as Class<?>.
      @SuppressWarnings("unchecked")
      Coder<T> registryCoder = (Coder<T>) registry.getCoder(TypeDescriptor.of(sharedClass));
      return registryCoder;
    } catch (CannotProvideCoderException ignored) {
      // No coder available from the class alone; fall through and try the elements themselves.
    }
  }

  // Step 3: last resort, deduce a coder from the element objects.
  return (Coder<T>) inferCoderFromObjects(registry, elems);
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
Class SdkComponentsTest, method translatePipeline.
/**
 * Translates a small pipeline to its proto form and checks that the proto's component counts
 * (transforms, PCollections, coders, windowing strategies) match what a topological traversal
 * of the original pipeline collects.
 */
@Test
public void translatePipeline() {
  BigEndianLongCoder longCoder = BigEndianLongCoder.of();
  PCollection<Long> sequence = pipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> total = sequence.apply(Count.<Long>globally()).setCoder(longCoder);
  PCollection<Long> windowed =
      total.apply(
          Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(AfterPane.elementCountAtLeast(19)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(3L)));
  final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.<String, Long>of("foo"));
  PCollection<KV<String, Iterable<Long>>> grouped =
      keyed.apply(GroupByKey.<String, Long>create());

  final RunnerApi.Pipeline translated = SdkComponents.translatePipeline(pipeline);

  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        // Everything the traversal encounters; compared against the proto at the root node.
        Set<Node> seenTransforms = new HashSet<>();
        Set<PCollection<?>> seenPCollections = new HashSet<>();
        // Coders are wrapped with identity equivalence, so they are counted per instance.
        Set<Equivalence.Wrapper<? extends Coder<?>>> seenCoders = new HashSet<>();
        Set<WindowingStrategy<?, ?>> seenStrategies = new HashSet<>();

        @Override
        public void leaveCompositeTransform(Node node) {
          if (!node.isRootNode()) {
            transforms().add(node);
            return;
          }
          // Leaving the root: all collections are populated, so compare the totals.
          RunnerApi.Components components = translated.getComponents();
          assertThat(
              "Unexpected number of PTransforms",
              components.getTransformsCount(),
              equalTo(seenTransforms.size()));
          assertThat(
              "Unexpected number of PCollections",
              components.getPcollectionsCount(),
              equalTo(seenPCollections.size()));
          assertThat(
              "Unexpected number of Coders",
              components.getCodersCount(),
              equalTo(seenCoders.size()));
          assertThat(
              "Unexpected number of Windowing Strategies",
              components.getWindowingStrategiesCount(),
              equalTo(seenStrategies.size()));
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
          transforms().add(node);
        }

        @Override
        public void visitValue(PValue value, Node producer) {
          if (!(value instanceof PCollection)) {
            return;
          }
          PCollection collection = (PCollection) value;
          seenPCollections.add(collection);
          addCoders(collection.getCoder());
          seenStrategies.add(collection.getWindowingStrategy());
          addCoders(collection.getWindowingStrategy().getWindowFn().windowCoder());
        }

        /** Accessor for the transform set shared by the composite and primitive callbacks. */
        private Set<Node> transforms() {
          return seenTransforms;
        }

        /** Records the coder and, for structured coders, all of its components recursively. */
        private void addCoders(Coder<?> coder) {
          seenCoders.add(Equivalence.<Coder<?>>identity().wrap(coder));
          if (!(coder instanceof StructuredCoder)) {
            return;
          }
          for (Coder<?> nested : ((StructuredCoder<?>) coder).getComponents()) {
            addCoders(nested);
          }
        }
      });
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
Class CoderTranslation, method fromKnownCoder.
/**
 * Rebuilds a {@link Coder} from its proto representation using the translator registered for
 * the proto's URN.
 *
 * <p>Component coders are resolved first (via {@code fromProto}), in the order they are listed
 * in the proto, and are then handed to the translator.
 *
 * @param coder the coder proto to translate
 * @param components the components used to look up the referenced component coders by id
 * @return the coder built by the matching translator
 * @throws IOException if resolving a component coder fails
 */
private static Coder<?> fromKnownCoder(RunnerApi.Coder coder, Components components) throws IOException {
  // Resolve every component coder before looking at the URN of this coder.
  List<Coder<?>> resolvedComponents = new LinkedList<>();
  for (String id : coder.getComponentCoderIdsList()) {
    resolvedComponents.add(fromProto(components.getCodersOrThrow(id), components));
  }

  // URN -> coder class -> translator; an unregistered URN leaves the translator null.
  String urn = coder.getSpec().getSpec().getUrn();
  Class<? extends StructuredCoder> coderClass = KNOWN_CODER_URNS.inverse().get(urn);
  CoderTranslator<?> coderTranslator = KNOWN_TRANSLATORS.get(coderClass);
  checkArgument(coderTranslator != null, "Unknown Coder URN %s. Known URNs: %s", urn, KNOWN_CODER_URNS.values());

  return coderTranslator.fromComponents(resolvedComponents);
}
Aggregations