Search in sources:

Example 1 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class StatefulDoFnRunnerTest method testGarbageCollect.

/**
 * Checks that per-window state written by {@code MyDoFn} is cleared once the input watermark
 * moves past each window's garbage-collection time, and that state of a window whose
 * garbage-collection time has not been reached is left untouched.
 */
@Test
public void testGarbageCollect() throws Exception {
    timerInternals.advanceInputWatermark(new Instant(1L));

    MyDoFn doFn = new MyDoFn();
    StateTag<ValueState<Integer>> intStateTag = StateTags.tagForSpec(doFn.stateId, doFn.intState);
    DoFnRunner<KV<String, Integer>, Integer> runner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn,
            getDoFnRunner(doFn),
            WINDOWING_STRATEGY,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
            new StatefulDoFnRunner.StateInternalsStateCleaner<>(
                doFn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));

    Instant firstTimestamp = new Instant(1);

    // One element for key "hello" in WINDOW_1.
    runner.processElement(
        WindowedValue.of(KV.of("hello", 1), firstTimestamp, WINDOW_1, PaneInfo.NO_FIRING));
    int window1Count = stateInternals.state(windowNamespace(WINDOW_1), intStateTag).read();
    assertEquals(1, window1Count);

    // Two elements for key "hello" in WINDOW_2.
    runner.processElement(
        WindowedValue.of(
            KV.of("hello", 1), firstTimestamp.plus(WINDOW_SIZE), WINDOW_2, PaneInfo.NO_FIRING));
    runner.processElement(
        WindowedValue.of(
            KV.of("hello", 1), firstTimestamp.plus(WINDOW_SIZE), WINDOW_2, PaneInfo.NO_FIRING));
    int window2Count = stateInternals.state(windowNamespace(WINDOW_2), intStateTag).read();
    assertEquals(2, window2Count);

    // Move the watermark beyond the end of WINDOW_1 plus the allowed lateness.
    // The cleanup timer is set to window.maxTimestamp() + allowed lateness + 1 so that state
    // is still available when a user timer for window.maxTimestamp() fires. The extra 1 puts
    // the watermark past the GC horizon rather than exactly on it.
    Instant pastWindow1Gc =
        WINDOW_1
            .maxTimestamp()
            .plus(ALLOWED_LATENESS)
            .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
            .plus(1);
    advanceInputWatermark(timerInternals, pastWindow1Gc, runner);
    assertTrue(
        stateInternals.isEmptyForTesting(
            stateInternals.state(windowNamespace(WINDOW_1), intStateTag)));
    int window2CountAfterFirstGc =
        stateInternals.state(windowNamespace(WINDOW_2), intStateTag).read();
    assertEquals(2, window2CountAfterFirstGc);

    // Move the watermark beyond the end of WINDOW_2 plus the allowed lateness; the extra 1
    // again puts the watermark past the GC horizon rather than exactly on it.
    Instant pastWindow2Gc =
        WINDOW_2
            .maxTimestamp()
            .plus(ALLOWED_LATENESS)
            .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
            .plus(1);
    advanceInputWatermark(timerInternals, pastWindow2Gc, runner);
    assertTrue(
        stateInternals.isEmptyForTesting(
            stateInternals.state(windowNamespace(WINDOW_2), intStateTag)));
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) ValueState(org.apache.beam.sdk.state.ValueState) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 2 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class StatefulDoFnRunnerTest method testLateDropping.

/**
 * Checks that an element arriving after both watermarks have been advanced to
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE} is counted by the
 * {@code DROPPED_DUE_TO_LATENESS_COUNTER} metric of {@code StatefulDoFnRunner}.
 */
@Test
public void testLateDropping() throws Exception {
    MetricsContainerImpl metrics = new MetricsContainerImpl("any");
    MetricsEnvironment.setCurrentContainer(metrics);

    // Push both watermarks to the maximum timestamp before any element is processed.
    timerInternals.advanceInputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
    timerInternals.advanceOutputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));

    DoFn<KV<String, Integer>, Integer> doFn = new MyDoFn();
    DoFnRunner<KV<String, Integer>, Integer> runner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn,
            getDoFnRunner(doFn),
            WINDOWING_STRATEGY,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
            new StatefulDoFnRunner.StateInternalsStateCleaner<>(
                doFn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));

    runner.startBundle();

    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(0L + WINDOW_SIZE));
    Instant elementTime = new Instant(0);
    runner.processElement(
        WindowedValue.of(KV.of("hello", 1), elementTime, firstWindow, PaneInfo.NO_FIRING));

    // Exactly one element was submitted, so the counter is expected to read 1.
    MetricName droppedCounterName =
        MetricName.named(
            StatefulDoFnRunner.class, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER);
    long dropped = metrics.getCounter(droppedCounterName).getCumulative().longValue();
    assertEquals(1L, dropped);

    runner.finishBundle();
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 3 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class SourceTestUtils method assertSplitAtFractionConcurrent.

/**
 * Runs a single trial in which one task reads from a fresh reader of {@code source} while a
 * second task calls {@code splitAtFraction(fraction)} on that same reader, then verifies the
 * outcome with {@code verifySingleSplitAtFractionResult}.
 *
 * <p>The splitter task is released only after the reader task has read
 * {@code numItemsToReadBeforeSplitting} items. If the reader task fails, the splitter task is
 * cancelled so that it does not stay blocked on the latch forever.
 *
 * @param executor executor on which the reader and splitter tasks are submitted
 * @param source source to create the reader from
 * @param expectedItems items passed through to the verification step
 * @param numItemsToReadBeforeSplitting number of items the reader task reads before the
 *     splitter task is unblocked
 * @param fraction fraction passed to {@code splitAtFraction}
 * @param options pipeline options used to create the reader and passed to verification
 * @return {@code false} if {@code splitAtFraction} returned {@code null}; otherwise whether the
 *     verified result reports more than zero residual items
 * @throws Exception if either task fails or verification fails
 */
private static <T> boolean assertSplitAtFractionConcurrent(ExecutorService executor, BoundedSource<T> source, List<T> expectedItems, final int numItemsToReadBeforeSplitting, final double fraction, PipelineOptions options) throws Exception {
    // Closed in readerThread
    @SuppressWarnings("resource") final BoundedSource.BoundedReader<T> reader = source.createReader(options);
    final CountDownLatch unblockSplitter = new CountDownLatch(1);
    Future<List<T>> readerThread = executor.submit(new Callable<List<T>>() {

        @Override
        public List<T> call() throws Exception {
            try {
                List<T> items = readNItemsFromUnstartedReader(reader, numItemsToReadBeforeSplitting);
                // Let the splitter run concurrently with the remainder of the read.
                unblockSplitter.countDown();
                items.addAll(readRemainingFromReader(reader, numItemsToReadBeforeSplitting > 0));
                return items;
            } finally {
                reader.close();
            }
        }
    });
    Future<KV<BoundedSource<T>, BoundedSource<T>>> splitterThread = executor.submit(new Callable<KV<BoundedSource<T>, BoundedSource<T>>>() {

        @Override
        public KV<BoundedSource<T>, BoundedSource<T>> call() throws Exception {
            unblockSplitter.await();
            BoundedSource<T> residual = reader.splitAtFraction(fraction);
            if (residual == null) {
                // The reader refused the split; signalled to the caller as null.
                return null;
            }
            return KV.of(reader.getCurrentSource(), residual);
        }
    });
    List<T> currentItems;
    try {
        currentItems = readerThread.get();
    } catch (Exception e) {
        // If the reader task failed before counting down the latch, the splitter task would
        // wait on it indefinitely. Cancel it (interrupting await() if it is already running)
        // before propagating the failure.
        splitterThread.cancel(true);
        throw e;
    }
    KV<BoundedSource<T>, BoundedSource<T>> splitSources = splitterThread.get();
    if (splitSources == null) {
        return false;
    }
    SplitAtFractionResult res = verifySingleSplitAtFractionResult(source, expectedItems, currentItems, splitSources.getKey(), splitSources.getValue(), numItemsToReadBeforeSplitting, fraction, options);
    return (res.numResidualItems > 0);
}
Also used : BoundedSource(org.apache.beam.sdk.io.BoundedSource) KV(org.apache.beam.sdk.values.KV) CountDownLatch(java.util.concurrent.CountDownLatch) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List)

Example 4 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class WithKeys method expand.

/**
 * Maps every input element to a {@code KV} whose key is {@code fn.apply(element)} and whose
 * value is the element itself.
 *
 * <p>After building the output collection, tries to set a {@code KvCoder} on it: the key coder
 * is looked up from {@code keyClass} when that is set, and otherwise derived from {@code fn}
 * and the input coder. If no key coder can be provided, the output is returned without an
 * explicit coder.
 *
 * @param in the collection of values to key
 * @return the keyed collection
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<V> in) {
    SimpleFunction<V, KV<K, V>> addKey = new SimpleFunction<V, KV<K, V>>() {

        @Override
        public KV<K, V> apply(V element) {
            return KV.of(fn.apply(element), element);
        }
    };
    PCollection<KV<K, V>> keyed = in.apply("AddKeys", MapElements.via(addKey));
    try {
        CoderRegistry registry = in.getPipeline().getCoderRegistry();
        Coder<K> keyCoder;
        if (keyClass != null) {
            keyCoder = registry.getCoder(TypeDescriptor.of(keyClass));
        } else {
            keyCoder = registry.getOutputCoder(fn, in.getCoder());
        }
        // TODO: Remove when we can set the coder inference context.
        keyed.setCoder(KvCoder.of(keyCoder, in.getCoder()));
    } catch (CannotProvideCoderException ignored) {
        // Deliberately ignored: let lazy coder inference have a try.
    }
    return keyed;
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) CannotProvideCoderException(org.apache.beam.sdk.coders.CannotProvideCoderException) KV(org.apache.beam.sdk.values.KV)

Example 5 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class GroupByKeyTest method testTimestampCombinerLatest.

/**
 * Tests that when two elements sharing a key are combined via a {@code GroupByKey}, the output
 * timestamp agrees with a windowing function customized to use the latest value
 * ({@code TimestampCombiner.LATEST}).
 */
@Test
@Category(ValidatesRunner.class)
public void testTimestampCombinerLatest() {
    Instant earlierTimestamp = new Instant(0);
    Instant laterTimestamp = new Instant(10);
    // Timestamp handed to AssertTimestamp; equal to the later of the two input timestamps.
    Instant expectedTimestamp = new Instant(10);

    p.apply(
            Create.timestamped(
                TimestampedValue.of(KV.of(0, "hello"), earlierTimestamp),
                TimestampedValue.of(KV.of(0, "goodbye"), laterTimestamp)))
        .apply(
            Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(10)))
                .withTimestampCombiner(TimestampCombiner.LATEST))
        .apply(GroupByKey.<Integer, String>create())
        .apply(ParDo.of(new AssertTimestamp(expectedTimestamp)));

    p.run();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

KV (org.apache.beam.sdk.values.KV): 192; Test (org.junit.Test): 143; Instant (org.joda.time.Instant): 66; Category (org.junit.experimental.categories.Category): 62; Pipeline (org.apache.beam.sdk.Pipeline): 35; IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 34; StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 33; Matchers.containsString (org.hamcrest.Matchers.containsString): 33; StateSpec (org.apache.beam.sdk.state.StateSpec): 25; BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 22; ArrayList (java.util.ArrayList): 19; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 19; TupleTag (org.apache.beam.sdk.values.TupleTag): 16; TableRow (com.google.api.services.bigquery.model.TableRow): 15; Map (java.util.Map): 15; ValueState (org.apache.beam.sdk.state.ValueState): 15; List (java.util.List): 14; ImmutableList (com.google.common.collect.ImmutableList): 12; HashMap (java.util.HashMap): 12; Timer (org.apache.beam.sdk.state.Timer): 12