Search in sources :

Example 41 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class SplittableProcessElementsEvaluatorFactory method createEvaluator.

/**
 * Builds the evaluator that runs a {@code ProcessElements} application over one input bundle.
 *
 * <p>The method obtains a {@code ProcessFn} from the transform, hands it to a
 * {@code DoFnLifecycleManager}, and then works with the instance returned by that manager. It
 * wires the function up with state internals, timer internals and a process-element invoker, and
 * finally wraps the delegate {@code ParDoEvaluator} together with the lifecycle manager.
 *
 * @param application the applied {@code ProcessElements} transform to evaluate
 * @param inputBundle the bundle whose key selects the execution context
 * @return the delegate evaluator wrapped by
 *     {@code DoFnLifecycleManagerRemovingTransformEvaluator.wrapping}
 * @throws Exception declared by the signature; the visible code does not show which call throws
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform = application.getTransform();
    // Create a ProcessFn from the transform, register it with a lifecycle manager, and from here on
    // use the instance the manager returns (processFn is reassigned on purpose).
    ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn = transform.newProcessFn(transform.getFn());
    DoFnLifecycleManager fnManager = DoFnLifecycleManager.of(processFn);
    processFn = ((ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) fnManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get());
    // The step context is looked up by the bundle's key and this application's step name.
    String stepName = evaluationContext.getStepName(application);
    final DirectExecutionContext.DirectStepContext stepContext = evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(stepName);
    final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> parDoEvaluator = delegateFactory.createParDoEvaluator(application, inputBundle.getKey(), transform.getSideInputs(), transform.getMainOutputTag(), transform.getAdditionalOutputTags().getAll(), stepContext, processFn, fnManager);
    // Both factories below ignore their key argument and always answer from stepContext, which was
    // already resolved for the input bundle's key above.
    processFn.setStateInternalsFactory(new StateInternalsFactory<String>() {

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String key) {
            return (StateInternals) stepContext.stateInternals();
        }
    });
    processFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            return stepContext.timerInternals();
        }
    });
    // Adapter that forwards every output to the delegate evaluator's output manager: the untagged
    // overload writes to the transform's main output tag, the tagged overload to the given tag.
    OutputWindowedValue<OutputT> outputWindowedValue = new OutputWindowedValue<OutputT>() {

        private final OutputManager outputManager = parDoEvaluator.getOutputManager();

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };
    // A new single-thread scheduled executor (daemon thread, fixed name) is created on every call
    // of this method and is not shut down anywhere in this method.
    // NOTE(review): the trailing arguments 10000 and Duration.standardSeconds(10) are presumably the
    // output-count and time bounds implied by the invoker's class name - confirm against its
    // constructor.
    processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(transform.getFn(), evaluationContext.getPipelineOptions(), outputWindowedValue, evaluationContext.createSideInputReader(transform.getSideInputs()), // NOTE(review): the original comment here was cut off, only its last word "DirectRunner." survived - restore it from upstream.
    Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setThreadFactory(MoreExecutors.platformThreadFactory()).setDaemon(true).setNameFormat("direct-splittable-process-element-checkpoint-executor").build()), 10000, Duration.standardSeconds(10)));
    return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
Also used : ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ElementAndRestriction(org.apache.beam.runners.core.construction.ElementAndRestriction) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 42 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class StatefulParDoEvaluatorFactory method createEvaluator.

/**
 * Builds the evaluator for one bundle of a {@code StatefulParDo} application.
 *
 * <p>When the underlying {@code DoFn} declares any state, every (key, window) pair present in the
 * bundle is looked up in {@code cleanupRegistry} before the delegate evaluator is created. The
 * delegate is then wrapped in a {@code StatefulParDoEvaluator}.
 *
 * @param application the applied {@code StatefulParDo} transform to evaluate
 * @param inputBundle the bundle of keyed work items to process
 * @return a {@code StatefulParDoEvaluator} wrapping the delegate evaluator
 * @throws Exception declared by the signature; the visible code does not show which call throws
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<K, KV<K, InputT>>> createEvaluator(AppliedPTransform<PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>>, PCollectionTuple, StatefulParDo<K, InputT, OutputT>> application, CommittedBundle<KeyedWorkItem<K, KV<K, InputT>>> inputBundle) throws Exception {
    final DoFn<KV<K, InputT>, OutputT> fn = application.getTransform().getUnderlyingParDo().getFn();
    final DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());
    final boolean declaresState = fnSignature.stateDeclarations().size() > 0;

    if (declaresState) {
        // Touch the registry once per (transform, key, window); the returned value is not needed.
        // Per the original note, the cache is used to limit the number of tasks to avoid
        // performance degradation.
        for (final WindowedValue<?> value : inputBundle.getElements()) {
            for (final BoundedWindow valueWindow : value.getWindows()) {
                cleanupRegistry.get(
                    AppliedPTransformOutputKeyAndWindow.create(
                        application,
                        (StructuralKey<K>) inputBundle.getKey(),
                        valueWindow));
            }
        }
    }

    // The raw AppliedPTransform cast is what the class-level "rawtypes" suppression covers.
    DoFnLifecycleManagerRemovingTransformEvaluator<KV<K, InputT>> wrapped =
        delegateFactory.createEvaluator(
            (AppliedPTransform) application,
            inputBundle.getKey(),
            fn,
            application.getTransform().getUnderlyingParDo().getSideInputs(),
            application.getTransform().getUnderlyingParDo().getMainOutputTag(),
            application.getTransform().getUnderlyingParDo().getAdditionalOutputTags().getAll());
    return new StatefulParDoEvaluator<>(wrapped);
}
Also used : BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) KV(org.apache.beam.sdk.values.KV) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 43 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class CopyOnAccessInMemoryStateInternalsTest method testGetEarliestWatermarkHoldWithEarliestInUnderlyingTable.

/**
 * The earliest hold (22) is written through the underlying internals and committed; a later hold
 * (244) is added on top. The layered internals must report 22 as the earliest watermark hold.
 */
@Test
public void testGetEarliestWatermarkHoldWithEarliestInUnderlyingTable() {
    // Each hold gets its own window so the two live in different state namespaces.
    BoundedWindow underlyingWindow = new BoundedWindow() {
        @Override
        public Instant maxTimestamp() {
            return new Instant(2048L);
        }
    };
    BoundedWindow overlayWindow = new BoundedWindow() {
        @Override
        public Instant maxTimestamp() {
            return new Instant(689743L);
        }
    };

    // Bottom layer: holds 22.
    CopyOnAccessInMemoryStateInternals<String> base =
        CopyOnAccessInMemoryStateInternals.withUnderlying("foo", null);
    StateTag<WatermarkHoldState> baseTag =
        StateTags.watermarkStateInternal("foo", TimestampCombiner.EARLIEST);
    WatermarkHoldState baseHold = base.state(StateNamespaces.window(null, underlyingWindow), baseTag);
    baseHold.add(new Instant(22L));

    // Top layer, built over the committed bottom layer: holds 244.
    CopyOnAccessInMemoryStateInternals<String> internals =
        CopyOnAccessInMemoryStateInternals.withUnderlying("foo", base.commit());
    StateTag<WatermarkHoldState> overlayTag =
        StateTags.watermarkStateInternal("foo", TimestampCombiner.EARLIEST);
    WatermarkHoldState overlayHold =
        internals.state(StateNamespaces.window(null, overlayWindow), overlayTag);
    overlayHold.add(new Instant(244L));
    internals.commit();

    assertThat(internals.getEarliestWatermarkHold(), equalTo(new Instant(22L)));
}
Also used : Instant(org.joda.time.Instant) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) StateNamespaceForTest(org.apache.beam.runners.core.StateNamespaceForTest) Test(org.junit.Test)

Example 44 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class CopyOnAccessInMemoryStateInternalsTest method testGetEarliestWatermarkHoldAfterCommit.

/**
 * Two holds (22 and 2) are added to the same internals in different window namespaces. After
 * committing, the earliest watermark hold must be the smaller one, 2.
 */
@Test
public void testGetEarliestWatermarkHoldAfterCommit() {
    // Each hold gets its own window so the two live in different state namespaces.
    BoundedWindow windowA = new BoundedWindow() {
        @Override
        public Instant maxTimestamp() {
            return new Instant(2048L);
        }
    };
    BoundedWindow windowB = new BoundedWindow() {
        @Override
        public Instant maxTimestamp() {
            return new Instant(689743L);
        }
    };

    CopyOnAccessInMemoryStateInternals<String> internals =
        CopyOnAccessInMemoryStateInternals.withUnderlying("foo", null);

    // Hold in the first window: 22.
    StateTag<WatermarkHoldState> tagA =
        StateTags.watermarkStateInternal("foo", TimestampCombiner.EARLIEST);
    WatermarkHoldState holdA = internals.state(StateNamespaces.window(null, windowA), tagA);
    holdA.add(new Instant(22L));

    // Hold in the second window: 2, the expected minimum.
    StateTag<WatermarkHoldState> tagB =
        StateTags.watermarkStateInternal("foo", TimestampCombiner.EARLIEST);
    WatermarkHoldState holdB = internals.state(StateNamespaces.window(null, windowB), tagB);
    holdB.add(new Instant(2L));

    internals.commit();

    assertThat(internals.getEarliestWatermarkHold(), equalTo(new Instant(2L)));
}
Also used : Instant(org.joda.time.Instant) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) StateNamespaceForTest(org.apache.beam.runners.core.StateNamespaceForTest) Test(org.junit.Test)

Example 45 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class CopyOnAccessInMemoryStateInternalsTest method testGetEarliestWatermarkHoldWithEarliestInNewTable.

/**
 * A later hold (224) is written through the underlying internals and committed; an earlier hold
 * (24) is added on top. The layered internals must report 24 as the earliest watermark hold.
 */
@Test
public void testGetEarliestWatermarkHoldWithEarliestInNewTable() {
    // Each hold gets its own window so the two live in different state namespaces.
    BoundedWindow underlyingWindow = new BoundedWindow() {
        @Override
        public Instant maxTimestamp() {
            return new Instant(2048L);
        }
    };
    BoundedWindow overlayWindow = new BoundedWindow() {
        @Override
        public Instant maxTimestamp() {
            return new Instant(689743L);
        }
    };

    // Bottom layer: holds 224.
    CopyOnAccessInMemoryStateInternals<String> base =
        CopyOnAccessInMemoryStateInternals.withUnderlying("foo", null);
    StateTag<WatermarkHoldState> baseTag =
        StateTags.watermarkStateInternal("foo", TimestampCombiner.EARLIEST);
    WatermarkHoldState baseHold = base.state(StateNamespaces.window(null, underlyingWindow), baseTag);
    baseHold.add(new Instant(224L));

    // Top layer, built over the committed bottom layer: holds 24, the expected minimum.
    CopyOnAccessInMemoryStateInternals<String> internals =
        CopyOnAccessInMemoryStateInternals.withUnderlying("foo", base.commit());
    StateTag<WatermarkHoldState> overlayTag =
        StateTags.watermarkStateInternal("foo", TimestampCombiner.EARLIEST);
    WatermarkHoldState overlayHold =
        internals.state(StateNamespaces.window(null, overlayWindow), overlayTag);
    overlayHold.add(new Instant(24L));
    internals.commit();

    assertThat(internals.getEarliestWatermarkHold(), equalTo(new Instant(24L)));
}
Also used : Instant(org.joda.time.Instant) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) StateNamespaceForTest(org.apache.beam.runners.core.StateNamespaceForTest) Test(org.junit.Test)

Aggregations

BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)54 Instant (org.joda.time.Instant)27 Test (org.junit.Test)26 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)21 KV (org.apache.beam.sdk.values.KV)20 WindowedValue (org.apache.beam.sdk.util.WindowedValue)14 ArrayList (java.util.ArrayList)7 TimerSpec (org.apache.beam.sdk.state.TimerSpec)7 Timer (org.apache.beam.sdk.state.Timer)6 Matchers.containsString (org.hamcrest.Matchers.containsString)6 DoFn (org.apache.beam.sdk.transforms.DoFn)5 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)5 ImmutableList (com.google.common.collect.ImmutableList)4 List (java.util.List)4 ValueState (org.apache.beam.sdk.state.ValueState)4 OnTimer (org.apache.beam.sdk.transforms.DoFn.OnTimer)4 TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner)4 PCollection (org.apache.beam.sdk.values.PCollection)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 Duration (org.joda.time.Duration)4