Search in sources :

Example 11 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

the class BatchGroupAlsoByWindowViaOutputBufferFn method processElement.

/**
 * Runs group-also-by-window for a single key whose values are all available up front.
 *
 * <p>A private {@link InMemoryTimerInternals} stands in for the runner's timer manager: the
 * elements are fed to a {@link ReduceFnRunner}, then the input watermark and both
 * processing-time clocks are pushed to {@code BoundedWindow.TIMESTAMP_MAX_VALUE} so that every
 * pending window and timer fires before the runner's state is persisted.
 *
 * <p>The {@code stepContext} and {@code sideInputReader} arguments are not consulted by this
 * implementation; the reduce runner is given an empty side input reader.
 *
 * @param element the key together with all of its windowed values
 * @param options pipeline options handed to the reduce runner
 * @param stepContext unused here
 * @param sideInputReader unused here
 * @param output receiver for the grouped results
 * @throws Exception if element processing or timer firing fails
 */
@Override
public void processElement(KV<K, Iterable<WindowedValue<InputT>>> element, PipelineOptions options, StepContext stepContext, SideInputReader sideInputReader, OutputWindowedValue<KV<K, OutputT>> output) throws Exception {
    final K groupingKey = element.getKey();

    // In batch every value for this key is already present, and the context offers no timer
    // manager. Build an in-memory one and drive the watermark by hand, relying on the data
    // being complete and in timestamp order.
    final InMemoryTimerInternals batchTimers = new InMemoryTimerInternals();
    batchTimers.advanceProcessingTime(Instant.now());
    batchTimers.advanceSynchronizedProcessingTime(Instant.now());

    final StateInternals keyState = stateInternalsFactory.stateInternalsForKey(groupingKey);
    final ReduceFnRunner<K, InputT, OutputT, W> runner =
        new ReduceFnRunner<>(
            groupingKey,
            strategy,
            ExecutableTriggerStateMachine.create(
                TriggerStateMachines.stateMachineForTrigger(
                    TriggerTranslation.toProto(strategy.getTrigger()))),
            keyState,
            batchTimers,
            output,
            NullSideInputReader.empty(),
            reduceFn,
            options);

    // Feed every value for the key through the reduce function.
    runner.processElements(element.getValue());

    // Close out all pending windows: input watermark goes to the end of time.
    batchTimers.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

    // Then move both processing-time clocks to the end of time so remaining timers become eligible.
    batchTimers.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    batchTimers.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);

    fireEligibleTimers(batchTimers, runner);
    runner.persist();
}
Also used : ReduceFnRunner(org.apache.beam.runners.core.ReduceFnRunner) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals)

Example 12 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

the class UserParDoFnFactoryTest method testCleanupWorks.

/**
 * Verifies that a fired cleanup timer ({@code SimpleParDoFn.CLEANUP_TIMER_ID}) for a window
 * clears the user state stored under that window's namespace and leaves the state of other
 * windows untouched until their own cleanup timer fires.
 */
@Test
public void testCleanupWorks() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    CounterSet counters = new CounterSet();
    DoFn<?, ?> initialFn = new TestStatefulDoFn();
    CloudObject cloudObject = getCloudObject(initialFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));
    // A single in-memory state store is shared by both mocked step contexts below, so the
    // assertions can inspect the same state the ParDoFn operates on.
    StateInternals stateInternals = InMemoryStateInternals.forKey("dummy");
    // The overarching step context that only ParDoFn gets
    DataflowStepContext stepContext = mock(DataflowStepContext.class);
    // The user step context that the DoFnRunner gets a handle on
    DataflowStepContext userStepContext = mock(DataflowStepContext.class);
    when(stepContext.namespacedToUser()).thenReturn(userStepContext);
    when(stepContext.stateInternals()).thenReturn(stateInternals);
    when(userStepContext.stateInternals()).thenReturn((StateInternals) stateInternals);
    DataflowExecutionContext<DataflowStepContext> executionContext = mock(DataflowExecutionContext.class);
    TestOperationContext operationContext = TestOperationContext.create(counters);
    when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
    when(executionContext.getSideInputReader(any(), any(), any())).thenReturn(NullSideInputReader.empty());
    ParDoFn parDoFn = factory.create(options, cloudObject, Collections.emptyList(), MAIN_OUTPUT, ImmutableMap.of(MAIN_OUTPUT, 0), executionContext, operationContext);
    Receiver rcvr = new OutputReceiver();
    parDoFn.startBundle(rcvr);
    // Two windows, each with its own state namespace, so cleanup of one can be told apart
    // from cleanup of the other.
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    StateNamespace firstWindowNamespace = StateNamespaces.window(windowCoder, firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(windowCoder, secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));
    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We mustn't care how it is accomplished.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");
    // The user-facing context never reports a fired timer; only the overarching context
    // does: one cleanup timer for the first window (stamped 1 ms past the window's max
    // timestamp), followed by null to end the timer loop.
    when(userStepContext.getNextFiredTimer(windowCoder)).thenReturn(null);
    when(stepContext.getNextFiredTimer(windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, firstWindowNamespace, firstWindow.maxTimestamp().plus(Duration.millis(1L)), firstWindow.maxTimestamp().plus(Duration.millis(1L)), TimeDomain.EVENT_TIME)).thenReturn(null);
    // This should fire the timer to clean up the first window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
    // Re-stub the overarching context to deliver the second window's cleanup timer, then null.
    when(stepContext.getNextFiredTimer((Coder) windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, secondWindowNamespace, secondWindow.maxTimestamp().plus(Duration.millis(1L)), secondWindow.maxTimestamp().plus(Duration.millis(1L)), TimeDomain.EVENT_TIME)).thenReturn(null);
    // And this should clean up the second window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Instant(org.joda.time.Instant) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DataflowStepContext(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext) StateNamespace(org.apache.beam.runners.core.StateNamespace) ValueState(org.apache.beam.sdk.state.ValueState) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 13 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

the class SamzaDoFnRunners method create.

/**
 * Builds the {@link DoFnRunner} used by the Java runner for the given {@link DoFn}.
 *
 * <p>A simple runner is always created first and, when metrics are enabled in the pipeline
 * options, wrapped so that it reports to the execution context's metrics container. For a
 * stateful {@code DoFn} the result is further wrapped in a stateful runner (with a cleanup timer
 * and state cleaner) and bound to {@code KeyedInternals}; otherwise state and timer internals
 * are obtained for a {@code null} key and the (possibly metered) simple runner is returned.
 *
 * @return the runner to drive {@code doFn} with
 */
public static <InT, FnOutT> DoFnRunner<InT, FnOutT> create(SamzaPipelineOptions pipelineOptions, DoFn<InT, FnOutT> doFn, WindowingStrategy<?, ?> windowingStrategy, String transformFullName, String transformId, Context context, TupleTag<FnOutT> mainOutputTag, SideInputHandler sideInputHandler, SamzaTimerInternalsFactory<?> timerInternalsFactory, Coder<?> keyCoder, DoFnRunners.OutputManager outputManager, Coder<InT> inputCoder, List<TupleTag<?>> sideOutputTags, Map<TupleTag<?>, Coder<?>> outputCoders, DoFnSchemaInformation doFnSchemaInformation, Map<String, PCollectionView<?>> sideInputMapping) {
    final DoFnSignature fnSignature = DoFnSignatures.getSignature(doFn.getClass());
    final SamzaStoreStateInternals.Factory<?> storeStateFactory =
        SamzaStoreStateInternals.createStateInternalsFactory(
            transformId, keyCoder, context.getTaskContext(), pipelineOptions, fnSignature);
    final SamzaExecutionContext samzaContext =
        (SamzaExecutionContext) context.getApplicationContainerContext();

    // Stateless DoFns get internals for the null key; stateful ones go through KeyedInternals.
    final KeyedInternals keyedInternals;
    final StateInternals stateInternals;
    final TimerInternals timerInternals;
    if (!StateUtils.isStateful(doFn)) {
        keyedInternals = null;
        stateInternals = storeStateFactory.stateInternalsForKey(null);
        timerInternals = timerInternalsFactory.timerInternalsForKey(null);
    } else {
        keyedInternals = new KeyedInternals(storeStateFactory, timerInternalsFactory);
        stateInternals = keyedInternals.stateInternals();
        timerInternals = keyedInternals.timerInternals();
    }

    final StepContext stepContext = createStepContext(stateInternals, timerInternals);
    final DoFnRunner<InT, FnOutT> baseRunner =
        DoFnRunners.simpleRunner(
            pipelineOptions,
            doFn,
            sideInputHandler,
            outputManager,
            mainOutputTag,
            sideOutputTags,
            stepContext,
            inputCoder,
            outputCoders,
            windowingStrategy,
            doFnSchemaInformation,
            sideInputMapping);

    // Metrics reporting is opt-in via the pipeline options.
    final DoFnRunner<InT, FnOutT> maybeMeteredRunner;
    if (pipelineOptions.getEnableMetrics()) {
        maybeMeteredRunner =
            DoFnRunnerWithMetrics.wrap(
                baseRunner, samzaContext.getMetricsContainer(), transformFullName);
    } else {
        maybeMeteredRunner = baseRunner;
    }

    // Nothing more to layer on when the DoFn is stateless.
    if (keyedInternals == null) {
        return maybeMeteredRunner;
    }

    final DoFnRunner<InT, FnOutT> statefulRunner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn,
            inputCoder,
            maybeMeteredRunner,
            stepContext,
            windowingStrategy,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, windowingStrategy),
            createStateCleaner(doFn, windowingStrategy, keyedInternals.stateInternals()));
    return new DoFnRunnerWithKeyedInternals<>(statefulRunner, keyedInternals);
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) StepContext(org.apache.beam.runners.core.StepContext) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 14 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

the class MultiDoFnFunction method call.

/**
 * Processes one partition of windowed input values through the wrapped {@code DoFn}.
 *
 * <p>The {@code DoFn}'s setup hook is invoked once, the first time a non-empty partition is
 * seen. In stateful mode the key is read from the first element of the partition and in-memory
 * state and timer internals are created for it; otherwise a no-op step context is used.
 *
 * @param iter the partition's input values
 * @return an iterator over the tagged outputs produced for this partition
 * @throws Exception if setup or element processing fails
 */
@Override
public Iterator<Tuple2<TupleTag<?>, WindowedValue<?>>> call(Iterator<WindowedValue<InputT>> iter) throws Exception {
    // Lazily run DoFn setup, and only when there is something to process.
    if (!wasSetupCalled && iter.hasNext()) {
        DoFnInvokers.tryInvokeSetupFor(doFn, options.get());
        wasSetupCalled = true;
    }

    DoFnOutputManager outputs = new DoFnOutputManager();

    final InMemoryTimerInternals timerInternals;
    final StepContext stepContext;
    // Now only implements the StatefulParDo in Batch mode.
    Object key = null;
    if (!stateful) {
        timerInternals = null;
        stepContext = new SparkProcessContext.NoOpStepContext();
    } else {
        if (iter.hasNext()) {
            // Peek at the first element to learn the key, then put it back at the front.
            WindowedValue<InputT> firstElement = iter.next();
            key = ((KV) firstElement.getValue()).getKey();
            iter = Iterators.concat(Iterators.singletonIterator(firstElement), iter);
        }
        final InMemoryStateInternals<?> keyedState = InMemoryStateInternals.forKey(key);
        timerInternals = new InMemoryTimerInternals();
        stepContext =
            new StepContext() {

                @Override
                public StateInternals stateInternals() {
                    return keyedState;
                }

                @Override
                public TimerInternals timerInternals() {
                    return timerInternals;
                }
            };
    }

    final DoFnRunner<InputT, OutputT> runner =
        DoFnRunners.simpleRunner(
            options.get(),
            doFn,
            CachedSideInputReader.of(new SparkSideInputReader(sideInputs)),
            outputs,
            mainOutputTag,
            additionalOutputTags,
            stepContext,
            inputCoder,
            outputCoders,
            windowingStrategy,
            doFnSchemaInformation,
            sideInputMapping);
    DoFnRunnerWithMetrics<InputT, OutputT> meteredRunner =
        new DoFnRunnerWithMetrics<>(stepName, runner, metricsAccum);

    // Timers are only drained in stateful mode; stateless processing has none.
    return new SparkProcessContext<>(
            doFn,
            meteredRunner,
            outputs,
            key,
            stateful ? new TimerDataIterator(timerInternals) : Collections.emptyIterator())
        .processPartition(iter)
        .iterator();
}
Also used : StepContext(org.apache.beam.runners.core.StepContext) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) TimerInternals(org.apache.beam.runners.core.TimerInternals) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) SparkSideInputReader(org.apache.beam.runners.spark.util.SparkSideInputReader)

Example 15 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

the class GroupByWindowFunction method flatMap.

/**
 * Groups the values of one key by window and emits the grouped results to {@code collector}.
 *
 * <p>An in-memory timer manager and in-memory state are created for the key. The values are run
 * through a {@link ReduceFnRunner}, after which the input watermark and both processing-time
 * clocks are advanced to {@code BoundedWindow.TIMESTAMP_MAX_VALUE} so every pending window and
 * timer fires. The buffered outputs are then forwarded to the collector.
 *
 * <p>Failures are propagated rather than logged and dropped: swallowing them would silently
 * discard this key's output and produce an incomplete result.
 *
 * @param kIteratorKV the key and all of its windowed values
 * @param collector receiver for the grouped, windowed output
 * @throws RuntimeException if grouping fails; a checked failure is wrapped with its cause
 *     preserved, an unchecked one is rethrown unchanged
 */
@Override
public void flatMap(KV<K, Iterable<WindowedValue<V>>> kIteratorKV, RecordCollector<WindowedValue<KV<K, Iterable<V>>>> collector) {
    try {
        K key = kIteratorKV.getKey();
        Iterable<WindowedValue<V>> values = kIteratorKV.getValue();
        InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
        timerInternals.advanceProcessingTime(Instant.now());
        timerInternals.advanceSynchronizedProcessingTime(Instant.now());
        StateInternals stateInternals = InMemoryStateInternals.forKey(key);
        GABWOutputWindowedValue<K, V> outputter = new GABWOutputWindowedValue<>();
        ReduceFnRunner<K, V, Iterable<V>, W> reduceFnRunner = new ReduceFnRunner<>(key, windowingStrategy, ExecutableTriggerStateMachine.create(TriggerStateMachines.stateMachineForTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger()))), stateInternals, timerInternals, outputter, new UnsupportedSideInputReader("GroupAlsoByWindow"), reduceFn, null);
        // Process the grouped values.
        reduceFnRunner.processElements(values);
        // Finish any pending windows by advancing the input watermark to infinity.
        timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
        // Finally, advance the processing time to infinity to fire any timers.
        timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
        timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
        fireEligibleTimers(timerInternals, reduceFnRunner);
        reduceFnRunner.persist();
        Iterator<WindowedValue<KV<K, Iterable<V>>>> outputs = outputter.getOutputs().iterator();
        while (outputs.hasNext()) {
            collector.collect(outputs.next());
        }
    } catch (RuntimeException e) {
        // Already unchecked: let it surface unchanged so the original type and stack are kept.
        throw e;
    } catch (Exception e) {
        // This method declares no checked exceptions, so wrap while preserving the cause.
        throw new RuntimeException("Failed to group values by window", e);
    }
}
Also used : ReduceFnRunner(org.apache.beam.runners.core.ReduceFnRunner) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) IOException(java.io.IOException) ObjectStreamException(java.io.ObjectStreamException) WindowedValue(org.apache.beam.sdk.util.WindowedValue) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) KV(org.apache.beam.sdk.values.KV) UnsupportedSideInputReader(org.apache.beam.runners.core.UnsupportedSideInputReader) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals)

Aggregations

StateInternals (org.apache.beam.runners.core.StateInternals)15 InMemoryTimerInternals (org.apache.beam.runners.core.InMemoryTimerInternals)9 WindowedValue (org.apache.beam.sdk.util.WindowedValue)8 ReduceFnRunner (org.apache.beam.runners.core.ReduceFnRunner)7 InMemoryStateInternals (org.apache.beam.runners.core.InMemoryStateInternals)6 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)6 TimerInternals (org.apache.beam.runners.core.TimerInternals)6 KV (org.apache.beam.sdk.values.KV)6 UnsupportedSideInputReader (org.apache.beam.runners.core.UnsupportedSideInputReader)4 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 Instant (org.joda.time.Instant)4 StepContext (org.apache.beam.runners.core.StepContext)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 DoFnRunners (org.apache.beam.runners.core.DoFnRunners)2 StateNamespace (org.apache.beam.runners.core.StateNamespace)2 WindowNamespace (org.apache.beam.runners.core.StateNamespaces.WindowNamespace)2 StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner)2