Search in sources :

Example 6 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndBatchSideInputs.

/**
 * Checks that {@code PartialGroupByKeyParDoFns.create} returns a
 * {@code BatchSideInputPGBKParDoFn} when it is given a combine fn, a step context obtained from
 * a {@code BatchModeExecutionContext}, and a side-input reader that reports it is not empty.
 */
@Test
public void testCreateWithCombinerAndBatchSideInputs() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();

    // Parameterized coder types (rather than raw Coder) so that KvCoder.of(...) is type-checked
    // instead of relying on an unchecked conversion to KvCoder<String, Integer>.
    Coder<String> keyCoder = StringUtf8Coder.of();
    Coder<Integer> valueCoder = BigEndianIntegerCoder.of();
    KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

    TestOutputReceiver receiver =
        new TestOutputReceiver(
            new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
            counterSet,
            NameContextsForTests.nameContextForTest());
    StepContext stepContext =
        BatchModeExecutionContext.forTesting(options, "testStage")
            .getStepContext(TestOperationContext.create(counterSet));

    // A non-empty side-input reader is the condition under test.
    when(mockSideInputReader.isEmpty()).thenReturn(false);

    ParDoFn pgbk =
        PartialGroupByKeyParDoFns.create(
            options,
            kvCoder,
            AppliedCombineFn.withInputCoder(
                Sum.ofIntegers(),
                CoderRegistry.createDefault(),
                kvCoder,
                ImmutableList.<PCollectionView<?>>of(),
                WindowingStrategy.globalDefault()),
            mockSideInputReader,
            receiver,
            stepContext);

    assertTrue(pgbk instanceof BatchSideInputPGBKParDoFn);
}
Also used : ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) PCollectionView(org.apache.beam.sdk.values.PCollectionView) StepContext(org.apache.beam.runners.core.StepContext) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) Test(org.junit.Test)

Example 7 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class GroupByKeyOp method open.

/**
 * Opens this operator: reads the pipeline options from the Samza execution context, builds the
 * state and timer internals factories, creates the group-also-by-window {@link DoFn}, and stores
 * a metrics-wrapped {@link DoFnRunner} for it in {@code fnRunner}.
 *
 * @param config Samza configuration; not read by this method
 * @param context Samza context supplying the application container context and the task context
 * @param timerRegistry scheduler handed to the timer internals factory
 * @param emitter emitter from which the output manager is created
 */
@Override
public void open(Config config, Context context, Scheduler<KeyedTimerData<K>> timerRegistry, OpEmitter<KV<K, OutputT>> emitter) {
    // Looked up once and reused below for both the pipeline options and the metrics container.
    final SamzaExecutionContext samzaExecutionContext = (SamzaExecutionContext) context.getApplicationContainerContext();
    this.pipelineOptions = samzaExecutionContext.getPipelineOptions();

    final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory =
        SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
            transformId, context.getTaskContext(), pipelineOptions);
    final DoFnRunners.OutputManager outputManager = outputManagerFactory.create(emitter);

    this.stateInternalsFactory =
        new SamzaStoreStateInternals.Factory<>(
            transformId,
            Collections.singletonMap(
                SamzaStoreStateInternals.BEAM_STORE,
                SamzaStoreStateInternals.getBeamStore(context.getTaskContext())),
            keyCoder,
            pipelineOptions.getStoreBatchGetSize());
    this.timerInternalsFactory =
        SamzaTimerInternalsFactory.createTimerInternalFactory(
            keyCoder,
            timerRegistry,
            TIMER_STATE_ID,
            nonKeyedStateInternalsFactory,
            windowingStrategy,
            isBounded,
            pipelineOptions);

    final DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn =
        GroupAlsoByWindowViaWindowSetNewDoFn.create(
            windowingStrategy,
            stateInternalsFactory,
            timerInternalsFactory,
            NullSideInputReader.of(Collections.emptyList()),
            reduceFn,
            outputManager,
            mainOutputTag);

    // The step context delegates to KeyedInternals for both state and timers.
    final KeyedInternals<K> keyedInternals = new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);
    final StepContext stepContext = new StepContext() {

        @Override
        public StateInternals stateInternals() {
            return keyedInternals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
            return keyedInternals.timerInternals();
        }
    };

    // NOTE(review): the runner is given freshly created default options rather than
    // this.pipelineOptions; kept as-is - confirm this is intentional.
    final DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFnRunner =
        DoFnRunners.simpleRunner(
            PipelineOptionsFactory.create(),
            doFn,
            NullSideInputReader.of(Collections.emptyList()),
            outputManager,
            mainOutputTag,
            Collections.emptyList(),
            stepContext,
            null,
            Collections.emptyMap(),
            windowingStrategy,
            DoFnSchemaInformation.create(),
            Collections.emptyMap());

    this.fnRunner = DoFnRunnerWithMetrics.wrap(doFnRunner, samzaExecutionContext.getMetricsContainer(), transformFullName);
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) StepContext(org.apache.beam.runners.core.StepContext) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem)

Example 8 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class SamzaDoFnRunners method create.

/**
 * Creates the {@link DoFnRunner} for the Java runner.
 *
 * <p>A simple runner is always built first and, when metrics are enabled in the pipeline
 * options, wrapped with metrics reporting. If {@code StateUtils.isStateful(doFn)} holds, that
 * runner is further wrapped in a stateful runner bound to a {@code KeyedInternals}; otherwise
 * state and timer internals are obtained from the factories with a {@code null} key and the
 * (possibly metrics-wrapped) simple runner is returned directly.
 */
public static <InT, FnOutT> DoFnRunner<InT, FnOutT> create(SamzaPipelineOptions pipelineOptions, DoFn<InT, FnOutT> doFn, WindowingStrategy<?, ?> windowingStrategy, String transformFullName, String transformId, Context context, TupleTag<FnOutT> mainOutputTag, SideInputHandler sideInputHandler, SamzaTimerInternalsFactory<?> timerInternalsFactory, Coder<?> keyCoder, DoFnRunners.OutputManager outputManager, Coder<InT> inputCoder, List<TupleTag<?>> sideOutputTags, Map<TupleTag<?>, Coder<?>> outputCoders, DoFnSchemaInformation doFnSchemaInformation, Map<String, PCollectionView<?>> sideInputMapping) {
    final DoFnSignature fnSignature = DoFnSignatures.getSignature(doFn.getClass());
    final SamzaStoreStateInternals.Factory<?> stateFactory =
        SamzaStoreStateInternals.createStateInternalsFactory(
            transformId, keyCoder, context.getTaskContext(), pipelineOptions, fnSignature);
    final SamzaExecutionContext samzaContext =
        (SamzaExecutionContext) context.getApplicationContainerContext();

    // keyed stays null for a non-stateful DoFn; that is what selects the return path below.
    final KeyedInternals keyed;
    final StateInternals state;
    final TimerInternals timers;
    if (StateUtils.isStateful(doFn)) {
        keyed = new KeyedInternals(stateFactory, timerInternalsFactory);
        state = keyed.stateInternals();
        timers = keyed.timerInternals();
    } else {
        keyed = null;
        state = stateFactory.stateInternalsForKey(null);
        timers = timerInternalsFactory.timerInternalsForKey(null);
    }

    final StepContext stepContext = createStepContext(state, timers);
    final DoFnRunner<InT, FnOutT> baseRunner =
        DoFnRunners.simpleRunner(
            pipelineOptions,
            doFn,
            sideInputHandler,
            outputManager,
            mainOutputTag,
            sideOutputTags,
            stepContext,
            inputCoder,
            outputCoders,
            windowingStrategy,
            doFnSchemaInformation,
            sideInputMapping);

    // Metrics wrapping is opt-in through the pipeline options.
    final DoFnRunner<InT, FnOutT> meteredRunner;
    if (pipelineOptions.getEnableMetrics()) {
        meteredRunner =
            DoFnRunnerWithMetrics.wrap(
                baseRunner, samzaContext.getMetricsContainer(), transformFullName);
    } else {
        meteredRunner = baseRunner;
    }

    if (keyed == null) {
        return meteredRunner;
    }

    final DoFnRunner<InT, FnOutT> statefulRunner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn,
            inputCoder,
            meteredRunner,
            stepContext,
            windowingStrategy,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timers, windowingStrategy),
            createStateCleaner(doFn, windowingStrategy, keyed.stateInternals()));
    return new DoFnRunnerWithKeyedInternals<>(statefulRunner, keyed);
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) StepContext(org.apache.beam.runners.core.StepContext) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 9 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class MultiDoFnFunction method call.

/**
 * Processes one partition of windowed input values through the wrapped {@code doFn}.
 *
 * <p>On the first call that sees a non-empty iterator, the DoFn's setup is invoked. For a
 * stateful DoFn the key is taken from the first element (which is then pushed back in front of
 * the iterator) and in-memory state and timer internals are created for it; otherwise a no-op
 * step context is used.
 *
 * @param iter the partition's input elements
 * @return an iterator over the tagged outputs produced for this partition
 */
@Override
public Iterator<Tuple2<TupleTag<?>, WindowedValue<?>>> call(Iterator<WindowedValue<InputT>> iter) throws Exception {
    if (!wasSetupCalled) {
        if (iter.hasNext()) {
            DoFnInvokers.tryInvokeSetupFor(doFn, options.get());
            wasSetupCalled = true;
        }
    }

    DoFnOutputManager outputs = new DoFnOutputManager();

    final InMemoryTimerInternals inMemoryTimers;
    final StepContext stepContext;
    Object partitionKey = null;
    if (!stateful) {
        inMemoryTimers = null;
        stepContext = new SparkProcessContext.NoOpStepContext();
    } else {
        // Stateful ParDo is currently only implemented for batch mode.
        if (iter.hasNext()) {
            // Read the key off the first element, then put that element back at the front.
            WindowedValue<InputT> head = iter.next();
            partitionKey = ((KV<?, ?>) head.getValue()).getKey();
            iter = Iterators.concat(Iterators.singletonIterator(head), iter);
        }
        final InMemoryStateInternals<?> inMemoryState = InMemoryStateInternals.forKey(partitionKey);
        inMemoryTimers = new InMemoryTimerInternals();
        stepContext = new StepContext() {

            @Override
            public StateInternals stateInternals() {
                return inMemoryState;
            }

            @Override
            public TimerInternals timerInternals() {
                return inMemoryTimers;
            }
        };
    }

    final DoFnRunner<InputT, OutputT> runner =
        DoFnRunners.simpleRunner(
            options.get(),
            doFn,
            CachedSideInputReader.of(new SparkSideInputReader(sideInputs)),
            outputs,
            mainOutputTag,
            additionalOutputTags,
            stepContext,
            inputCoder,
            outputCoders,
            windowingStrategy,
            doFnSchemaInformation,
            sideInputMapping);
    DoFnRunnerWithMetrics<InputT, OutputT> meteredRunner =
        new DoFnRunnerWithMetrics<>(stepName, runner, metricsAccum);

    // Timers are only handed on for the stateful case; otherwise an empty iterator is used.
    return new SparkProcessContext<>(
            doFn,
            meteredRunner,
            outputs,
            partitionKey,
            stateful ? new TimerDataIterator(inMemoryTimers) : Collections.emptyIterator())
        .processPartition(iter)
        .iterator();
}
Also used : StepContext(org.apache.beam.runners.core.StepContext) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) TimerInternals(org.apache.beam.runners.core.TimerInternals) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) SparkSideInputReader(org.apache.beam.runners.spark.util.SparkSideInputReader)

Aggregations

StepContext (org.apache.beam.runners.core.StepContext)9 TimerInternals (org.apache.beam.runners.core.TimerInternals)5 DoFnRunners (org.apache.beam.runners.core.DoFnRunners)4 StateInternals (org.apache.beam.runners.core.StateInternals)4 KV (org.apache.beam.sdk.values.KV)4 List (java.util.List)3 DoFnRunner (org.apache.beam.runners.core.DoFnRunner)3 NullSideInputReader (org.apache.beam.runners.core.NullSideInputReader)3 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)3 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)3 TupleTag (org.apache.beam.sdk.values.TupleTag)3 Instant (org.joda.time.Instant)3 ByteBuffer (java.nio.ByteBuffer)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 Collection (java.util.Collection)2 HashMap (java.util.HashMap)2 Optional (java.util.Optional)2 Collectors (java.util.stream.Collectors)2 InMemoryStateInternals (org.apache.beam.runners.core.InMemoryStateInternals)2