Search in sources :

Example 51 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class SplittableProcessElementsEvaluatorFactory method createEvaluator.

/**
 * Assembles the evaluator that runs the splittable {@link ProcessFn} of a {@code ProcessElements}
 * transform for a single input bundle.
 *
 * <p>The method obtains a {@code ProcessFn} from a {@link DoFnLifecycleManager}, builds the
 * delegate ParDo evaluator around it, and then configures that same {@code ProcessFn} with
 * state internals, timer internals and a process-element invoker before returning.
 *
 * @param application the applied {@code ProcessElements} transform to evaluate
 * @param inputBundle the bundle whose key is used to look up the execution context and to
 *     create the delegate ParDo evaluator
 * @return the delegate ParDo evaluator, wrapped by
 *     {@code DoFnLifecycleManagerRemovingTransformEvaluator} together with the lifecycle manager
 * @throws Exception if any of the calls made while assembling the evaluator throws
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> processElements = application.getTransform();

    // Seed the lifecycle manager with a fresh ProcessFn; everything below configures the
    // instance that the manager hands back, not the seed itself.
    DoFnLifecycleManager lifecycleManager = DoFnLifecycleManager.of(processElements.newProcessFn(processElements.getFn()));
    ProcessFn<InputT, OutputT, RestrictionT, TrackerT> managedFn =
        (ProcessFn<InputT, OutputT, RestrictionT, TrackerT>)
            lifecycleManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get();

    String nameOfStep = evaluationContext.getStepName(application);
    final DirectExecutionContext.DirectStepContext directStepContext =
        evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(nameOfStep);

    final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> delegate =
        delegateFactory.createParDoEvaluator(
            application,
            inputBundle.getKey(),
            processElements.getSideInputs(),
            processElements.getMainOutputTag(),
            processElements.getAdditionalOutputTags().getAll(),
            directStepContext,
            managedFn,
            lifecycleManager);

    // Both factories ignore the key they are given and always answer from the step context
    // resolved above.
    managedFn.setStateInternalsFactory(new StateInternalsFactory<String>() {

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String unusedKey) {
            return (StateInternals) directStepContext.stateInternals();
        }
    });
    managedFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String unusedKey) {
            return directStepContext.timerInternals();
        }
    });

    // Everything the invoker emits is routed through the delegate evaluator's output manager.
    final OutputManager delegateOutputs = delegate.getOutputManager();
    OutputWindowedValue<OutputT> windowedValueSink = new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            // Main-output overload: the tag is always the transform's main output tag.
            delegateOutputs.output(processElements.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            delegateOutputs.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };

    // The invoker gets its own single daemon thread, named
    // "direct-splittable-process-element-checkpoint-executor".
    // NOTE(review): 10000 and the 10-second duration are presumably the output-count and time
    // bounds of the invoker -- confirm against its constructor.
    OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT> boundedInvoker =
        new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(
            processElements.getFn(),
            evaluationContext.getPipelineOptions(),
            windowedValueSink,
            evaluationContext.createSideInputReader(processElements.getSideInputs()),
            Executors.newSingleThreadScheduledExecutor(
                new ThreadFactoryBuilder()
                    .setThreadFactory(MoreExecutors.platformThreadFactory())
                    .setDaemon(true)
                    .setNameFormat("direct-splittable-process-element-checkpoint-executor")
                    .build()),
            10000,
            Duration.standardSeconds(10));
    managedFn.setProcessElementInvoker(boundedInvoker);

    return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(delegate, lifecycleManager);
}
Also used : ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ElementAndRestriction(org.apache.beam.runners.core.construction.ElementAndRestriction) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 52 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class CombineTest method testCombinePerKeyWithHotKeyFanoutPrimitiveDisplayData.

/**
 * Checks that the primitive transforms produced by
 * {@code Combine.perKey(...).withHotKeyFanout(int)} carry display data for both the combine
 * function class and the configured fanout.
 */
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyWithHotKeyFanoutPrimitiveDisplayData() {
    int fanout = 2;
    DisplayDataEvaluator displayDataEvaluator = DisplayDataEvaluator.create();
    CombineTest.UniqueInts uniqueInts = new CombineTest.UniqueInts();
    PTransform<PCollection<KV<Integer, Integer>>, PCollection<KV<Integer, Set<Integer>>>> fanoutCombine =
        Combine.<Integer, Integer, Set<Integer>>perKey(uniqueInts).withHotKeyFanout(fanout);

    // Integer keys and values, matching the input type of the transform under test.
    KvCoder<Integer, Integer> inputCoder = KvCoder.of(VarIntCoder.of(), VarIntCoder.of());
    Set<DisplayData> primitiveDisplayData =
        displayDataEvaluator.displayDataForPrimitiveTransforms(fanoutCombine, inputCoder);

    assertThat(
        "Combine.perKey.withHotKeyFanout should include the combineFn in its primitive transform",
        primitiveDisplayData,
        hasItem(hasDisplayItem("combineFn", uniqueInts.getClass())));
    assertThat(
        "Combine.perKey.withHotKeyFanout(int) should include the fanout in its primitive transform",
        primitiveDisplayData,
        hasItem(hasDisplayItem("fanout", fanout)));
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) DisplayDataEvaluator(org.apache.beam.sdk.transforms.display.DisplayDataEvaluator) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 53 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class CombineTest method testCombinePerKeyPrimitiveDisplayData.

/**
 * Checks that the primitive transforms produced by {@code Combine.perKey(...)} carry display
 * data naming the combine function class.
 */
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyPrimitiveDisplayData() {
    DisplayDataEvaluator displayDataEvaluator = DisplayDataEvaluator.create();
    CombineTest.UniqueInts uniqueInts = new CombineTest.UniqueInts();
    PTransform<PCollection<KV<Integer, Integer>>, ? extends POutput> perKeyCombine = Combine.perKey(uniqueInts);

    // Integer keys and values, matching the input type of the transform under test.
    KvCoder<Integer, Integer> inputCoder = KvCoder.of(VarIntCoder.of(), VarIntCoder.of());
    Set<DisplayData> primitiveDisplayData =
        displayDataEvaluator.displayDataForPrimitiveTransforms(perKeyCombine, inputCoder);

    assertThat(
        "Combine.perKey should include the combineFn in its primitive transform",
        primitiveDisplayData,
        hasItem(hasDisplayItem("combineFn", uniqueInts.getClass())));
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) DisplayDataEvaluator(org.apache.beam.sdk.transforms.display.DisplayDataEvaluator) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

PCollection (org.apache.beam.sdk.values.PCollection) 53, Test (org.junit.Test) 38, TupleTag (org.apache.beam.sdk.values.TupleTag) 15, AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform) 12, KV (org.apache.beam.sdk.values.KV) 12, PValue (org.apache.beam.sdk.values.PValue) 12, PTransform (org.apache.beam.sdk.transforms.PTransform) 10, DisplayData (org.apache.beam.sdk.transforms.display.DisplayData) 9, Instant (org.joda.time.Instant) 9, DoFn (org.apache.beam.sdk.transforms.DoFn) 6, DisplayDataEvaluator (org.apache.beam.sdk.transforms.display.DisplayDataEvaluator) 6, BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow) 6, IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow) 6, PCollectionList (org.apache.beam.sdk.values.PCollectionList) 6, Pipeline (org.apache.beam.sdk.Pipeline) 5, PBegin (org.apache.beam.sdk.values.PBegin) 5, PCollectionView (org.apache.beam.sdk.values.PCollectionView) 5, Matchers.isEmptyOrNullString (org.hamcrest.Matchers.isEmptyOrNullString) 5, ArrayList (java.util.ArrayList) 4, HashMap (java.util.HashMap) 4