Search in sources :

Example 56 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class TransformHierarchyTest method replaceWithCompositeSucceeds.

/**
 * Checks that a primitive node in the hierarchy can be replaced by a composite transform.
 *
 * <p>The test registers an "Upstream" node and an "Original" ParDo node, replaces the original
 * node with a composite that wraps a multi-output ParDo, records the replacement outputs, and
 * then asserts that both the contained ParDo node and the composite node report the original
 * {@code output} collection as their output value.
 */
@Test
public void replaceWithCompositeSucceeds() {
    // Single-output ParDo that emits each element incremented by one.
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    // Apply the transforms to the pipeline to obtain the PCollections used as node inputs/outputs.
    GenerateSequence genUpstream = GenerateSequence.from(0);
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    // Register the "Upstream" node with the hierarchy under test and close it.
    hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    // Register the "Original" node, which is the one replaced further down.
    TransformHierarchy.Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    // Replacement ParDo: same element logic, but multi-output with `longs` as the main tag and
    // no additional output tags.
    final TupleTag<Long> longs = new TupleTag<>();
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    // Composite that applies the replacement ParDo and exposes only its `longs` output.
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    // Apply the replacement ParDo directly so its output tuple is available to the calls below.
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    // Swap the original node for the composite.
    // NOTE(review): replaceNode presumably leaves the composite as the current node, since the
    // contained node is pushed right after and a final popNode() closes the test; confirm
    // against TransformHierarchy.
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    // Register the contained ParDo node with the tuple as its output.
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    // Called after the contained node is popped; presumably sets the composite's own output.
    hierarchy.setOutput(replacementOutput.get(longs));
    // The tuple is expected to expand to exactly one tagged value (getOnlyElement enforces this).
    Entry<TupleTag<?>, PValue> replacementLongs = Iterables.getOnlyElement(replacementOutput.expand().entrySet());
    // Declare that the replacement's `longs` output stands in for the original `output`.
    hierarchy.replaceOutputs(Collections.<PValue, ReplacementOutput>singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    // The contained node keeps the replacement's tag but must now map it to the original output.
    assertThat(replacementParNode.getOutputs().keySet(), Matchers.<TupleTag<?>>contains(replacementLongs.getKey()));
    assertThat(replacementParNode.getOutputs().values(), Matchers.<PValue>contains(output));
    // The composite node keeps the keys of the replacement collection's expansion and likewise
    // maps them to the original output.
    assertThat(compositeNode.getOutputs().keySet(), equalTo(replacementOutput.get(longs).expand().keySet()));
    assertThat(compositeNode.getOutputs().values(), Matchers.<PValue>contains(output));
    hierarchy.popNode();
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 57 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class SplittableProcessElementsEvaluatorFactory method createEvaluator.

/**
 * Builds the evaluator for a {@code ProcessElements} application.
 *
 * <p>A new {@code ProcessFn} is created from the transform and placed under a
 * {@code DoFnLifecycleManager}; a delegate ParDo evaluator is created for it; and the fn is
 * then given its state/timer internals factories and a process-element invoker before the
 * evaluator is returned wrapped in a {@code DoFnLifecycleManagerRemovingTransformEvaluator}.
 *
 * @param application the applied {@code ProcessElements} transform to evaluate
 * @param inputBundle the input bundle; only its key is read here
 * @return the delegate ParDo evaluator, wrapped together with the lifecycle manager
 * @throws Exception declared by the signature; the throwing calls are not identifiable from this
 *     method alone
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform = application.getTransform();
    ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn = transform.newProcessFn(transform.getFn());
    DoFnLifecycleManager fnManager = DoFnLifecycleManager.of(processFn);
    // From here on use the instance handed out by the lifecycle manager rather than the one
    // created above; the cast restores the concrete ProcessFn type.
    processFn = ((ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) fnManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get());
    String stepName = evaluationContext.getStepName(application);
    // Step context is looked up by the bundle's key and this application's step name.
    final DirectExecutionContext.DirectStepContext stepContext = evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(stepName);
    final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> parDoEvaluator = delegateFactory.createParDoEvaluator(application, inputBundle.getKey(), transform.getSideInputs(), transform.getMainOutputTag(), transform.getAdditionalOutputTags().getAll(), stepContext, processFn, fnManager);
    // Both factories ignore the requested key and always return the internals of the step
    // context obtained above.
    processFn.setStateInternalsFactory(new StateInternalsFactory<String>() {

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String key) {
            return (StateInternals) stepContext.stateInternals();
        }
    });
    processFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            return stepContext.timerInternals();
        }
    });
    // Routes values emitted through the invoker into the delegate evaluator's output manager.
    OutputWindowedValue<OutputT> outputWindowedValue = new OutputWindowedValue<OutputT>() {

        private final OutputManager outputManager = parDoEvaluator.getOutputManager();

        // Untagged output goes to the transform's main output tag.
        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        // Tagged output goes to the tag supplied by the caller.
        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };
    // The invoker receives a newly created single-threaded scheduled executor with daemon threads
    // on every call of this method; no shutdown of that executor is visible in this method.
    // NOTE(review): 10000 and Duration.standardSeconds(10) are presumably the output-count and
    // time bounds implied by the invoker's class name - confirm against its constructor.
    // NOTE(review): the trailing "DirectRunner." comment below looks like the tail of a longer
    // comment whose beginning is not present in this view.
    processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(transform.getFn(), evaluationContext.getPipelineOptions(), outputWindowedValue, evaluationContext.createSideInputReader(transform.getSideInputs()), // DirectRunner.
    Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setThreadFactory(MoreExecutors.platformThreadFactory()).setDaemon(true).setNameFormat("direct-splittable-process-element-checkpoint-executor").build()), 10000, Duration.standardSeconds(10)));
    return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
Also used : ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ElementAndRestriction(org.apache.beam.runners.core.construction.ElementAndRestriction) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 58 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class WatermarkManager method getInputProcessingWatermarks.

/**
 * Gathers the {@code synchronizedProcessingOutputWatermark} of every input value of the given
 * transform.
 *
 * <p>When the transform has no inputs, the result holds the single element
 * {@code THE_END_OF_TIME}.
 *
 * @param transform the applied transform whose inputs are inspected
 * @return an immutable collection with one watermark per input value, in the iteration order of
 *     the input map's values
 */
private Collection<Watermark> getInputProcessingWatermarks(AppliedPTransform<?, ?, ?> transform) {
    Map<TupleTag<?>, PValue> taggedInputs = transform.getInputs();
    if (taggedInputs.isEmpty()) {
        // No inputs: the only entry is the end-of-time watermark.
        return ImmutableList.<Watermark>of(THE_END_OF_TIME);
    }
    ImmutableList.Builder<Watermark> collected = ImmutableList.builder();
    for (PValue input : taggedInputs.values()) {
        collected.add(getValueWatermark(input).synchronizedProcessingOutputWatermark);
    }
    return collected.build();
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) TupleTag(org.apache.beam.sdk.values.TupleTag) PValue(org.apache.beam.sdk.values.PValue)

Example 59 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class WatermarkManager method getInputWatermarks.

/**
 * Gathers the {@code outputWatermark} of every input value of the given transform.
 *
 * <p>When the transform has no inputs, the result holds the single element
 * {@code THE_END_OF_TIME}.
 *
 * @param transform the applied transform whose inputs are inspected
 * @return an immutable list with one watermark per input value, in the iteration order of the
 *     input map's values
 */
private List<Watermark> getInputWatermarks(AppliedPTransform<?, ?, ?> transform) {
    Map<TupleTag<?>, PValue> taggedInputs = transform.getInputs();
    if (taggedInputs.isEmpty()) {
        // No inputs: the only entry is the end-of-time watermark.
        return ImmutableList.<Watermark>of(THE_END_OF_TIME);
    }
    ImmutableList.Builder<Watermark> collected = ImmutableList.builder();
    for (PValue input : taggedInputs.values()) {
        collected.add(getValueWatermark(input).outputWatermark);
    }
    return collected.build();
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) TupleTag(org.apache.beam.sdk.values.TupleTag) PValue(org.apache.beam.sdk.values.PValue)

Example 60 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class InstanceBuilderTest method testConstructor.

/**
 * Builds a {@code TupleTag} through {@code InstanceBuilder} with a single {@code String}
 * argument and checks that the resulting tag reports that argument as its id.
 */
@Test
public void testConstructor() throws Exception {
    final String expectedId = "hello world!";
    TupleTag<?> builtTag = InstanceBuilder.ofType(TupleTag.class)
        .withArg(String.class, expectedId)
        .build();
    Assert.assertEquals(expectedId, builtTag.getId());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Test(org.junit.Test)

Aggregations

TupleTag (org.apache.beam.sdk.values.TupleTag): 67; Test (org.junit.Test): 44; PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 27; KV (org.apache.beam.sdk.values.KV): 16; PCollection (org.apache.beam.sdk.values.PCollection): 15; Instant (org.joda.time.Instant): 14; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 13; PValue (org.apache.beam.sdk.values.PValue): 13; Category (org.junit.experimental.categories.Category): 13; IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 10; AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform): 9; DoFn (org.apache.beam.sdk.transforms.DoFn): 9; StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 9; PCollectionView (org.apache.beam.sdk.values.PCollectionView): 9; Matchers.containsString (org.hamcrest.Matchers.containsString): 9; Map (java.util.Map): 8; ImmutableMap (com.google.common.collect.ImmutableMap): 6; Pipeline (org.apache.beam.sdk.Pipeline): 5; ImmutableList (com.google.common.collect.ImmutableList): 4; ArrayList (java.util.ArrayList): 4