Search in sources :

Example 91 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndBatchSideInputs.

@Test
public void testCreateWithCombinerAndBatchSideInputs() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    Coder keyCoder = StringUtf8Coder.of();
    Coder valueCoder = BigEndianIntegerCoder.of();
    KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);
    TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)), counterSet, NameContextsForTests.nameContextForTest());
    StepContext stepContext = BatchModeExecutionContext.forTesting(options, "testStage").getStepContext(TestOperationContext.create(counterSet));
    when(mockSideInputReader.isEmpty()).thenReturn(false);
    ParDoFn pgbk = PartialGroupByKeyParDoFns.create(options, kvCoder, AppliedCombineFn.withInputCoder(Sum.ofIntegers(), CoderRegistry.createDefault(), kvCoder, ImmutableList.<PCollectionView<?>>of(), WindowingStrategy.globalDefault()), mockSideInputReader, receiver, stepContext);
    assertTrue(pgbk instanceof BatchSideInputPGBKParDoFn);
}
Also used : ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) PCollectionView(org.apache.beam.sdk.values.PCollectionView) StepContext(org.apache.beam.runners.core.StepContext) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) Test(org.junit.Test)

Example 92 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class UserParDoFnFactoryTest method testCleanupWorks.

@Test
public void testCleanupWorks() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    CounterSet counters = new CounterSet();
    DoFn<?, ?> initialFn = new TestStatefulDoFn();
    CloudObject cloudObject = getCloudObject(initialFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));
    StateInternals stateInternals = InMemoryStateInternals.forKey("dummy");
    // The overarching step context that only ParDoFn gets
    DataflowStepContext stepContext = mock(DataflowStepContext.class);
    // The user step context that the DoFnRunner gets a handle on
    DataflowStepContext userStepContext = mock(DataflowStepContext.class);
    when(stepContext.namespacedToUser()).thenReturn(userStepContext);
    when(stepContext.stateInternals()).thenReturn(stateInternals);
    when(userStepContext.stateInternals()).thenReturn((StateInternals) stateInternals);
    DataflowExecutionContext<DataflowStepContext> executionContext = mock(DataflowExecutionContext.class);
    TestOperationContext operationContext = TestOperationContext.create(counters);
    when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
    when(executionContext.getSideInputReader(any(), any(), any())).thenReturn(NullSideInputReader.empty());
    ParDoFn parDoFn = factory.create(options, cloudObject, Collections.emptyList(), MAIN_OUTPUT, ImmutableMap.of(MAIN_OUTPUT, 0), executionContext, operationContext);
    Receiver rcvr = new OutputReceiver();
    parDoFn.startBundle(rcvr);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    StateNamespace firstWindowNamespace = StateNamespaces.window(windowCoder, firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(windowCoder, secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));
    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We musn't care how it is accomplished.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");
    when(userStepContext.getNextFiredTimer(windowCoder)).thenReturn(null);
    when(stepContext.getNextFiredTimer(windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, firstWindowNamespace, firstWindow.maxTimestamp().plus(Duration.millis(1L)), firstWindow.maxTimestamp().plus(Duration.millis(1L)), TimeDomain.EVENT_TIME)).thenReturn(null);
    // This should fire the timer to clean up the first window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
    when(stepContext.getNextFiredTimer((Coder) windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, secondWindowNamespace, secondWindow.maxTimestamp().plus(Duration.millis(1L)), secondWindow.maxTimestamp().plus(Duration.millis(1L)), TimeDomain.EVENT_TIME)).thenReturn(null);
    // And this should clean up the second window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Instant(org.joda.time.Instant) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DataflowStepContext(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext) StateNamespace(org.apache.beam.runners.core.StateNamespace) ValueState(org.apache.beam.sdk.state.ValueState) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 93 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class WindmillTimerInternalsTest method testTimerDataToFromTimer.

@Test
public void testTimerDataToFromTimer() {
    for (String stateFamily : TEST_STATE_FAMILIES) {
        for (KV<Coder<? extends BoundedWindow>, StateNamespace> coderAndNamespace : TEST_NAMESPACES_WITH_CODERS) {
            @Nullable Coder<? extends BoundedWindow> coder = coderAndNamespace.getKey();
            StateNamespace namespace = coderAndNamespace.getValue();
            for (TimeDomain timeDomain : TimeDomain.values()) {
                for (WindmillNamespacePrefix prefix : WindmillNamespacePrefix.values()) {
                    for (Instant timestamp : TEST_TIMESTAMPS) {
                        List<TimerData> anonymousTimers = ImmutableList.of(TimerData.of(namespace, timestamp, timestamp, timeDomain), TimerData.of(namespace, timestamp, timestamp.minus(Duration.millis(1)), timeDomain));
                        for (TimerData timer : anonymousTimers) {
                            assertThat(WindmillTimerInternals.windmillTimerToTimerData(prefix, WindmillTimerInternals.timerDataToWindmillTimer(stateFamily, prefix, timer), coder), equalTo(timer));
                        }
                        for (String timerId : TEST_TIMER_IDS) {
                            List<TimerData> timers = ImmutableList.of(TimerData.of(timerId, namespace, timestamp, timestamp, timeDomain), TimerData.of(timerId, "family", namespace, timestamp, timestamp, timeDomain), TimerData.of(timerId, namespace, timestamp, timestamp.minus(Duration.millis(1)), timeDomain), TimerData.of(timerId, "family", namespace, timestamp, timestamp.minus(Duration.millis(1)), timeDomain));
                            for (TimerData timer : timers) {
                                assertThat(WindmillTimerInternals.windmillTimerToTimerData(prefix, WindmillTimerInternals.timerDataToWindmillTimer(stateFamily, prefix, timer), coder), equalTo(timer));
                            }
                        }
                    }
                }
            }
        }
    }
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) Instant(org.joda.time.Instant) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) StateNamespace(org.apache.beam.runners.core.StateNamespace) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Test(org.junit.Test)

Example 94 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class BatchSideInputHandlerFactory method forMultimapSideInput.

@Override
public <K, V, W extends BoundedWindow> MultimapSideInputHandler<K, V, W> forMultimapSideInput(String transformId, String sideInputId, KvCoder<K, V> elementCoder, Coder<W> windowCoder) {
    PCollectionNode collectionNode = sideInputToCollection.get(SideInputId.newBuilder().setTransformId(transformId).setLocalName(sideInputId).build());
    checkArgument(collectionNode != null, "No side input for %s/%s", transformId, sideInputId);
    Coder<K> keyCoder = elementCoder.getKeyCoder();
    Map<Object, Map<Object, KV<K, List<V>>>> /* structural key */
    data = new HashMap<>();
    List<WindowedValue<KV<K, V>>> broadcastVariable = sideInputGetter.getSideInput(collectionNode.getId());
    for (WindowedValue<KV<K, V>> windowedValue : broadcastVariable) {
        K key = windowedValue.getValue().getKey();
        V value = windowedValue.getValue().getValue();
        for (BoundedWindow boundedWindow : windowedValue.getWindows()) {
            @SuppressWarnings("unchecked") W window = (W) boundedWindow;
            Object structuralW = windowCoder.structuralValue(window);
            Object structuralK = keyCoder.structuralValue(key);
            KV<K, List<V>> records = data.computeIfAbsent(structuralW, o -> new HashMap<>()).computeIfAbsent(structuralK, o -> KV.of(key, new ArrayList<>()));
            records.getValue().add(value);
        }
    }
    return new MultimapSideInputHandler<K, V, W>() {

        @Override
        public Iterable<V> get(K key, W window) {
            KV<K, List<V>> records = data.getOrDefault(windowCoder.structuralValue(window), Collections.emptyMap()).get(keyCoder.structuralValue(key));
            if (records == null) {
                return Collections.emptyList();
            }
            return Collections.unmodifiableList(records.getValue());
        }

        @Override
        public Coder<V> valueCoder() {
            return elementCoder.getValueCoder();
        }

        @Override
        public Iterable<K> get(W window) {
            Map<Object, KV<K, List<V>>> records = data.getOrDefault(windowCoder.structuralValue(window), Collections.emptyMap());
            return Iterables.unmodifiableIterable(FluentIterable.concat(records.values()).transform(kListKV -> kListKV.getKey()));
        }

        @Override
        public Coder<K> keyCoder() {
            return elementCoder.getKeyCoder();
        }
    };
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ImmutableMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMultimap) IterableSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.IterableSideInputHandler) SideInputHandlerFactory(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.SideInputHandlerFactory) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ArrayList(java.util.ArrayList) MultimapSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.MultimapSideInputHandler) List(java.util.List) SideInputReference(org.apache.beam.runners.core.construction.graph.SideInputReference) Map(java.util.Map) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) FluentIterable(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.FluentIterable) SideInputId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) Collections(java.util.Collections) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ArrayList(java.util.ArrayList) List(java.util.List) MultimapSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.MultimapSideInputHandler) HashMap(java.util.HashMap) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map)

Example 95 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class DefaultJobBundleFactory method getOutputReceivers.

private static Map<String, RemoteOutputReceiver<?>> getOutputReceivers(ExecutableProcessBundleDescriptor processBundleDescriptor, OutputReceiverFactory outputReceiverFactory) {
    ImmutableMap.Builder<String, RemoteOutputReceiver<?>> outputReceivers = ImmutableMap.builder();
    for (Map.Entry<String, Coder> remoteOutputCoder : processBundleDescriptor.getRemoteOutputCoders().entrySet()) {
        String outputTransform = remoteOutputCoder.getKey();
        Coder coder = remoteOutputCoder.getValue();
        String bundleOutputPCollection = Iterables.getOnlyElement(processBundleDescriptor.getProcessBundleDescriptor().getTransformsOrThrow(outputTransform).getInputsMap().values());
        FnDataReceiver outputReceiver = outputReceiverFactory.create(bundleOutputPCollection);
        outputReceivers.put(outputTransform, RemoteOutputReceiver.of(coder, outputReceiver));
    }
    return outputReceivers.build();
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)

Aggregations

Coder (org.apache.beam.sdk.coders.Coder)119 KvCoder (org.apache.beam.sdk.coders.KvCoder)75 WindowedValue (org.apache.beam.sdk.util.WindowedValue)55 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)44 Test (org.junit.Test)43 HashMap (java.util.HashMap)42 ArrayList (java.util.ArrayList)38 Map (java.util.Map)36 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)35 List (java.util.List)32 KV (org.apache.beam.sdk.values.KV)30 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)28 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)28 PCollection (org.apache.beam.sdk.values.PCollection)28 TupleTag (org.apache.beam.sdk.values.TupleTag)24 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)23 IOException (java.io.IOException)22 PCollectionView (org.apache.beam.sdk.values.PCollectionView)22 Instant (org.joda.time.Instant)21 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)20