Search in sources :

Example 86 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class GroupAlsoByWindowParDoFnFactory method create.

@Override
public ParDoFn create(PipelineOptions options, CloudObject cloudUserFn, @Nullable List<SideInputInfo> sideInputInfos, TupleTag<?> mainOutputTag, Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices, final DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    Map.Entry<TupleTag<?>, Integer> entry = Iterables.getOnlyElement(outputTupleTagsToReceiverIndices.entrySet());
    checkArgument(entry.getKey().equals(mainOutputTag), "Output tags should reference only the main output tag: %s vs %s", entry.getKey(), mainOutputTag);
    checkArgument(entry.getValue() == 0, "There should be a single receiver, but using receiver index %s", entry.getValue());
    byte[] encodedWindowingStrategy = getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN);
    WindowingStrategy windowingStrategy;
    try {
        windowingStrategy = deserializeWindowingStrategy(encodedWindowingStrategy);
    } catch (Exception e) {
        // TODO: Catch block disappears, becoming an error once Python SDK is compliant.
        if (DataflowRunner.hasExperiment(options.as(DataflowPipelineDebugOptions.class), "beam_fn_api")) {
            LOG.info("FnAPI: Unable to deserialize windowing strategy, assuming default", e);
            windowingStrategy = WindowingStrategy.globalDefault();
        } else {
            throw e;
        }
    }
    byte[] serializedCombineFn = getBytes(cloudUserFn, WorkerPropertyNames.COMBINE_FN, null);
    AppliedCombineFn<?, ?, ?, ?> combineFn = null;
    if (serializedCombineFn != null) {
        Object combineFnObj = SerializableUtils.deserializeFromByteArray(serializedCombineFn, "serialized combine fn");
        checkArgument(combineFnObj instanceof AppliedCombineFn, "unexpected kind of AppliedCombineFn: " + combineFnObj.getClass().getName());
        combineFn = (AppliedCombineFn<?, ?, ?, ?>) combineFnObj;
    }
    Map<String, Object> inputCoderObject = getObject(cloudUserFn, WorkerPropertyNames.INPUT_CODER);
    Coder<?> inputCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(inputCoderObject));
    checkArgument(inputCoder instanceof WindowedValueCoder, "Expected WindowedValueCoder for inputCoder, got: " + inputCoder.getClass().getName());
    @SuppressWarnings("unchecked") WindowedValueCoder<?> windowedValueCoder = (WindowedValueCoder<?>) inputCoder;
    Coder<?> elemCoder = windowedValueCoder.getValueCoder();
    checkArgument(elemCoder instanceof KvCoder, "Expected KvCoder for inputCoder, got: " + elemCoder.getClass().getName());
    @SuppressWarnings("unchecked") KvCoder<?, ?> kvCoder = (KvCoder<?, ?>) elemCoder;
    boolean isStreamingPipeline = options.as(StreamingOptions.class).isStreaming();
    SideInputReader sideInputReader = NullSideInputReader.empty();
    @Nullable AppliedCombineFn<?, ?, ?, ?> maybeMergingCombineFn = null;
    if (combineFn != null) {
        sideInputReader = executionContext.getSideInputReader(sideInputInfos, combineFn.getSideInputViews(), operationContext);
        String phase = getString(cloudUserFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
        checkArgument(phase.equals(CombinePhase.ALL) || phase.equals(CombinePhase.MERGE), "Unexpected phase: %s", phase);
        if (phase.equals(CombinePhase.MERGE)) {
            maybeMergingCombineFn = makeAppliedMergingFunction(combineFn);
        } else {
            maybeMergingCombineFn = combineFn;
        }
    }
    StateInternalsFactory<?> stateInternalsFactory = key -> executionContext.getStepContext(operationContext).stateInternals();
    // This will be a GABW Fn for either batch or streaming, with combiner in it or not
    GroupAlsoByWindowFn<?, ?> fn;
    // This will be a FakeKeyedWorkItemCoder for streaming or null for batch
    Coder<?> gabwInputCoder;
    // TODO: do not do this with mess of "if"
    if (isStreamingPipeline) {
        if (maybeMergingCombineFn == null) {
            fn = StreamingGroupAlsoByWindowsDoFns.createForIterable(windowingStrategy, stateInternalsFactory, ((KvCoder) kvCoder).getValueCoder());
            gabwInputCoder = WindmillKeyedWorkItem.FakeKeyedWorkItemCoder.of(kvCoder);
        } else {
            fn = StreamingGroupAlsoByWindowsDoFns.create(windowingStrategy, stateInternalsFactory, (AppliedCombineFn) maybeMergingCombineFn, ((KvCoder) kvCoder).getKeyCoder());
            gabwInputCoder = WindmillKeyedWorkItem.FakeKeyedWorkItemCoder.of(((AppliedCombineFn) maybeMergingCombineFn).getKvCoder());
        }
    } else {
        if (maybeMergingCombineFn == null) {
            fn = BatchGroupAlsoByWindowsDoFns.createForIterable(windowingStrategy, stateInternalsFactory, ((KvCoder) kvCoder).getValueCoder());
            gabwInputCoder = null;
        } else {
            fn = BatchGroupAlsoByWindowsDoFns.create(windowingStrategy, (AppliedCombineFn) maybeMergingCombineFn);
            gabwInputCoder = null;
        }
    }
    // TODO: or anyhow related to it, do not do this with mess of "if"
    if (maybeMergingCombineFn != null) {
        return new GroupAlsoByWindowsParDoFn(options, fn, windowingStrategy, ((AppliedCombineFn) maybeMergingCombineFn).getSideInputViews(), gabwInputCoder, sideInputReader, mainOutputTag, executionContext.getStepContext(operationContext));
    } else {
        return new GroupAlsoByWindowsParDoFn(options, fn, windowingStrategy, null, gabwInputCoder, sideInputReader, mainOutputTag, executionContext.getStepContext(operationContext));
    }
}
Also used : CombineFn(org.apache.beam.sdk.transforms.Combine.CombineFn) StateInternalsFactory(org.apache.beam.runners.core.StateInternalsFactory) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) CombineFnWithContext(org.apache.beam.sdk.transforms.CombineWithContext.CombineFnWithContext) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) LoggerFactory(org.slf4j.LoggerFactory) CloudObjects(org.apache.beam.runners.dataflow.util.CloudObjects) BatchGroupAlsoByWindowsDoFns(org.apache.beam.runners.dataflow.worker.util.BatchGroupAlsoByWindowsDoFns) Coder(org.apache.beam.sdk.coders.Coder) ListCoder(org.apache.beam.sdk.coders.ListCoder) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) ArrayList(java.util.ArrayList) GlobalCombineFn(org.apache.beam.sdk.transforms.CombineFnBase.GlobalCombineFn) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) TupleTag(org.apache.beam.sdk.values.TupleTag) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Structs.getBytes(org.apache.beam.runners.dataflow.util.Structs.getBytes) SideInputInfo(com.google.api.services.dataflow.model.SideInputInfo) SideInputReader(org.apache.beam.runners.core.SideInputReader) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) CannotProvideCoderException(org.apache.beam.sdk.coders.CannotProvideCoderException) KvCoder(org.apache.beam.sdk.coders.KvCoder) AppliedCombineFn(org.apache.beam.sdk.util.AppliedCombineFn) Logger(org.slf4j.Logger) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) DataflowRunner(org.apache.beam.runners.dataflow.DataflowRunner) WorkerPropertyNames(org.apache.beam.runners.dataflow.worker.util.WorkerPropertyNames) RootCase(org.apache.beam.model.pipeline.v1.RunnerApi.MessageWithComponents.RootCase) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) List(java.util.List) WindowingStrategyTranslation(org.apache.beam.runners.core.construction.WindowingStrategyTranslation) Structs.getObject(org.apache.beam.runners.dataflow.util.Structs.getObject) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) Context(org.apache.beam.sdk.transforms.CombineWithContext.Context) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) PropertyNames(org.apache.beam.runners.dataflow.util.PropertyNames) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) TupleTag(org.apache.beam.sdk.values.TupleTag) SideInputReader(org.apache.beam.runners.core.SideInputReader) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KvCoder(org.apache.beam.sdk.coders.KvCoder) AppliedCombineFn(org.apache.beam.sdk.util.AppliedCombineFn) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) CannotProvideCoderException(org.apache.beam.sdk.coders.CannotProvideCoderException) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Structs.getObject(org.apache.beam.runners.dataflow.util.Structs.getObject) Map(java.util.Map) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 87 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class StreamingKeyedWorkItemSideInputDoFnRunnerTest method createRunner.

@SuppressWarnings("unchecked")
private StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow> createRunner(DoFnRunners.OutputManager outputManager) throws Exception {
    CoderRegistry registry = CoderRegistry.createDefault();
    Coder<String> keyCoder = StringUtf8Coder.of();
    Coder<Integer> inputCoder = BigEndianIntegerCoder.of();
    AppliedCombineFn<String, Integer, ?, Integer> combineFn = AppliedCombineFn.withInputCoder(Sum.ofIntegers(), registry, KvCoder.of(keyCoder, inputCoder));
    WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(WINDOW_FN);
    @SuppressWarnings("rawtypes") StreamingGroupAlsoByWindowViaWindowSetFn doFn = (StreamingGroupAlsoByWindowViaWindowSetFn) StreamingGroupAlsoByWindowsDoFns.create(windowingStrategy, key -> state, combineFn, keyCoder);
    DoFnRunner<KeyedWorkItem<String, Integer>, KV<String, Integer>> simpleDoFnRunner = new GroupAlsoByWindowFnRunner<>(PipelineOptionsFactory.create(), doFn.asDoFn(), mockSideInputReader, outputManager, mainOutputTag, stepContext);
    return new StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow>(simpleDoFnRunner, keyCoder, sideInputFetcher, stepContext);
}
Also used : Arrays(java.util.Arrays) KV(org.apache.beam.sdk.values.KV) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) KeyedWorkItems(org.apache.beam.runners.core.KeyedWorkItems) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) Matchers(org.mockito.Matchers) Mock(org.mockito.Mock) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) TimerInternals(org.apache.beam.runners.core.TimerInternals) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) MockitoAnnotations(org.mockito.MockitoAnnotations) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) TupleTag(org.apache.beam.sdk.values.TupleTag) StateInternals(org.apache.beam.runners.core.StateInternals) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) SideInputReader(org.apache.beam.runners.core.SideInputReader) Before(org.junit.Before) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) KvCoder(org.apache.beam.sdk.coders.KvCoder) AppliedCombineFn(org.apache.beam.sdk.util.AppliedCombineFn) Timer(org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Set(java.util.Set) Sum(org.apache.beam.sdk.transforms.Sum) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) List(java.util.List) BagState(org.apache.beam.sdk.state.BagState) Instant(org.joda.time.Instant) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) ValueInEmptyWindows(org.apache.beam.runners.dataflow.worker.util.ValueInEmptyWindows) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 88 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKeyWithCombinerAndSideInputs.

@Test
public void testPartialGroupByKeyWithCombinerAndSideInputs() throws Exception {
    Coder keyCoder = StringUtf8Coder.of();
    Coder valueCoder = BigEndianIntegerCoder.of();
    TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, valueCoder))), counterSet, NameContextsForTests.nameContextForTest());
    Combiner<WindowedValue<String>, Integer, Integer, Integer> combineFn = new TestCombiner();
    ParDoFn pgbkParDoFn = new StreamingSideInputPGBKParDoFn(GroupingTables.combining(new WindowingCoderGroupingKeyCreator(keyCoder), PairInfo.create(), combineFn, new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)), new CoderSizeEstimator(valueCoder)), receiver, mockSideInputFetcher);
    Set<BoundedWindow> readyWindows = ImmutableSet.<BoundedWindow>of(GlobalWindow.INSTANCE);
    when(mockSideInputFetcher.getReadyWindows()).thenReturn(readyWindows);
    when(mockSideInputFetcher.prefetchElements(readyWindows)).thenReturn(ImmutableList.of(elemsBag));
    when(elemsBag.read()).thenReturn(ImmutableList.of(WindowedValue.valueInGlobalWindow(KV.of("hi", 4)), WindowedValue.valueInGlobalWindow(KV.of("there", 5))));
    when(mockSideInputFetcher.storeIfBlocked(Matchers.<WindowedValue<KV<String, Integer>>>any())).thenReturn(false, false, false, true);
    pgbkParDoFn.startBundle(receiver);
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));
    pgbkParDoFn.finishBundle();
    assertThat(receiver.outputElems, IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(WindowedValue.valueInGlobalWindow(KV.of("hi", 10)), WindowedValue.valueInGlobalWindow(KV.of("there", 13)), WindowedValue.valueInGlobalWindow(KV.of("joe", 7))));
    // Exact counter values depend on size of encoded data.  If encoding
    // changes, then these expected counters should change to match.
    CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
    counterSet.extractUpdates(false, updateExtractor);
    verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
    verify(updateExtractor).longMean(getMeanByteCounterName("test_receiver_out"), false, LongCounterMean.ZERO.addValue(25L, 3));
    verifyNoMoreInteractions(updateExtractor);
}
Also used : CoderSizeEstimator(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.CoderSizeEstimator) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) WindowingCoderGroupingKeyCreator(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.WindowingCoderGroupingKeyCreator) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) KV(org.apache.beam.sdk.values.KV) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) Test(org.junit.Test)

Example 89 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndStreaming.

@Test
public void testCreateWithCombinerAndStreaming() throws Exception {
    StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
    options.setStreaming(true);
    Coder keyCoder = StringUtf8Coder.of();
    Coder valueCoder = BigEndianIntegerCoder.of();
    KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);
    TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)), counterSet, NameContextsForTests.nameContextForTest());
    ParDoFn pgbk = PartialGroupByKeyParDoFns.create(options, kvCoder, AppliedCombineFn.withInputCoder(Sum.ofIntegers(), CoderRegistry.createDefault(), kvCoder), NullSideInputReader.empty(), receiver, null);
    assertTrue(pgbk instanceof SimplePartialGroupByKeyParDoFn);
}
Also used : ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) Test(org.junit.Test)

Example 90 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKeyWithCombiner.

@Test
public void testPartialGroupByKeyWithCombiner() throws Exception {
    Coder keyCoder = StringUtf8Coder.of();
    Coder valueCoder = BigEndianIntegerCoder.of();
    TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, valueCoder))), counterSet, NameContextsForTests.nameContextForTest());
    Combiner<WindowedValue<String>, Integer, Integer, Integer> combineFn = new TestCombiner();
    ParDoFn pgbkParDoFn = new SimplePartialGroupByKeyParDoFn(GroupingTables.combining(new WindowingCoderGroupingKeyCreator(keyCoder), PairInfo.create(), combineFn, new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)), new CoderSizeEstimator(valueCoder)), receiver);
    pgbkParDoFn.startBundle(receiver);
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 4)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 5)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));
    pgbkParDoFn.finishBundle();
    assertThat(receiver.outputElems, IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(WindowedValue.valueInGlobalWindow(KV.of("hi", 19)), WindowedValue.valueInGlobalWindow(KV.of("there", 13)), WindowedValue.valueInGlobalWindow(KV.of("joe", 7))));
    // Exact counter values depend on size of encoded data.  If encoding
    // changes, then these expected counters should change to match.
    CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
    counterSet.extractUpdates(false, updateExtractor);
    verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
    verify(updateExtractor).longMean(getMeanByteCounterName("test_receiver_out"), false, LongCounterMean.ZERO.addValue(25L, 3));
    verifyNoMoreInteractions(updateExtractor);
}
Also used : CoderSizeEstimator(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.CoderSizeEstimator) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) WindowedValue(org.apache.beam.sdk.util.WindowedValue) WindowingCoderGroupingKeyCreator(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.WindowingCoderGroupingKeyCreator) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) Test(org.junit.Test)

Aggregations

Coder (org.apache.beam.sdk.coders.Coder)119 KvCoder (org.apache.beam.sdk.coders.KvCoder)75 WindowedValue (org.apache.beam.sdk.util.WindowedValue)55 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)44 Test (org.junit.Test)43 HashMap (java.util.HashMap)42 ArrayList (java.util.ArrayList)38 Map (java.util.Map)36 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)35 List (java.util.List)32 KV (org.apache.beam.sdk.values.KV)30 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)28 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)28 PCollection (org.apache.beam.sdk.values.PCollection)28 TupleTag (org.apache.beam.sdk.values.TupleTag)24 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)23 IOException (java.io.IOException)22 PCollectionView (org.apache.beam.sdk.values.PCollectionView)22 Instant (org.joda.time.Instant)21 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)20