use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKey.
@Test
public void testPartialGroupByKey() throws Exception {
Coder keyCoder = StringUtf8Coder.of();
Coder valueCoder = BigEndianIntegerCoder.of();
TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, IterableCoder.of(valueCoder)))), counterSet, NameContextsForTests.nameContextForTest());
ParDoFn pgbkParDoFn = new SimplePartialGroupByKeyParDoFn(GroupingTables.buffering(new WindowingCoderGroupingKeyCreator(keyCoder), PairInfo.create(), new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)), new CoderSizeEstimator(valueCoder)), receiver);
pgbkParDoFn.startBundle(receiver);
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 4)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 5)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));
pgbkParDoFn.finishBundle();
assertThat(receiver.outputElems, IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(WindowedValue.valueInGlobalWindow(KV.of("hi", Arrays.asList(4, 6, 9))), WindowedValue.valueInGlobalWindow(KV.of("there", Arrays.asList(5, 8))), WindowedValue.valueInGlobalWindow(KV.of("joe", Arrays.asList(7)))));
// Exact counter values depend on size of encoded data. If encoding
// changes, then these expected counters should change to match.
CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
counterSet.extractUpdates(false, updateExtractor);
verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
verify(updateExtractor).longMean(getMeanByteCounterName("test_receiver_out"), false, LongCounterMean.ZERO.addValue(49L, 3));
verifyNoMoreInteractions(updateExtractor);
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndStreamingSideInputs.
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
options.setStreaming(true);
Coder keyCoder = StringUtf8Coder.of();
Coder valueCoder = BigEndianIntegerCoder.of();
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);
TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)), counterSet, NameContextsForTests.nameContextForTest());
when(mockSideInputReader.isEmpty()).thenReturn(false);
when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any())).thenReturn(mockState);
when(mockState.read()).thenReturn(Maps.newHashMap());
ParDoFn pgbk = PartialGroupByKeyParDoFns.create(options, kvCoder, AppliedCombineFn.withInputCoder(Sum.ofIntegers(), CoderRegistry.createDefault(), kvCoder, ImmutableList.<PCollectionView<?>>of(), WindowingStrategy.globalDefault()), mockSideInputReader, receiver, mockStreamingStepContext);
assertTrue(pgbk instanceof StreamingSideInputPGBKParDoFn);
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class PairWithConstantKeyDoFnFactoryTest method testConversionOfRecord.
@Test
public void testConversionOfRecord() throws Exception {
ParDoFn parDoFn = new PairWithConstantKeyDoFnFactory().create(null, /* pipeline options */
CloudObject.fromSpec(ImmutableMap.of(PropertyNames.OBJECT_TYPE_NAME, "PairWithConstantKeyDoFn", WorkerPropertyNames.ENCODED_KEY, StringUtils.byteArrayToJsonString(CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), 42)), PropertyNames.ENCODING, ImmutableMap.of(PropertyNames.OBJECT_TYPE_NAME, "kind:fixed_big_endian_int32"))), null, /* side input infos */
null, /* main output tag */
null, /* output tag to receiver index */
null, /* exection context */
null);
List<Object> outputReceiver = new ArrayList<>();
parDoFn.startBundle(outputReceiver::add);
parDoFn.processElement(valueInGlobalWindow(43));
assertThat(outputReceiver, contains(valueInGlobalWindow(KV.of(42, 43))));
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class ReifyTimestampAndWindowsParDoFnFactoryTest method verifyReifiedIsInTheSameWindows.
private <K, V> void verifyReifiedIsInTheSameWindows(WindowedValue<KV<K, V>> elem) throws Exception {
ParDoFn reifyFn = new ReifyTimestampAndWindowsParDoFnFactory().create(null, null, null, null, null, null, null);
SingleValueReceiver<WindowedValue<KV<K, WindowedValue<V>>>> receiver = new SingleValueReceiver<>();
reifyFn.startBundle(receiver);
// The important thing to test that is not just a restatement of the ParDoFn is that
// it only produces one element per input
reifyFn.processElement(elem);
assertThat(receiver.reified.getValue().getKey(), equalTo(elem.getValue().getKey()));
assertThat(receiver.reified.getValue().getValue().getValue(), equalTo(elem.getValue().getValue()));
assertThat(receiver.reified.getValue().getValue().getTimestamp(), equalTo(elem.getTimestamp()));
assertThat(receiver.reified.getValue().getValue().getWindows(), equalTo(elem.getWindows()));
assertThat(receiver.reified.getValue().getValue().getPane(), equalTo(elem.getPane()));
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class UserParDoFnFactory method create.
@Override
public ParDoFn create(PipelineOptions options, CloudObject cloudUserFn, @Nullable List<SideInputInfo> sideInputInfos, TupleTag<?> mainOutputTag, Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
DoFnInstanceManager instanceManager = fnCache.get(operationContext.nameContext().systemName(), () -> DoFnInstanceManagers.cloningPool(doFnExtractor.getDoFnInfo(cloudUserFn), options));
DoFnInfo<?, ?> doFnInfo = instanceManager.peek();
DataflowExecutionContext.DataflowStepContext stepContext = executionContext.getStepContext(operationContext);
Iterable<PCollectionView<?>> sideInputViews = doFnInfo.getSideInputViews();
SideInputReader sideInputReader = executionContext.getSideInputReader(sideInputInfos, sideInputViews, operationContext);
if (doFnInfo.getDoFn() instanceof BatchStatefulParDoOverrides.BatchStatefulDoFn) {
// HACK: BatchStatefulDoFn is a class from DataflowRunner's overrides
// that just instructs the worker to execute it differently. This will
// be replaced by metadata in the Runner API payload
BatchStatefulParDoOverrides.BatchStatefulDoFn fn = (BatchStatefulParDoOverrides.BatchStatefulDoFn) doFnInfo.getDoFn();
DoFn underlyingFn = fn.getUnderlyingDoFn();
return new BatchModeUngroupingParDoFn((BatchModeExecutionContext.StepContext) stepContext, new SimpleParDoFn(options, DoFnInstanceManagers.singleInstance(doFnInfo.withFn(underlyingFn)), sideInputReader, doFnInfo.getMainOutput(), outputTupleTagsToReceiverIndices, stepContext, operationContext, doFnInfo.getDoFnSchemaInformation(), doFnInfo.getSideInputMapping(), runnerFactory));
} else if (doFnInfo.getDoFn() instanceof StreamingPCollectionViewWriterFn) {
// HACK: StreamingPCollectionViewWriterFn is a class from
// DataflowPipelineTranslator. Using the class as an indicator is a migration path
// to simply having an indicator string.
checkArgument(stepContext instanceof StreamingModeExecutionContext.StreamingModeStepContext, "stepContext must be a StreamingModeStepContext to use StreamingPCollectionViewWriterFn");
DataflowRunner.StreamingPCollectionViewWriterFn<Object> writerFn = (StreamingPCollectionViewWriterFn<Object>) doFnInfo.getDoFn();
return new StreamingPCollectionViewWriterParDoFn((StreamingModeExecutionContext.StreamingModeStepContext) stepContext, writerFn.getView().getTagInternal(), writerFn.getDataCoder(), (Coder<BoundedWindow>) doFnInfo.getWindowingStrategy().getWindowFn().windowCoder());
} else {
return new SimpleParDoFn(options, instanceManager, sideInputReader, doFnInfo.getMainOutput(), outputTupleTagsToReceiverIndices, stepContext, operationContext, doFnInfo.getDoFnSchemaInformation(), doFnInfo.getSideInputMapping(), runnerFactory);
}
}
Aggregations