use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKeyWithCombiner.
/**
 * Pushes six key/value pairs spread over three distinct keys through a combining partial
 * group-by-key and checks that one combined element per key reaches the receiver, and that the
 * receiver's element-count and mean-byte-size counters carry the expected values.
 */
@Test
public void testPartialGroupByKeyWithCombiner() throws Exception {
  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();

  // Receiver that records output elements and reports their encoded size to counterSet.
  ElementByteSizeObservableCoder observableKvCoder =
      new ElementByteSizeObservableCoder(
          WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, valueCoder)));
  TestOutputReceiver receiver =
      new TestOutputReceiver(
          observableKvCoder, counterSet, NameContextsForTests.nameContextForTest());

  // Pieces of the combining grouping table under test.
  Combiner<WindowedValue<String>, Integer, Integer, Integer> combineFn = new TestCombiner();
  WindowingCoderGroupingKeyCreator groupingKeyCreator =
      new WindowingCoderGroupingKeyCreator(keyCoder);
  CoderSizeEstimator keySizeEstimator =
      new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder));
  CoderSizeEstimator valueSizeEstimator = new CoderSizeEstimator(valueCoder);

  ParDoFn pgbkParDoFn =
      new SimplePartialGroupByKeyParDoFn(
          GroupingTables.combining(
              groupingKeyCreator,
              PairInfo.create(),
              combineFn,
              keySizeEstimator,
              valueSizeEstimator),
          receiver);

  // Inputs are fed in this exact order; keys[i] is paired with values[i].
  String[] keys = {"hi", "there", "hi", "joe", "there", "hi"};
  int[] values = {4, 5, 6, 7, 8, 9};

  pgbkParDoFn.startBundle(receiver);
  for (int i = 0; i < keys.length; i++) {
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of(keys[i], values[i])));
  }
  pgbkParDoFn.finishBundle();

  WindowedValue<KV<String, Integer>> expectedHi = WindowedValue.valueInGlobalWindow(KV.of("hi", 19));
  WindowedValue<KV<String, Integer>> expectedThere =
      WindowedValue.valueInGlobalWindow(KV.of("there", 13));
  WindowedValue<KV<String, Integer>> expectedJoe = WindowedValue.valueInGlobalWindow(KV.of("joe", 7));
  assertThat(
      receiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          expectedHi, expectedThere, expectedJoe));

  // The expected counter values below are tied to the encoded size of the data; if the
  // encoding changes, these expectations have to be updated to match.
  CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
  counterSet.extractUpdates(false, updateExtractor);
  verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
  verify(updateExtractor)
      .longMean(
          getMeanByteCounterName("test_receiver_out"),
          false,
          LongCounterMean.ZERO.addValue(25L, 3));
  verifyNoMoreInteractions(updateExtractor);
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndBatchSideInputs.
/**
 * Checks that {@code PartialGroupByKeyParDoFns.create} returns a {@code
 * BatchSideInputPGBKParDoFn} when it is given a combine fn, a batch-mode step context, and a
 * side input reader that reports itself as non-empty.
 */
@Test
public void testCreateWithCombinerAndBatchSideInputs() throws Exception {
  // The side input reader must claim to hold side inputs for this scenario.
  when(mockSideInputReader.isEmpty()).thenReturn(false);

  PipelineOptions options = PipelineOptionsFactory.create();
  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(options, "testStage");
  StepContext stepContext =
      executionContext.getStepContext(TestOperationContext.create(counterSet));

  CoderRegistry coderRegistry = CoderRegistry.createDefault();
  ImmutableList<PCollectionView<?>> noSideInputViews = ImmutableList.<PCollectionView<?>>of();

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              coderRegistry,
              kvCoder,
              noSideInputViews,
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          stepContext);

  assertTrue(pgbk instanceof BatchSideInputPGBKParDoFn);
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class StreamingPCollectionViewWriterDoFnFactoryTest method testConstruction.
/**
 * Checks that {@code StreamingPCollectionViewWriterDoFnFactory} builds a {@code
 * StreamingPCollectionViewWriterParDoFn} from a cloud spec carrying an encoding and a side
 * input id, using mocked operation, execution and step contexts.
 */
@Test
public void testConstruction() throws Exception {
  DataflowOperationContext operationContext = Mockito.mock(DataflowOperationContext.class);
  DataflowExecutionContext executionContext = Mockito.mock(DataflowExecutionContext.class);
  DataflowStepContext stepContext = Mockito.mock(StreamingModeExecutionContext.StepContext.class);
  when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);

  CloudObject coder =
      CloudObjects.asCloudObject(
          WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), GlobalWindow.Coder.INSTANCE),
          // sdkComponents
          null);

  CloudObject cloudUserFn =
      CloudObject.fromSpec(
          ImmutableMap.of(
              PropertyNames.OBJECT_TYPE_NAME,
              "StreamingPCollectionViewWriterDoFn",
              PropertyNames.ENCODING,
              coder,
              WorkerPropertyNames.SIDE_INPUT_ID,
              "test-side-input-id"));

  ParDoFn parDoFn =
      new StreamingPCollectionViewWriterDoFnFactory()
          .create(
              // pipeline options
              null,
              cloudUserFn,
              // side input infos
              null,
              // main output tag
              null,
              // output tag to receiver index
              null,
              executionContext,
              operationContext);

  assertThat(parDoFn, instanceOf(StreamingPCollectionViewWriterParDoFn.class));
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class ValuesDoFnFactoryTest method testConversionOfRecord.
/**
 * Checks that the ParDoFn built by {@code ValuesDoFnFactory} turns a windowed {@code KV(42, 43)}
 * into a windowed {@code 43}, i.e. only the value part is handed to the receiver.
 */
@Test
public void testConversionOfRecord() throws Exception {
  CloudObject cloudUserFn =
      CloudObject.fromSpec(ImmutableMap.of(PropertyNames.OBJECT_TYPE_NAME, "ValuesDoFn"));

  ParDoFn parDoFn =
      new ValuesDoFnFactory()
          .create(
              // pipeline options
              null,
              cloudUserFn,
              // side input infos
              null,
              // main output tag
              null,
              // output tag to receiver index
              null,
              // execution context
              null,
              // last argument (not labelled in the original call)
              null);

  // Collect everything the ParDoFn emits into a plain list.
  List<Object> received = new ArrayList<>();
  parDoFn.startBundle(element -> received.add(element));

  parDoFn.processElement(valueInGlobalWindow(KV.of(42, 43)));

  assertThat(received, contains(valueInGlobalWindow(43)));
}
use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn in project beam by apache.
the class SimpleParDoFnTest method testUndeclaredSideOutputs.
/**
 * Builds a {@code SimpleParDoFn} whose output-tag map only contains the main output and the
 * "declared" tag, while the wrapped {@code TestDoFn} is constructed with three additional
 * "undecl*" tags. Processing an element is expected to fail with a {@code UserCodeException}
 * caused by an {@code IllegalArgumentException} mentioning "Unknown output tag".
 */
@Test
public void testUndeclaredSideOutputs() throws Exception {
  TestDoFn doFn =
      new TestDoFn(
          ImmutableList.of(
              new TupleTag<>("declared"),
              new TupleTag<>("undecl1"),
              new TupleTag<>("undecl2"),
              new TupleTag<>("undecl3")));
  DoFnInfo<?, ?> doFnInfo =
      DoFnInfo.forFn(
          doFn,
          WindowingStrategy.globalDefault(),
          // side input views
          null,
          // input coder
          null,
          MAIN_OUTPUT,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  CounterSet testCounters = new CounterSet();
  TestOperationContext operationContext = TestOperationContext.create(testCounters);
  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(options, "testStage");

  ParDoFn simpleParDoFn =
      new SimpleParDoFn<>(
          options,
          DoFnInstanceManagers.cloningPool(doFnInfo, options),
          NullSideInputReader.empty(),
          MAIN_OUTPUT,
          // Only two outputs are wired to receiver indices: the main output and "declared".
          ImmutableMap.of(MAIN_OUTPUT, 0, new TupleTag<String>("declared"), 1),
          executionContext.getStepContext(operationContext),
          operationContext,
          DoFnSchemaInformation.create(),
          Collections.emptyMap(),
          SimpleDoFnRunnerFactory.INSTANCE);

  // One receiver per entry in the output-tag map above.
  simpleParDoFn.startBundle(new TestReceiver(), new TestReceiver());

  // Expectations must be registered before the call that is supposed to throw.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(instanceOf(IllegalArgumentException.class));
  thrown.expectMessage("Unknown output tag");

  simpleParDoFn.processElement(WindowedValue.valueInGlobalWindow(5));
}
Aggregations