use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class GroupAlsoByWindowParDoFnFactory method create.
@Override
public ParDoFn create(PipelineOptions options, CloudObject cloudUserFn, @Nullable List<SideInputInfo> sideInputInfos, TupleTag<?> mainOutputTag, Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices, final DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
Map.Entry<TupleTag<?>, Integer> entry = Iterables.getOnlyElement(outputTupleTagsToReceiverIndices.entrySet());
checkArgument(entry.getKey().equals(mainOutputTag), "Output tags should reference only the main output tag: %s vs %s", entry.getKey(), mainOutputTag);
checkArgument(entry.getValue() == 0, "There should be a single receiver, but using receiver index %s", entry.getValue());
byte[] encodedWindowingStrategy = getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN);
WindowingStrategy windowingStrategy;
try {
windowingStrategy = deserializeWindowingStrategy(encodedWindowingStrategy);
} catch (Exception e) {
// TODO: Catch block disappears, becoming an error once Python SDK is compliant.
if (DataflowRunner.hasExperiment(options.as(DataflowPipelineDebugOptions.class), "beam_fn_api")) {
LOG.info("FnAPI: Unable to deserialize windowing strategy, assuming default", e);
windowingStrategy = WindowingStrategy.globalDefault();
} else {
throw e;
}
}
byte[] serializedCombineFn = getBytes(cloudUserFn, WorkerPropertyNames.COMBINE_FN, null);
AppliedCombineFn<?, ?, ?, ?> combineFn = null;
if (serializedCombineFn != null) {
Object combineFnObj = SerializableUtils.deserializeFromByteArray(serializedCombineFn, "serialized combine fn");
checkArgument(combineFnObj instanceof AppliedCombineFn, "unexpected kind of AppliedCombineFn: " + combineFnObj.getClass().getName());
combineFn = (AppliedCombineFn<?, ?, ?, ?>) combineFnObj;
}
Map<String, Object> inputCoderObject = getObject(cloudUserFn, WorkerPropertyNames.INPUT_CODER);
Coder<?> inputCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(inputCoderObject));
checkArgument(inputCoder instanceof WindowedValueCoder, "Expected WindowedValueCoder for inputCoder, got: " + inputCoder.getClass().getName());
@SuppressWarnings("unchecked") WindowedValueCoder<?> windowedValueCoder = (WindowedValueCoder<?>) inputCoder;
Coder<?> elemCoder = windowedValueCoder.getValueCoder();
checkArgument(elemCoder instanceof KvCoder, "Expected KvCoder for inputCoder, got: " + elemCoder.getClass().getName());
@SuppressWarnings("unchecked") KvCoder<?, ?> kvCoder = (KvCoder<?, ?>) elemCoder;
boolean isStreamingPipeline = options.as(StreamingOptions.class).isStreaming();
SideInputReader sideInputReader = NullSideInputReader.empty();
@Nullable AppliedCombineFn<?, ?, ?, ?> maybeMergingCombineFn = null;
if (combineFn != null) {
sideInputReader = executionContext.getSideInputReader(sideInputInfos, combineFn.getSideInputViews(), operationContext);
String phase = getString(cloudUserFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
checkArgument(phase.equals(CombinePhase.ALL) || phase.equals(CombinePhase.MERGE), "Unexpected phase: %s", phase);
if (phase.equals(CombinePhase.MERGE)) {
maybeMergingCombineFn = makeAppliedMergingFunction(combineFn);
} else {
maybeMergingCombineFn = combineFn;
}
}
StateInternalsFactory<?> stateInternalsFactory = key -> executionContext.getStepContext(operationContext).stateInternals();
// This will be a GABW Fn for either batch or streaming, with combiner in it or not
GroupAlsoByWindowFn<?, ?> fn;
// This will be a FakeKeyedWorkItemCoder for streaming or null for batch
Coder<?> gabwInputCoder;
// TODO: do not do this with mess of "if"
if (isStreamingPipeline) {
if (maybeMergingCombineFn == null) {
fn = StreamingGroupAlsoByWindowsDoFns.createForIterable(windowingStrategy, stateInternalsFactory, ((KvCoder) kvCoder).getValueCoder());
gabwInputCoder = WindmillKeyedWorkItem.FakeKeyedWorkItemCoder.of(kvCoder);
} else {
fn = StreamingGroupAlsoByWindowsDoFns.create(windowingStrategy, stateInternalsFactory, (AppliedCombineFn) maybeMergingCombineFn, ((KvCoder) kvCoder).getKeyCoder());
gabwInputCoder = WindmillKeyedWorkItem.FakeKeyedWorkItemCoder.of(((AppliedCombineFn) maybeMergingCombineFn).getKvCoder());
}
} else {
if (maybeMergingCombineFn == null) {
fn = BatchGroupAlsoByWindowsDoFns.createForIterable(windowingStrategy, stateInternalsFactory, ((KvCoder) kvCoder).getValueCoder());
gabwInputCoder = null;
} else {
fn = BatchGroupAlsoByWindowsDoFns.create(windowingStrategy, (AppliedCombineFn) maybeMergingCombineFn);
gabwInputCoder = null;
}
}
// TODO: or anyhow related to it, do not do this with mess of "if"
if (maybeMergingCombineFn != null) {
return new GroupAlsoByWindowsParDoFn(options, fn, windowingStrategy, ((AppliedCombineFn) maybeMergingCombineFn).getSideInputViews(), gabwInputCoder, sideInputReader, mainOutputTag, executionContext.getStepContext(operationContext));
} else {
return new GroupAlsoByWindowsParDoFn(options, fn, windowingStrategy, null, gabwInputCoder, sideInputReader, mainOutputTag, executionContext.getStepContext(operationContext));
}
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class StreamingKeyedWorkItemSideInputDoFnRunnerTest method createRunner.
@SuppressWarnings("unchecked")
private StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow> createRunner(DoFnRunners.OutputManager outputManager) throws Exception {
CoderRegistry registry = CoderRegistry.createDefault();
Coder<String> keyCoder = StringUtf8Coder.of();
Coder<Integer> inputCoder = BigEndianIntegerCoder.of();
AppliedCombineFn<String, Integer, ?, Integer> combineFn = AppliedCombineFn.withInputCoder(Sum.ofIntegers(), registry, KvCoder.of(keyCoder, inputCoder));
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(WINDOW_FN);
@SuppressWarnings("rawtypes") StreamingGroupAlsoByWindowViaWindowSetFn doFn = (StreamingGroupAlsoByWindowViaWindowSetFn) StreamingGroupAlsoByWindowsDoFns.create(windowingStrategy, key -> state, combineFn, keyCoder);
DoFnRunner<KeyedWorkItem<String, Integer>, KV<String, Integer>> simpleDoFnRunner = new GroupAlsoByWindowFnRunner<>(PipelineOptionsFactory.create(), doFn.asDoFn(), mockSideInputReader, outputManager, mainOutputTag, stepContext);
return new StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow>(simpleDoFnRunner, keyCoder, sideInputFetcher, stepContext);
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKeyWithCombinerAndSideInputs.
@Test
public void testPartialGroupByKeyWithCombinerAndSideInputs() throws Exception {
Coder keyCoder = StringUtf8Coder.of();
Coder valueCoder = BigEndianIntegerCoder.of();
TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, valueCoder))), counterSet, NameContextsForTests.nameContextForTest());
Combiner<WindowedValue<String>, Integer, Integer, Integer> combineFn = new TestCombiner();
ParDoFn pgbkParDoFn = new StreamingSideInputPGBKParDoFn(GroupingTables.combining(new WindowingCoderGroupingKeyCreator(keyCoder), PairInfo.create(), combineFn, new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)), new CoderSizeEstimator(valueCoder)), receiver, mockSideInputFetcher);
Set<BoundedWindow> readyWindows = ImmutableSet.<BoundedWindow>of(GlobalWindow.INSTANCE);
when(mockSideInputFetcher.getReadyWindows()).thenReturn(readyWindows);
when(mockSideInputFetcher.prefetchElements(readyWindows)).thenReturn(ImmutableList.of(elemsBag));
when(elemsBag.read()).thenReturn(ImmutableList.of(WindowedValue.valueInGlobalWindow(KV.of("hi", 4)), WindowedValue.valueInGlobalWindow(KV.of("there", 5))));
when(mockSideInputFetcher.storeIfBlocked(Matchers.<WindowedValue<KV<String, Integer>>>any())).thenReturn(false, false, false, true);
pgbkParDoFn.startBundle(receiver);
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));
pgbkParDoFn.finishBundle();
assertThat(receiver.outputElems, IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(WindowedValue.valueInGlobalWindow(KV.of("hi", 10)), WindowedValue.valueInGlobalWindow(KV.of("there", 13)), WindowedValue.valueInGlobalWindow(KV.of("joe", 7))));
// Exact counter values depend on size of encoded data. If encoding
// changes, then these expected counters should change to match.
CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
counterSet.extractUpdates(false, updateExtractor);
verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
verify(updateExtractor).longMean(getMeanByteCounterName("test_receiver_out"), false, LongCounterMean.ZERO.addValue(25L, 3));
verifyNoMoreInteractions(updateExtractor);
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndStreaming.
@Test
public void testCreateWithCombinerAndStreaming() throws Exception {
StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
options.setStreaming(true);
Coder keyCoder = StringUtf8Coder.of();
Coder valueCoder = BigEndianIntegerCoder.of();
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);
TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)), counterSet, NameContextsForTests.nameContextForTest());
ParDoFn pgbk = PartialGroupByKeyParDoFns.create(options, kvCoder, AppliedCombineFn.withInputCoder(Sum.ofIntegers(), CoderRegistry.createDefault(), kvCoder), NullSideInputReader.empty(), receiver, null);
assertTrue(pgbk instanceof SimplePartialGroupByKeyParDoFn);
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKeyWithCombiner.
@Test
public void testPartialGroupByKeyWithCombiner() throws Exception {
Coder keyCoder = StringUtf8Coder.of();
Coder valueCoder = BigEndianIntegerCoder.of();
TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, valueCoder))), counterSet, NameContextsForTests.nameContextForTest());
Combiner<WindowedValue<String>, Integer, Integer, Integer> combineFn = new TestCombiner();
ParDoFn pgbkParDoFn = new SimplePartialGroupByKeyParDoFn(GroupingTables.combining(new WindowingCoderGroupingKeyCreator(keyCoder), PairInfo.create(), combineFn, new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)), new CoderSizeEstimator(valueCoder)), receiver);
pgbkParDoFn.startBundle(receiver);
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 4)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 5)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));
pgbkParDoFn.finishBundle();
assertThat(receiver.outputElems, IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(WindowedValue.valueInGlobalWindow(KV.of("hi", 19)), WindowedValue.valueInGlobalWindow(KV.of("there", 13)), WindowedValue.valueInGlobalWindow(KV.of("joe", 7))));
// Exact counter values depend on size of encoded data. If encoding
// changes, then these expected counters should change to match.
CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
counterSet.extractUpdates(false, updateExtractor);
verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
verify(updateExtractor).longMean(getMeanByteCounterName("test_receiver_out"), false, LongCounterMean.ZERO.addValue(25L, 3));
verifyNoMoreInteractions(updateExtractor);
}
Aggregations