use of org.apache.beam.sdk.transforms.DoFn.ProcessContext in project beam by apache.
the class ProcessBundleDescriptorsTest method testLengthPrefixingOfKeyCoderInStatefulExecutableStage.
/**
* Tests that a stateful stage will wrap the key coder of a stateful transform in a
* LengthPrefixCoder.
*/
@Test
public void testLengthPrefixingOfKeyCoderInStatefulExecutableStage() throws Exception {
// Add another stateful stage with a non-standard key coder
Pipeline p = Pipeline.create();
Coder<Void> keycoder = VoidCoder.of();
assertThat(ModelCoderRegistrar.isKnownCoder(keycoder), is(false));
p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], KV<Void, String>>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
})).setCoder(KvCoder.of(keycoder, StringUtf8Coder.of())).apply("userState", ParDo.of(new DoFn<KV<Void, String>, KV<Void, String>>() {
@StateId("stateId")
private final StateSpec<BagState<String>> bufferState = StateSpecs.bag(StringUtf8Coder.of());
@TimerId("timerId")
private final TimerSpec timerSpec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(@Element KV<Void, String> element, @StateId("stateId") BagState<String> state, @TimerId("timerId") Timer timer, OutputReceiver<KV<Void, String>> r) {
}
@OnTimer("timerId")
public void onTimer() {
}
})).apply("gbk", GroupByKey.create());
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> stage.getUserStates().stream().anyMatch(spec -> spec.localName().equals("stateId")));
checkState(optionalStage.isPresent(), "Expected a stage with user state.");
ExecutableStage stage = optionalStage.get();
PipelineNode.PCollectionNode inputPCollection = stage.getInputPCollection();
// Ensure original key coder is not a LengthPrefixCoder
Map<String, RunnerApi.Coder> stageCoderMap = stage.getComponents().getCodersMap();
RunnerApi.Coder originalMainInputCoder = stageCoderMap.get(inputPCollection.getPCollection().getCoderId());
String originalKeyCoderId = ModelCoders.getKvCoderComponents(originalMainInputCoder).keyCoderId();
RunnerApi.Coder originalKeyCoder = stageCoderMap.get(originalKeyCoderId);
assertThat(originalKeyCoder.getSpec().getUrn(), is(CoderTranslation.JAVA_SERIALIZED_CODER_URN));
// Now create ProcessBundleDescriptor and check for the LengthPrefixCoder around the key coder
BeamFnApi.ProcessBundleDescriptor pbd = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, Endpoints.ApiServiceDescriptor.getDefaultInstance()).getProcessBundleDescriptor();
Map<String, RunnerApi.Coder> pbsCoderMap = pbd.getCodersMap();
RunnerApi.Coder pbsMainInputCoder = pbsCoderMap.get(pbd.getPcollectionsOrThrow(inputPCollection.getId()).getCoderId());
String keyCoderId = ModelCoders.getKvCoderComponents(pbsMainInputCoder).keyCoderId();
RunnerApi.Coder keyCoder = pbsCoderMap.get(keyCoderId);
ensureLengthPrefixed(keyCoder, originalKeyCoder, pbsCoderMap);
TimerReference timerRef = Iterables.getOnlyElement(stage.getTimers());
String timerTransformId = timerRef.transform().getId();
RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(pbd.getTransformsOrThrow(timerTransformId).getSpec().getPayload());
RunnerApi.TimerFamilySpec timerSpec = parDoPayload.getTimerFamilySpecsOrThrow(timerRef.localName());
RunnerApi.Coder timerCoder = pbsCoderMap.get(timerSpec.getTimerFamilyCoderId());
String timerKeyCoderId = timerCoder.getComponentCoderIds(0);
RunnerApi.Coder timerKeyCoder = pbsCoderMap.get(timerKeyCoderId);
ensureLengthPrefixed(timerKeyCoder, originalKeyCoder, pbsCoderMap);
}
use of org.apache.beam.sdk.transforms.DoFn.ProcessContext in project beam by apache.
the class ProcessBundleDescriptorsTest method testLengthPrefixingOfInputCoderExecutableStage.
@Test
public void testLengthPrefixingOfInputCoderExecutableStage() throws Exception {
Pipeline p = Pipeline.create();
Coder<Void> voidCoder = VoidCoder.of();
assertThat(ModelCoderRegistrar.isKnownCoder(voidCoder), is(false));
p.apply("impulse", Impulse.create()).apply(ParDo.of(new DoFn<byte[], Void>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
})).setCoder(voidCoder).apply(ParDo.of(new DoFn<Void, Void>() {
@ProcessElement
public void processElement(ProcessContext context, RestrictionTracker<Void, Void> tracker) {
}
@GetInitialRestriction
public Void getInitialRestriction() {
return null;
}
@NewTracker
public SomeTracker newTracker(@Restriction Void restriction) {
return null;
}
})).setCoder(voidCoder);
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
RunnerApi.Pipeline pipelineWithSdfExpanded = ProtoOverrides.updateTransform(PTransformTranslation.PAR_DO_TRANSFORM_URN, pipelineProto, SplittableParDoExpander.createSizedReplacement());
FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineWithSdfExpanded);
Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> stage.getTransforms().stream().anyMatch(transform -> transform.getTransform().getSpec().getUrn().equals(PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN)));
checkState(optionalStage.isPresent(), "Expected a stage with SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.");
ExecutableStage stage = optionalStage.get();
PipelineNode.PCollectionNode inputPCollection = stage.getInputPCollection();
Map<String, RunnerApi.Coder> stageCoderMap = stage.getComponents().getCodersMap();
RunnerApi.Coder originalMainInputCoder = stageCoderMap.get(inputPCollection.getPCollection().getCoderId());
BeamFnApi.ProcessBundleDescriptor pbd = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, Endpoints.ApiServiceDescriptor.getDefaultInstance()).getProcessBundleDescriptor();
Map<String, RunnerApi.Coder> pbsCoderMap = pbd.getCodersMap();
RunnerApi.Coder pbsMainInputCoder = pbsCoderMap.get(pbd.getPcollectionsOrThrow(inputPCollection.getId()).getCoderId());
RunnerApi.Coder kvCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(pbsMainInputCoder).keyCoderId());
RunnerApi.Coder keyCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(kvCoder).keyCoderId());
RunnerApi.Coder valueKvCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(kvCoder).valueCoderId());
RunnerApi.Coder valueCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(valueKvCoder).keyCoderId());
RunnerApi.Coder originalKvCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalMainInputCoder).keyCoderId());
RunnerApi.Coder originalKeyCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalKvCoder).keyCoderId());
RunnerApi.Coder originalvalueKvCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalKvCoder).valueCoderId());
RunnerApi.Coder originalvalueCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalvalueKvCoder).keyCoderId());
ensureLengthPrefixed(keyCoder, originalKeyCoder, pbsCoderMap);
ensureLengthPrefixed(valueCoder, originalvalueCoder, pbsCoderMap);
}
Aggregations