use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ProcessBundleHandlerTest method testOrderOfSetupTeardownCalls.
@Test
public void testOrderOfSetupTeardownCalls() throws Exception {
DoFnWithExecutionInformation doFnWithExecutionInformation = DoFnWithExecutionInformation.of(new TestDoFn(), TestDoFn.mainOutput, Collections.emptyMap(), DoFnSchemaInformation.create());
RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder().setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN).setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnWithExecutionInformation))).build();
RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.newBuilder().setDoFn(functionSpec).build();
BeamFnApi.ProcessBundleDescriptor processBundleDescriptor = BeamFnApi.ProcessBundleDescriptor.newBuilder().putTransforms("2L", PTransform.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build()).putOutputs("2L-output", "2L-output-pc").build()).putTransforms("3L", PTransform.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString())).putInputs("3L-input", "2L-output-pc").build()).putPcollections("2L-output-pc", PCollection.newBuilder().setWindowingStrategyId("window-strategy").setCoderId("2L-output-coder").setIsBounded(IsBounded.Enum.BOUNDED).build()).putWindowingStrategies("window-strategy", WindowingStrategy.newBuilder().setWindowCoderId("window-strategy-coder").setWindowFn(RunnerApi.FunctionSpec.newBuilder().setUrn("beam:window_fn:global_windows:v1")).setOutputTime(RunnerApi.OutputTime.Enum.END_OF_WINDOW).setAccumulationMode(RunnerApi.AccumulationMode.Enum.ACCUMULATING).setTrigger(RunnerApi.Trigger.newBuilder().setAlways(RunnerApi.Trigger.Always.getDefaultInstance())).setClosingBehavior(RunnerApi.ClosingBehavior.Enum.EMIT_ALWAYS).setOnTimeBehavior(RunnerApi.OnTimeBehavior.Enum.FIRE_ALWAYS).build()).putCoders("2L-output-coder", CoderTranslation.toProto(StringUtf8Coder.of()).getCoder()).putCoders("window-strategy-coder", Coder.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(ModelCoders.GLOBAL_WINDOW_CODER_URN).build()).build()).build();
Map<String, BeamFnApi.ProcessBundleDescriptor> fnApiRegistry = ImmutableMap.of("1L", processBundleDescriptor);
Map<String, PTransformRunnerFactory> urnToPTransformRunnerFactoryMap = Maps.newHashMap(REGISTERED_RUNNER_FACTORIES);
urnToPTransformRunnerFactoryMap.put(DATA_INPUT_URN, (context) -> null);
ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), Collections.emptySet(), fnApiRegistry::get, beamFnDataClient, null, /* beamFnStateClient */
null, /* finalizeBundleHandler */
new ShortIdMap(), urnToPTransformRunnerFactoryMap, Caches.noop(), new BundleProcessorCache());
handler.processBundle(BeamFnApi.InstructionRequest.newBuilder().setInstructionId("998L").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")).build());
handler.processBundle(BeamFnApi.InstructionRequest.newBuilder().setInstructionId("999L").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")).build());
handler.shutdown();
// setup and teardown should occur only once when processing multiple bundles for the same
// descriptor
assertThat(TestDoFn.orderOfOperations, contains("setUp", "startBundle", "finishBundle", "startBundle", "finishBundle", "tearDown"));
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method translateParDo.
public static ParDoPayload translateParDo(AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components) throws IOException {
final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform();
final Pipeline pipeline = appliedPTransform.getPipeline();
final DoFn<?, ?> doFn = parDo.getFn();
// Get main input.
Set<String> allInputs = appliedPTransform.getInputs().keySet().stream().map(TupleTag::getId).collect(Collectors.toSet());
Set<String> sideInputs = parDo.getSideInputs().values().stream().map(s -> s.getTagInternal().getId()).collect(Collectors.toSet());
String mainInputName = Iterables.getOnlyElement(Sets.difference(allInputs, sideInputs));
PCollection<?> mainInput = (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputName));
final DoFnSchemaInformation doFnSchemaInformation = ParDo.getDoFnSchemaInformation(doFn, mainInput);
return translateParDo((ParDo.MultiOutput) parDo, mainInput, doFnSchemaInformation, pipeline, components);
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method getMainInputName.
/**
* Returns the name of the main input of the ptransform.
*/
public static String getMainInputName(RunnerApi.PTransformOrBuilder ptransform) throws IOException {
checkArgument(PAR_DO_TRANSFORM_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PAIR_WITH_RESTRICTION_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PROCESS_ELEMENTS_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn()), "Unexpected payload type %s", ptransform.getSpec().getUrn());
ParDoPayload payload = ParDoPayload.parseFrom(ptransform.getSpec().getPayload());
return getMainInputName(ptransform, payload);
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class PipelineValidator method validateParDo.
private static void validateParDo(String id, PTransform transform, Components components, Set<String> requirements) throws Exception {
ParDoPayload payload = ParDoPayload.parseFrom(transform.getSpec().getPayload());
// side_inputs
for (String sideInputId : payload.getSideInputsMap().keySet()) {
checkArgument(transform.containsInputs(sideInputId), "Transform %s side input %s is not listed in the transform's inputs", id, sideInputId);
}
if (payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0) {
checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN));
// TODO: Validate state_specs and timer_specs
}
if (!payload.getRestrictionCoderId().isEmpty()) {
checkArgument(components.containsCoders(payload.getRestrictionCoderId()));
checkArgument(requirements.contains(ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN));
}
if (payload.getRequestsFinalization()) {
checkArgument(requirements.contains(ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN));
}
if (payload.getRequiresStableInput()) {
checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STABLE_INPUT_URN));
}
if (payload.getRequiresTimeSortedInput()) {
checkArgument(requirements.contains(ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN));
}
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class EnvironmentsTest method getEnvironmentPTransform.
@Test
public void getEnvironmentPTransform() throws IOException {
Pipeline p = Pipeline.create();
SdkComponents components = SdkComponents.create();
Environment env = Environments.createDockerEnvironment("java");
components.registerEnvironment(env);
ParDoPayload payload = ParDoTranslation.translateParDo(ParDo.of(new DoFn<String, String>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
}).withOutputTags(new TupleTag<>(), TupleTagList.empty()), PCollection.createPrimitiveOutputInternal(p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of()), DoFnSchemaInformation.create(), Pipeline.create(), components);
RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components.toComponents());
PTransform ptransform = PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(payload.toByteString()).build()).setEnvironmentId(components.getOnlyEnvironmentId()).build();
Environment env1 = Environments.getEnvironment(ptransform, rehydratedComponents).get();
assertThat(env1, equalTo(components.toComponents().getEnvironmentsOrThrow(ptransform.getEnvironmentId())));
}
Aggregations