Search in sources :

Example 11 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class JavaClassLookupTransformProviderTest method testClassLookupExpansionRequestConstruction.

void testClassLookupExpansionRequestConstruction(ExternalTransforms.JavaClassLookupPayload payload, Map<String, Object> fieldsToVerify) {
    Pipeline p = Pipeline.create();
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    ExpansionApi.ExpansionRequest request = ExpansionApi.ExpansionRequest.newBuilder().setComponents(pipelineProto.getComponents()).setTransform(RunnerApi.PTransform.newBuilder().setUniqueName(TEST_NAME).setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP)).setPayload(payload.toByteString()))).setNamespace(TEST_NAMESPACE).build();
    ExpansionApi.ExpansionResponse response = expansionService.expand(request);
    RunnerApi.PTransform expandedTransform = response.getTransform();
    assertEquals(TEST_NAMESPACE + TEST_NAME, expandedTransform.getUniqueName());
    assertThat(expandedTransform.getInputsCount(), Matchers.is(0));
    assertThat(expandedTransform.getOutputsCount(), Matchers.is(1));
    assertEquals(2, expandedTransform.getSubtransformsCount());
    assertEquals(2, expandedTransform.getSubtransformsCount());
    assertThat(expandedTransform.getSubtransforms(0), anyOf(containsString("MyCreateTransform"), containsString("MyParDoTransform")));
    assertThat(expandedTransform.getSubtransforms(1), anyOf(containsString("MyCreateTransform"), containsString("MyParDoTransform")));
    org.apache.beam.model.pipeline.v1.RunnerApi.PTransform userParDoTransform = null;
    for (String transformId : response.getComponents().getTransformsMap().keySet()) {
        if (transformId.contains("ParMultiDo-Dummy-")) {
            userParDoTransform = response.getComponents().getTransformsMap().get(transformId);
        }
    }
    assertNotNull(userParDoTransform);
    ParDoPayload parDoPayload = null;
    try {
        parDoPayload = ParDoPayload.parseFrom(userParDoTransform.getSpec().getPayload());
    } catch (InvalidProtocolBufferException e) {
        throw new RuntimeException(e);
    }
    assertNotNull(parDoPayload);
    DummyDoFn doFn = (DummyDoFn) ParDoTranslation.doFnWithExecutionInformationFromProto(parDoPayload.getDoFn()).getDoFn();
    System.out.println("DoFn" + doFn);
    List<String> verifiedFields = new ArrayList<>();
    if (fieldsToVerify.keySet().contains("strField1")) {
        assertEquals(doFn.strField1, fieldsToVerify.get("strField1"));
        verifiedFields.add("strField1");
    }
    if (fieldsToVerify.keySet().contains("strField2")) {
        assertEquals(doFn.strField2, fieldsToVerify.get("strField2"));
        verifiedFields.add("strField2");
    }
    if (fieldsToVerify.keySet().contains("intField1")) {
        assertEquals(doFn.intField1, fieldsToVerify.get("intField1"));
        verifiedFields.add("intField1");
    }
    if (fieldsToVerify.keySet().contains("doubleWrapperField")) {
        assertEquals(doFn.doubleWrapperField, fieldsToVerify.get("doubleWrapperField"));
        verifiedFields.add("doubleWrapperField");
    }
    if (fieldsToVerify.containsKey("complexTypeStrField")) {
        assertEquals(doFn.complexTypeField.complexTypeStrField, fieldsToVerify.get("complexTypeStrField"));
        verifiedFields.add("complexTypeStrField");
    }
    if (fieldsToVerify.containsKey("complexTypeIntField")) {
        assertEquals(doFn.complexTypeField.complexTypeIntField, fieldsToVerify.get("complexTypeIntField"));
        verifiedFields.add("complexTypeIntField");
    }
    if (fieldsToVerify.keySet().contains("strArrayField")) {
        assertArrayEquals(doFn.strArrayField, (String[]) fieldsToVerify.get("strArrayField"));
        verifiedFields.add("strArrayField");
    }
    if (fieldsToVerify.keySet().contains("strListField")) {
        assertEquals(doFn.strListField, (List) fieldsToVerify.get("strListField"));
        verifiedFields.add("strListField");
    }
    if (fieldsToVerify.keySet().contains("complexTypeArrayField")) {
        assertArrayEquals(doFn.complexTypeArrayField, (DummyComplexType[]) fieldsToVerify.get("complexTypeArrayField"));
        verifiedFields.add("complexTypeArrayField");
    }
    if (fieldsToVerify.keySet().contains("complexTypeListField")) {
        assertEquals(doFn.complexTypeListField, (List) fieldsToVerify.get("complexTypeListField"));
        verifiedFields.add("complexTypeListField");
    }
    List<String> unverifiedFields = new ArrayList<>(fieldsToVerify.keySet());
    unverifiedFields.removeAll(verifiedFields);
    if (!unverifiedFields.isEmpty()) {
        throw new RuntimeException("Failed to verify some fields: " + unverifiedFields);
    }
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) ArrayList(java.util.ArrayList) Matchers.containsString(org.hamcrest.Matchers.containsString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi)

Example 12 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ProcessBundleHandlerTest method setupProcessBundleHandlerForSimpleRecordingDoFn.

private ProcessBundleHandler setupProcessBundleHandlerForSimpleRecordingDoFn(List<String> dataOutput, List<Timers> timerOutput, boolean enableOutputEmbedding) throws Exception {
    DoFnWithExecutionInformation doFnWithExecutionInformation = DoFnWithExecutionInformation.of(new SimpleDoFn(), SimpleDoFn.MAIN_OUTPUT_TAG, Collections.emptyMap(), DoFnSchemaInformation.create());
    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder().setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN).setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnWithExecutionInformation))).build();
    RunnerApi.ParDoPayload parDoPayload = ParDoPayload.newBuilder().setDoFn(functionSpec).putTimerFamilySpecs("tfs-" + SimpleDoFn.TIMER_FAMILY_ID, TimerFamilySpec.newBuilder().setTimeDomain(RunnerApi.TimeDomain.Enum.EVENT_TIME).setTimerFamilyCoderId("timer-coder").build()).build();
    BeamFnApi.ProcessBundleDescriptor processBundleDescriptor = ProcessBundleDescriptor.newBuilder().putTransforms("2L", PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build()).putOutputs("2L-output", "2L-output-pc").build()).putTransforms("3L", PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString())).putInputs("3L-input", "2L-output-pc").build()).putPcollections("2L-output-pc", PCollection.newBuilder().setWindowingStrategyId("window-strategy").setCoderId("2L-output-coder").setIsBounded(IsBounded.Enum.BOUNDED).build()).putWindowingStrategies("window-strategy", WindowingStrategy.newBuilder().setWindowCoderId("window-strategy-coder").setWindowFn(FunctionSpec.newBuilder().setUrn("beam:window_fn:global_windows:v1")).setOutputTime(OutputTime.Enum.END_OF_WINDOW).setAccumulationMode(AccumulationMode.Enum.ACCUMULATING).setTrigger(Trigger.newBuilder().setAlways(Always.getDefaultInstance())).setClosingBehavior(ClosingBehavior.Enum.EMIT_ALWAYS).setOnTimeBehavior(OnTimeBehavior.Enum.FIRE_ALWAYS).build()).setTimerApiServiceDescriptor(ApiServiceDescriptor.newBuilder().setUrl("url").build()).putCoders("string_coder", CoderTranslation.toProto(StringUtf8Coder.of()).getCoder()).putCoders("2L-output-coder", Coder.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.KV_CODER_URN).build()).addComponentCoderIds("string_coder").addComponentCoderIds("string_coder").build()).putCoders("window-strategy-coder", Coder.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.GLOBAL_WINDOW_CODER_URN).build()).build()).putCoders("timer-coder", Coder.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.TIMER_CODER_URN)).addComponentCoderIds("string_coder").addComponentCoderIds("window-strategy-coder").build()).build();
    Map<String, BeamFnApi.ProcessBundleDescriptor> fnApiRegistry = ImmutableMap.of("1L", processBundleDescriptor);
    Map<String, PTransformRunnerFactory> urnToPTransformRunnerFactoryMap = Maps.newHashMap(REGISTERED_RUNNER_FACTORIES);
    urnToPTransformRunnerFactoryMap.put(DATA_INPUT_URN, (PTransformRunnerFactory<Object>) (context) -> {
        context.addIncomingDataEndpoint(ApiServiceDescriptor.getDefaultInstance(), KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), (input) -> {
            dataOutput.add(input.getValue());
        });
        return null;
    });
    Mockito.doAnswer((invocation) -> new BeamFnDataOutboundAggregator(PipelineOptionsFactory.create(), invocation.getArgument(1), new StreamObserver<Elements>() {

        @Override
        public void onNext(Elements elements) {
            for (Timers timer : elements.getTimersList()) {
                timerOutput.addAll(elements.getTimersList());
            }
        }

        @Override
        public void onError(Throwable throwable) {
        }

        @Override
        public void onCompleted() {
        }
    }, invocation.getArgument(2))).when(beamFnDataClient).createOutboundAggregator(any(), any(), anyBoolean());
    return new ProcessBundleHandler(PipelineOptionsFactory.create(), enableOutputEmbedding ? Collections.singleton(BeamUrns.getUrn(StandardRunnerProtocols.Enum.CONTROL_RESPONSE_ELEMENTS_EMBEDDING)) : Collections.emptySet(), fnApiRegistry::get, beamFnDataClient, null, /* beamFnStateClient */
    null, /* finalizeBundleHandler */
    new ShortIdMap(), urnToPTransformRunnerFactoryMap, Caches.noop(), new BundleProcessorCache());
}
Also used : BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) Assert.assertNotSame(org.junit.Assert.assertNotSame) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MockitoAnnotations(org.mockito.MockitoAnnotations) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Arrays.asList(java.util.Arrays.asList) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Map(java.util.Map) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Uninterruptibles(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.Uninterruptibles) ApiServiceDescriptor(org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) KvCoder(org.apache.beam.sdk.coders.KvCoder) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) TimerEndpoint(org.apache.beam.sdk.fn.data.TimerEndpoint) Set(java.util.Set) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) Data(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) StandardRunnerProtocols(org.apache.beam.model.pipeline.v1.RunnerApi.StandardRunnerProtocols) Matchers.contains(org.hamcrest.Matchers.contains) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) Matchers.is(org.hamcrest.Matchers.is) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) ClosingBehavior(org.apache.beam.model.pipeline.v1.RunnerApi.ClosingBehavior) KV(org.apache.beam.sdk.values.KV) TimerMap(org.apache.beam.sdk.state.TimerMap) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) Mock(org.mockito.Mock) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) RunWith(org.junit.runner.RunWith) TimerFamilySpec(org.apache.beam.model.pipeline.v1.RunnerApi.TimerFamilySpec) ArgumentMatchers.anyBoolean(org.mockito.ArgumentMatchers.anyBoolean) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Assert.assertSame(org.junit.Assert.assertSame) Timers(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Timers) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) TimerSpec(org.apache.beam.sdk.state.TimerSpec) TupleTag(org.apache.beam.sdk.values.TupleTag) Cache(org.apache.beam.fn.harness.Cache) BeamFnDataClient(org.apache.beam.fn.harness.data.BeamFnDataClient) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) CloseableFnDataReceiver(org.apache.beam.sdk.fn.data.CloseableFnDataReceiver) ProcessBundleRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest) TimerFamilyDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerFamilyDeclaration) Assert.assertTrue(org.junit.Assert.assertTrue) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Test(org.junit.Test) OnTimeBehavior(org.apache.beam.model.pipeline.v1.RunnerApi.OnTimeBehavior) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) BeamUrns(org.apache.beam.runners.core.construction.BeamUrns) DataEndpoint(org.apache.beam.sdk.fn.data.DataEndpoint) Assert.assertNull(org.junit.Assert.assertNull) AccumulationMode(org.apache.beam.model.pipeline.v1.RunnerApi.AccumulationMode) OutputTime(org.apache.beam.model.pipeline.v1.RunnerApi.OutputTime) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) StateResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateResponse) Coder(org.apache.beam.model.pipeline.v1.RunnerApi.Coder) BeamFnStateClient(org.apache.beam.fn.harness.state.BeamFnStateClient) CoderTranslation(org.apache.beam.runners.core.construction.CoderTranslation) Mockito.argThat(org.mockito.Mockito.argThat) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) IsBounded(org.apache.beam.model.pipeline.v1.RunnerApi.IsBounded) Mockito.verifyNoMoreInteractions(org.mockito.Mockito.verifyNoMoreInteractions) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) BundleProcessor(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor) REGISTERED_RUNNER_FACTORIES(org.apache.beam.fn.harness.control.ProcessBundleHandler.REGISTERED_RUNNER_FACTORIES) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Collection(java.util.Collection) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) List(java.util.List) InstructionRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest) StateRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateRequest) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Trigger(org.apache.beam.model.pipeline.v1.RunnerApi.Trigger) Matchers.equalTo(org.hamcrest.Matchers.equalTo) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) WindowingStrategy(org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Always(org.apache.beam.model.pipeline.v1.RunnerApi.Trigger.Always) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Assert.assertThrows(org.junit.Assert.assertThrows) IsEmptyCollection.empty(org.hamcrest.collection.IsEmptyCollection.empty) CompletableFuture(java.util.concurrent.CompletableFuture) BeamFnStateGrpcClientCache(org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) BeamFnDataReadRunner(org.apache.beam.fn.harness.BeamFnDataReadRunner) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) HashSet(java.util.HashSet) CacheToken(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest.CacheToken) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) InstructionResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ParDoTranslation(org.apache.beam.runners.core.construction.ParDoTranslation) Assert.assertNotNull(org.junit.Assert.assertNotNull) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) Caches(org.apache.beam.fn.harness.Caches) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) Collections(java.util.Collections) BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) Timers(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Timers)

Aggregations

ParDoPayload (org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)10 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)6 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 ByteString (com.google.protobuf.ByteString)2 IOException (java.io.IOException)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Set (java.util.Set)2 PTransformRunnerFactory (org.apache.beam.fn.harness.PTransformRunnerFactory)2 BundleProcessorCache (org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache)2 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)2 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)2 Pipeline (org.apache.beam.sdk.Pipeline)2 ParDoPayload (org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload)2 MultiOutput (org.apache.beam.sdk.transforms.ParDo.MultiOutput)2 DoFnWithExecutionInformation (org.apache.beam.sdk.util.DoFnWithExecutionInformation)2