Search in sources :

Example 66 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class FlinkBatchPortablePipelineTranslator method translateExecutableStage.

private static <InputT> void translateExecutableStage(PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
    // TODO: Fail on splittable DoFns.
    // TODO: Special-case single outputs to avoid multiplexing PCollections.
    RunnerApi.Components components = pipeline.getComponents();
    Map<String, String> outputs = transform.getTransform().getOutputsMap();
    // Mapping from PCollection id to coder tag id.
    BiMap<String, Integer> outputMap = createOutputMap(outputs.values());
    // Collect all output Coders and create a UnionCoder for our tagged outputs.
    List<Coder<?>> unionCoders = Lists.newArrayList();
    // Enforce tuple tag sorting by union tag index.
    Map<String, Coder<WindowedValue<?>>> outputCoders = Maps.newHashMap();
    for (String collectionId : new TreeMap<>(outputMap.inverse()).values()) {
        PCollectionNode collectionNode = PipelineNode.pCollection(collectionId, components.getPcollectionsOrThrow(collectionId));
        Coder<WindowedValue<?>> coder;
        try {
            coder = (Coder) WireCoders.instantiateRunnerWireCoder(collectionNode, components);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        outputCoders.put(collectionId, coder);
        unionCoders.add(coder);
    }
    UnionCoder unionCoder = UnionCoder.of(unionCoders);
    TypeInformation<RawUnionValue> typeInformation = new CoderTypeInformation<>(unionCoder, context.getPipelineOptions());
    RunnerApi.ExecutableStagePayload stagePayload;
    try {
        stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getTransform().getSpec().getPayload());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String inputPCollectionId = stagePayload.getInput();
    Coder<WindowedValue<InputT>> windowedInputCoder = instantiateCoder(inputPCollectionId, components);
    DataSet<WindowedValue<InputT>> inputDataSet = context.getDataSetOrThrow(inputPCollectionId);
    final FlinkExecutableStageFunction<InputT> function = new FlinkExecutableStageFunction<>(transform.getTransform().getUniqueName(), context.getPipelineOptions(), stagePayload, context.getJobInfo(), outputMap, FlinkExecutableStageContextFactory.getInstance(), getWindowingStrategy(inputPCollectionId, components).getWindowFn().windowCoder(), windowedInputCoder);
    final String operatorName = generateNameFromStagePayload(stagePayload);
    final SingleInputUdfOperator taggedDataset;
    if (stagePayload.getUserStatesCount() > 0 || stagePayload.getTimersCount() > 0) {
        Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) windowedInputCoder).getValueCoder();
        // Stateful stages are only allowed of KV input to be able to group on the key
        if (!(valueCoder instanceof KvCoder)) {
            throw new IllegalStateException(String.format(Locale.ENGLISH, "The element coder for stateful DoFn '%s' must be KvCoder but is: %s", inputPCollectionId, valueCoder.getClass().getSimpleName()));
        }
        Coder keyCoder = ((KvCoder) valueCoder).getKeyCoder();
        Grouping<WindowedValue<InputT>> groupedInput = inputDataSet.groupBy(new KvKeySelector<>(keyCoder));
        boolean requiresTimeSortedInput = requiresTimeSortedInput(stagePayload, false);
        if (requiresTimeSortedInput) {
            groupedInput = ((UnsortedGrouping<WindowedValue<InputT>>) groupedInput).sortGroup(WindowedValue::getTimestamp, Order.ASCENDING);
        }
        taggedDataset = new GroupReduceOperator<>(groupedInput, typeInformation, function, operatorName);
    } else {
        taggedDataset = new MapPartitionOperator<>(inputDataSet, typeInformation, function, operatorName);
    }
    for (SideInputId sideInputId : stagePayload.getSideInputsList()) {
        String collectionId = stagePayload.getComponents().getTransformsOrThrow(sideInputId.getTransformId()).getInputsOrThrow(sideInputId.getLocalName());
        // Register under the global PCollection name. Only ExecutableStageFunction needs to know the
        // mapping from local name to global name and how to translate the broadcast data to a state
        // API view.
        taggedDataset.withBroadcastSet(context.getDataSetOrThrow(collectionId), collectionId);
    }
    for (String collectionId : outputs.values()) {
        pruneOutput(taggedDataset, context, outputMap.get(collectionId), outputCoders.get(collectionId), collectionId);
    }
    if (outputs.isEmpty()) {
        // NOTE: After pipeline translation, we traverse the set of unconsumed PCollections and add a
        // no-op sink to each to make sure they are materialized by Flink. However, some SDK-executed
        // stages have no runner-visible output after fusion. We handle this case by adding a sink
        // here.
        taggedDataset.output(new DiscardingOutputFormat<>()).name("DiscardingOutput");
    }
}
Also used : DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) FlinkExecutableStageFunction(org.apache.beam.runners.flink.translation.functions.FlinkExecutableStageFunction) WindowedValue(org.apache.beam.sdk.util.WindowedValue) SideInputId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) Coder(org.apache.beam.sdk.coders.Coder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) SingleInputUdfOperator(org.apache.flink.api.java.operators.SingleInputUdfOperator) KvCoder(org.apache.beam.sdk.coders.KvCoder) IOException(java.io.IOException) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)

Example 67 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class InsertFetchAndFilterStreamingSideInputNodesTest method testSdkParDoWithoutSideInput.

@Test
public void testSdkParDoWithoutSideInput() throws Exception {
    Pipeline p = Pipeline.create();
    PCollection<String> pc = p.apply(Create.of("a", "b", "c"));
    pc.apply(ParDo.of(new TestDoFn()));
    RunnerApi.Pipeline pipeline = PipelineTranslation.toProto(p);
    Node predecessor = createParDoNode("predecessor");
    Node mainInput = InstructionOutputNode.create(new InstructionOutput(), "fakeId");
    Node sideInputParDo = createParDoNode("noSideInput");
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(predecessor);
    network.addNode(mainInput);
    network.addNode(sideInputParDo);
    network.addEdge(predecessor, mainInput, DefaultEdge.create());
    network.addEdge(mainInput, sideInputParDo, DefaultEdge.create());
    Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
    network = InsertFetchAndFilterStreamingSideInputNodes.with(pipeline).forNetwork(network);
    assertThatNetworksAreIdentical(inputNetwork, network);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) FetchAndFilterStreamingSideInputsNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 68 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class ProcessBundleDescriptors method lengthPrefixAnyInputCoder.

/**
 * Patches the input coder of the transform to ensure that the byte representation of input used
 * at the Runner, matches the byte representation received from the SDK Harness.
 */
private static void lengthPrefixAnyInputCoder(String inputPCollectionId, Components.Builder componentsBuilder) {
    RunnerApi.PCollection pcollection = componentsBuilder.getPcollectionsOrThrow(inputPCollectionId);
    String newInputCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(pcollection.getCoderId(), componentsBuilder, false);
    componentsBuilder.putPcollections(inputPCollectionId, pcollection.toBuilder().setCoderId(newInputCoderId).build());
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi)

Example 69 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class RemoteExecutionTest method testExecutionWithSideInput.

@Test
public void testExecutionWithSideInput() throws Exception {
    launchSdkHarness(PipelineOptionsFactory.create());
    Pipeline p = Pipeline.create();
    addExperiment(p.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    // TODO(BEAM-10097): Remove experiment once all portable runners support this view type
    addExperiment(p.getOptions().as(ExperimentalOptions.class), "use_runner_v2");
    PCollection<String> input = p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], String>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            ctxt.output("zero");
            ctxt.output("one");
            ctxt.output("two");
        }
    })).setCoder(StringUtf8Coder.of());
    PCollectionView<Iterable<String>> iterableView = input.apply("createIterableSideInput", View.asIterable());
    PCollectionView<Map<String, Iterable<String>>> multimapView = input.apply(WithKeys.of("key")).apply("createMultimapSideInput", View.asMultimap());
    input.apply("readSideInput", ParDo.of(new DoFn<String, KV<String, String>>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            for (String value : context.sideInput(iterableView)) {
                context.output(KV.of(context.element(), value));
            }
            for (Map.Entry<String, Iterable<String>> entry : context.sideInput(multimapView).entrySet()) {
                for (String value : entry.getValue()) {
                    context.output(KV.of(context.element(), entry.getKey() + ":" + value));
                }
            }
        }
    }).withSideInputs(iterableView, multimapView)).setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())).apply("gbk", GroupByKey.create());
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
    Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> !stage.getSideInputs().isEmpty());
    checkState(optionalStage.isPresent(), "Expected a stage with side inputs.");
    ExecutableStage stage = optionalStage.get();
    ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, dataServer.getApiServiceDescriptor(), stateServer.getApiServiceDescriptor());
    BundleProcessor processor = controlClient.getProcessor(descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations(), stateDelegator);
    Map<String, Coder> remoteOutputCoders = descriptor.getRemoteOutputCoders();
    Map<String, Collection<WindowedValue<?>>> outputValues = new HashMap<>();
    Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
    for (Entry<String, Coder> remoteOutputCoder : remoteOutputCoders.entrySet()) {
        List<WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
        outputValues.put(remoteOutputCoder.getKey(), outputContents);
        outputReceivers.put(remoteOutputCoder.getKey(), RemoteOutputReceiver.of((Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputContents::add));
    }
    StateRequestHandler stateRequestHandler = StateRequestHandlers.forSideInputHandlerFactory(descriptor.getSideInputSpecs(), new SideInputHandlerFactory() {

        @Override
        public <V, W extends BoundedWindow> IterableSideInputHandler<V, W> forIterableSideInput(String pTransformId, String sideInputId, Coder<V> elementCoder, Coder<W> windowCoder) {
            return new IterableSideInputHandler<V, W>() {

                @Override
                public Iterable<V> get(W window) {
                    return (Iterable) Arrays.asList("A", "B", "C");
                }

                @Override
                public Coder<V> elementCoder() {
                    return elementCoder;
                }
            };
        }

        @Override
        public <K, V, W extends BoundedWindow> MultimapSideInputHandler<K, V, W> forMultimapSideInput(String pTransformId, String sideInputId, KvCoder<K, V> elementCoder, Coder<W> windowCoder) {
            return new MultimapSideInputHandler<K, V, W>() {

                @Override
                public Iterable<K> get(W window) {
                    return (Iterable) Arrays.asList("key1", "key2");
                }

                @Override
                public Iterable<V> get(K key, W window) {
                    if ("key1".equals(key)) {
                        return (Iterable) Arrays.asList("H", "I", "J");
                    } else if ("key2".equals(key)) {
                        return (Iterable) Arrays.asList("M", "N", "O");
                    }
                    return Collections.emptyList();
                }

                @Override
                public Coder<K> keyCoder() {
                    return elementCoder.getKeyCoder();
                }

                @Override
                public Coder<V> valueCoder() {
                    return elementCoder.getValueCoder();
                }
            };
        }
    });
    BundleProgressHandler progressHandler = BundleProgressHandler.ignored();
    try (RemoteBundle bundle = processor.newBundle(outputReceivers, stateRequestHandler, progressHandler)) {
        Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow("X"));
        Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow("Y"));
    }
    for (Collection<WindowedValue<?>> windowedValues : outputValues.values()) {
        assertThat(windowedValues, containsInAnyOrder(valueInGlobalWindow(KV.of("X", "A")), valueInGlobalWindow(KV.of("X", "B")), valueInGlobalWindow(KV.of("X", "C")), valueInGlobalWindow(KV.of("X", "key1:H")), valueInGlobalWindow(KV.of("X", "key1:I")), valueInGlobalWindow(KV.of("X", "key1:J")), valueInGlobalWindow(KV.of("X", "key2:M")), valueInGlobalWindow(KV.of("X", "key2:N")), valueInGlobalWindow(KV.of("X", "key2:O")), valueInGlobalWindow(KV.of("Y", "A")), valueInGlobalWindow(KV.of("Y", "B")), valueInGlobalWindow(KV.of("Y", "C")), valueInGlobalWindow(KV.of("Y", "key1:H")), valueInGlobalWindow(KV.of("Y", "key1:I")), valueInGlobalWindow(KV.of("Y", "key1:J")), valueInGlobalWindow(KV.of("Y", "key2:M")), valueInGlobalWindow(KV.of("Y", "key2:N")), valueInGlobalWindow(KV.of("Y", "key2:O"))));
    }
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) IsEmptyIterable(org.hamcrest.collection.IsEmptyIterable) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Entry(java.util.Map.Entry) BundleProcessor(org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) SideInputHandlerFactory(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.SideInputHandlerFactory) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ExecutableProcessBundleDescriptor(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) IterableSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.IterableSideInputHandler) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) MultimapSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.MultimapSideInputHandler) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Test(org.junit.Test)

Example 70 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class RemoteExecutionTest method testMetrics.

@Test
@SuppressWarnings("FutureReturnValueIgnored")
public void testMetrics() throws Exception {
    launchSdkHarness(PipelineOptionsFactory.create());
    MetricsDoFn metricsDoFn = new MetricsDoFn();
    Pipeline p = Pipeline.create();
    PCollection<String> input = p.apply("impulse", Impulse.create()).apply("create", ParDo.of(metricsDoFn)).setCoder(StringUtf8Coder.of());
    SingleOutput<String, String> pardo = ParDo.of(new DoFn<String, String>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            // Output the element twice to keep unique numbers in asserts, 6 output elements.
            ctxt.output(ctxt.element());
            ctxt.output(ctxt.element());
        }
    });
    input.apply("processA", pardo).setCoder(StringUtf8Coder.of());
    input.apply("processB", pardo).setCoder(StringUtf8Coder.of());
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
    Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> true);
    checkState(optionalStage.isPresent(), "Expected a stage with side inputs.");
    ExecutableStage stage = optionalStage.get();
    ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, dataServer.getApiServiceDescriptor(), stateServer.getApiServiceDescriptor());
    BundleProcessor processor = controlClient.getProcessor(descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations(), stateDelegator);
    Map<String, Coder> remoteOutputCoders = descriptor.getRemoteOutputCoders();
    Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
    for (Entry<String, Coder> remoteOutputCoder : remoteOutputCoders.entrySet()) {
        List<WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
        outputReceivers.put(remoteOutputCoder.getKey(), RemoteOutputReceiver.of((Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputContents::add));
    }
    final String testPTransformId = "create-ParMultiDo-Metrics-";
    BundleProgressHandler progressHandler = new BundleProgressHandler() {

        @Override
        public void onProgress(ProcessBundleProgressResponse response) {
            MetricsDoFn.ALLOW_COMPLETION.get(metricsDoFn.uuid).countDown();
            List<Matcher<MonitoringInfo>> matchers = new ArrayList<>();
            // We expect all user counters except for the ones in @FinishBundle
            // Since non-user metrics are registered at bundle creation time, they will still report
            // values most of which will be 0.
            SimpleMonitoringInfoBuilder builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_COUNTER_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64SumValue(1);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_COUNTER_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64SumValue(10);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_COUNTER_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            matchers.add(not(MonitoringInfoMatchers.matchSetFields(builder.build())));
            // User Distributions.
            builder.setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_DISTRIBUTION_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64DistributionValue(DistributionData.create(1, 1, 1, 1));
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_DISTRIBUTION_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64DistributionValue(DistributionData.create(10, 1, 10, 10));
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_DISTRIBUTION_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            matchers.add(not(MonitoringInfoMatchers.matchSetFields(builder.build())));
            assertThat(response.getMonitoringInfosList(), Matchers.hasItems(matchers.toArray(new Matcher[0])));
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
            List<Matcher<MonitoringInfo>> matchers = new ArrayList<>();
            // User Counters.
            SimpleMonitoringInfoBuilder builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_COUNTER_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64SumValue(1);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_COUNTER_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64SumValue(10);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_COUNTER_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64SumValue(100);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            // User Distributions.
            builder.setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_DISTRIBUTION_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64DistributionValue(DistributionData.create(1, 1, 1, 1));
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_DISTRIBUTION_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64DistributionValue(DistributionData.create(10, 1, 10, 10));
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64).setLabel(MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName()).setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_DISTRIBUTION_NAME);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            builder.setInt64DistributionValue(DistributionData.create(100, 1, 100, 100));
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            // The element counter should be counted only once for the pcollection.
            // So there should be only two elements.
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
            builder.setLabel(MonitoringInfoConstants.Labels.PCOLLECTION, "impulse.out");
            builder.setInt64SumValue(1);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
            builder.setLabel(MonitoringInfoConstants.Labels.PCOLLECTION, "create/ParMultiDo(Metrics).output");
            builder.setInt64SumValue(3);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            // Verify that the element count is not double counted if two PCollections consume it.
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
            builder.setLabel(MonitoringInfoConstants.Labels.PCOLLECTION, "processA/ParMultiDo(Anonymous).output");
            builder.setInt64SumValue(6);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
            builder.setLabel(MonitoringInfoConstants.Labels.PCOLLECTION, "processB/ParMultiDo(Anonymous).output");
            builder.setInt64SumValue(6);
            matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));
            // Check for execution time metrics for the testPTransformId
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(MonitoringInfoConstants.Urns.START_BUNDLE_MSECS);
            builder.setType(TypeUrns.SUM_INT64_TYPE);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            matchers.add(allOf(MonitoringInfoMatchers.matchSetFields(builder.build()), MonitoringInfoMatchers.counterValueGreaterThanOrEqualTo(1)));
            // Check for execution time metrics for the testPTransformId
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(Urns.PROCESS_BUNDLE_MSECS);
            builder.setType(TypeUrns.SUM_INT64_TYPE);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            matchers.add(allOf(MonitoringInfoMatchers.matchSetFields(builder.build()), MonitoringInfoMatchers.counterValueGreaterThanOrEqualTo(2)));
            builder = new SimpleMonitoringInfoBuilder();
            builder.setUrn(Urns.FINISH_BUNDLE_MSECS);
            builder.setType(TypeUrns.SUM_INT64_TYPE);
            builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
            matchers.add(allOf(MonitoringInfoMatchers.matchSetFields(builder.build()), MonitoringInfoMatchers.counterValueGreaterThanOrEqualTo(3)));
            assertThat(response.getMonitoringInfosList(), Matchers.hasItems(matchers.toArray(new Matcher[0])));
        }
    };
    ExecutorService executor = Executors.newSingleThreadExecutor();
    try (RemoteBundle bundle = processor.newBundle(outputReceivers, StateRequestHandler.unsupported(), progressHandler)) {
        Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "X")));
        executor.submit(() -> {
            checkState(MetricsDoFn.AFTER_PROCESS.get(metricsDoFn.uuid).await(60, TimeUnit.SECONDS), "Runner waited too long for DoFn to get to AFTER_PROCESS.");
            bundle.requestProgress();
            return (Void) null;
        });
    }
    executor.shutdown();
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Matcher(org.hamcrest.Matcher) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) SimpleMonitoringInfoBuilder(org.apache.beam.runners.core.metrics.SimpleMonitoringInfoBuilder) BundleProcessor(org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ExecutableProcessBundleDescriptor(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) Pipeline(org.apache.beam.sdk.Pipeline) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorService(java.util.concurrent.ExecutorService) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) Test(org.junit.Test)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)45 Test (org.junit.Test)45 Pipeline (org.apache.beam.sdk.Pipeline)25 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)24 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)22 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)22 Map (java.util.Map)21 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)21 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)21 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)17 ArrayList (java.util.ArrayList)16 HashMap (java.util.HashMap)14 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)13 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 PCollection (org.apache.beam.sdk.values.PCollection)12 Coder (org.apache.beam.sdk.coders.Coder)11 KV (org.apache.beam.sdk.values.KV)11 Collection (java.util.Collection)10 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)10 IOException (java.io.IOException)9