Search in sources:

Example 6 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class GreedyStageFuserTest method sideInputIncludedInStage.

/**
 * Fuses a stage rooted at {@code read.out} starting from the {@code parDo} transform, which also
 * consumes {@code side_read.out} as a side input, and checks that the stage lists that side input
 * and has no output PCollections.
 */
@Test
public void sideInputIncludedInStage() {
    Environment commonEnv = Environments.createDockerEnvironment("common");

    // impulse.out -> read -> read.out (main input of parDo)
    PTransform read =
        PTransform.newBuilder()
            .setUniqueName("read")
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();
    PCollection readOut = PCollection.newBuilder().setUniqueName("read.out").build();

    // impulse.out -> side_read -> side_read.out (side input of parDo)
    PTransform sideRead =
        PTransform.newBuilder()
            .setUniqueName("side_read")
            .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN))
            .putInputs("input", "impulse.out")
            .putOutputs("output", "side_read.out")
            .build();
    PCollection sideReadOut = PCollection.newBuilder().setUniqueName("side_read.out").build();

    // read.out + side_read.out -> parDo -> parDo.out
    PTransform parDo =
        PTransform.newBuilder()
            .setUniqueName("parDo")
            .putInputs("input", "read.out")
            .putInputs("side_input", "side_read.out")
            .putOutputs("output", "parDo.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .putSideInputs("side_input", SideInput.getDefaultInstance())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();
    PCollection parDoOut = PCollection.newBuilder().setUniqueName("parDo.out").build();

    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", read)
                .putPcollections("read.out", readOut)
                .putTransforms("side_read", sideRead)
                .putPcollections("side_read.out", sideReadOut)
                .putTransforms("parDo", parDo)
                .putPcollections("parDo.out", parDoOut)
                .putEnvironments("common", commonEnv)
                .build());

    PTransformNode readNode = PipelineNode.pTransform("read", read);
    PTransformNode parDoNode = PipelineNode.pTransform("parDo", parDo);
    PCollectionNode stageInput = getOnlyElement(pipeline.getOutputPCollections(readNode));

    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(pipeline, stageInput, ImmutableSet.of(parDoNode));

    SideInputReference expectedSideInput =
        SideInputReference.of(
            parDoNode, "side_input", PipelineNode.pCollection("side_read.out", sideReadOut));
    assertThat(stage.getSideInputs(), contains(expectedSideInput));
    assertThat(stage.getOutputPCollections(), emptyIterable());
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 7 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class GreedyStageFuserTest method materializesWithDifferentEnvConsumer.

/**
 * Checks that when the only consumer of {@code parDo.out} runs in a different environment, the
 * stage fused from the impulse output contains just {@code parDo} and materializes
 * {@code parDo.out} as a stage output.
 */
@Test
public void materializesWithDifferentEnvConsumer() {
    // Pipeline shape:
    //   (impulse.out) -> parDo -> parDo.out -> window -> window.out
    // Expected fusion:
    //   (impulse.out) -> parDo -> (parDo.out)
    //   (parDo.out) -> window -> window.out
    Environment commonEnv = Environments.createDockerEnvironment("common");
    Environment rareEnv = Environments.createDockerEnvironment("rare");

    // parDo executes in the "common" environment.
    PTransform parDoTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("out", "parDo.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();
    PCollection parDoOutput = PCollection.newBuilder().setUniqueName("parDo.out").build();

    // window executes in the "rare" environment.
    PTransform windowTransform =
        PTransform.newBuilder()
            .putInputs("input", "parDo.out")
            .putOutputs("output", "window.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                    .setPayload(
                        WindowIntoPayload.newBuilder()
                            .setWindowFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("rare")
            .build();
    PCollection windowOutput = PCollection.newBuilder().setUniqueName("window.out").build();

    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("parDo", parDoTransform)
                .putPcollections("parDo.out", parDoOutput)
                .putTransforms("window", windowTransform)
                .putPcollections("window.out", windowOutput)
                .putEnvironments("rare", rareEnv)
                .putEnvironments("common", commonEnv)
                .build());

    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(
            pipeline, impulseOutputNode, pipeline.getPerElementConsumers(impulseOutputNode));

    assertThat(
        stage.getOutputPCollections(),
        contains(PipelineNode.pCollection("parDo.out", parDoOutput)));
    assertThat(stage.getInputPCollection(), equalTo(impulseOutputNode));
    assertThat(stage.getEnvironment(), equalTo(commonEnv));
    assertThat(stage.getTransforms(), contains(PipelineNode.pTransform("parDo", parDoTransform)));
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 8 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class GreedyStageFuserTest method executableStageProducingSideInputMaterializesIt.

/**
 * Checks that the stage fused around {@code createSide} exposes {@code sidePC} as an output,
 * given that {@code processMain} reads {@code sidePC} as a side input.
 */
@Test
public void executableStageProducingSideInputMaterializesIt() {
    // Pipeline shape:
    //   impulse -- ParDo(createSide)
    //          \_ ParDo(processMain) with side input from createSide
    // The ExecutableStage executing createSide must have an output.
    Environment commonEnv = Environments.createDockerEnvironment("common");

    PTransform impulseTransform =
        PTransform.newBuilder()
            .setUniqueName("impulse")
            .putOutputs("output", "impulsePC")
            .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
            .build();
    PCollection impulseCollection = PCollection.newBuilder().setUniqueName("impulsePC").build();

    PTransform createSideTransform =
        PTransform.newBuilder()
            .setUniqueName("createSide")
            .putInputs("input", "impulsePC")
            .putOutputs("output", "sidePC")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();
    PCollection sideCollection = PCollection.newBuilder().setUniqueName("sidePC").build();

    PTransform processMainTransform =
        PTransform.newBuilder()
            .setUniqueName("processMain")
            .putInputs("main", "impulsePC")
            .putInputs("side", "sidePC")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .putSideInputs("side", SideInput.getDefaultInstance())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("impulse", impulseTransform)
                .putTransforms("createSide", createSideTransform)
                .putTransforms("processMain", processMainTransform)
                .putPcollections("impulsePC", impulseCollection)
                .putPcollections("sidePC", sideCollection)
                .putEnvironments("common", commonEnv)
                .build());

    PCollectionNode stageInput =
        getOnlyElement(
            pipeline.getOutputPCollections(PipelineNode.pTransform("impulse", impulseTransform)));

    // Only createSide is offered as an initial consumer of the impulse output.
    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(
            pipeline,
            stageInput,
            ImmutableSet.of(PipelineNode.pTransform("createSide", createSideTransform)));

    assertThat(
        stage.getOutputPCollections(),
        contains(PipelineNode.pCollection("sidePC", sideCollection)));
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 9 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class RemoteExecutionTest method testExecutionWithMultipleStages.

/**
 * Builds a pipeline with two impulse-fed branches that are flattened, turned into key/value
 * pairs and grouped by key, fuses it, and then executes every fused stage against the SDK
 * harness with the single encoded input {@code "X"}.
 *
 * <p>Asserts that fusion yields exactly two stages and that the outputs collected from all
 * stages are the pairs keyed by {@code "stream1X"} and {@code "stream2X"}, in any order.
 */
@Test
public void testExecutionWithMultipleStages() throws Exception {
    launchSdkHarness(PipelineOptionsFactory.create());
    Pipeline pipeline = Pipeline.create();

    // Creates one branch: impulse<suffix> -> create<suffix> (bytes to String) -> prefixing ParDo.
    Function<String, PCollection<String>> branchFor = suffix -> {
        PCollection<String> decoded = pipeline.apply("impulse" + suffix, Impulse.create()).apply("create" + suffix, ParDo.of(new DoFn<byte[], String>() {

            @ProcessElement
            public void process(ProcessContext c) {
                try {
                    c.output(CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), c.element()));
                } catch (CoderException e) {
                    throw new RuntimeException(e);
                }
            }
        })).setCoder(StringUtf8Coder.of());
        return decoded.apply(ParDo.of(new DoFn<String, String>() {

            @ProcessElement
            public void processElement(ProcessContext c) {
                c.output("stream" + suffix + c.element());
            }
        }));
    };
    PCollection<String> firstBranch = branchFor.apply("1");
    PCollection<String> secondBranch = branchFor.apply("2");
    PCollection<String> flattened = PCollectionList.of(firstBranch).and(secondBranch).apply(Flatten.pCollections());

    // Key every element by itself with an empty value, then group by key.
    flattened.apply("createKV", ParDo.of(new DoFn<String, KV<String, String>>() {

        @ProcessElement
        public void process(ProcessContext c) {
            c.output(KV.of(c.element(), ""));
        }
    })).setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())).apply("gbk", GroupByKey.create());

    RunnerApi.Pipeline proto = PipelineTranslation.toProto(pipeline);
    FusedPipeline fusedPipeline = GreedyPipelineFuser.fuse(proto);
    Set<ExecutableStage> fusedStages = fusedPipeline.getFusedStages();
    assertThat(fusedStages.size(), equalTo(2));

    // Every output receiver of every stage appends into this one shared list.
    List<WindowedValue<?>> collected = Collections.synchronizedList(new ArrayList<>());
    for (ExecutableStage fusedStage : fusedStages) {
        ExecutableProcessBundleDescriptor bundleDescriptor = ProcessBundleDescriptors.fromExecutableStage(fusedStage.toString(), fusedStage, dataServer.getApiServiceDescriptor(), stateServer.getApiServiceDescriptor());
        BundleProcessor bundleProcessor = controlClient.getProcessor(bundleDescriptor.getProcessBundleDescriptor(), bundleDescriptor.getRemoteInputDestinations(), stateDelegator);

        Map<String, Coder> codersByOutputId = bundleDescriptor.getRemoteOutputCoders();
        Map<String, RemoteOutputReceiver<?>> receiversByOutputId = new HashMap<>();
        for (Entry<String, Coder> outputCoder : codersByOutputId.entrySet()) {
            String outputId = outputCoder.getKey();
            receiversByOutputId.putIfAbsent(outputId, RemoteOutputReceiver.of((Coder<WindowedValue<?>>) outputCoder.getValue(), collected::add));
        }

        // Closing the bundle at the end of the try block completes it.
        try (RemoteBundle activeBundle = bundleProcessor.newBundle(receiversByOutputId, StateRequestHandler.unsupported(), BundleProgressHandler.ignored())) {
            Iterables.getOnlyElement(activeBundle.getInputReceivers().values()).accept(valueInGlobalWindow(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "X")));
        }
    }

    assertThat(collected, containsInAnyOrder(valueInGlobalWindow(KV.of("stream1X", "")), valueInGlobalWindow(KV.of("stream2X", ""))));
}
Also used : Arrays(java.util.Arrays) CoderUtils(org.apache.beam.sdk.util.CoderUtils) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) Matchers.not(org.hamcrest.Matchers.not) WindowedValue.valueInGlobalWindow(org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Metrics(org.apache.beam.sdk.metrics.Metrics) Future(java.util.concurrent.Future) GrpcDataService(org.apache.beam.runners.fnexecution.data.GrpcDataService) Map(java.util.Map) SimpleMonitoringInfoBuilder(org.apache.beam.runners.core.metrics.SimpleMonitoringInfoBuilder) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) ExecutableProcessBundleDescriptor(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor) Iterators(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterators) BagUserStateHandlerFactory(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.BagUserStateHandlerFactory) KvCoder(org.apache.beam.sdk.coders.KvCoder) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) Matchers.allOf(org.hamcrest.Matchers.allOf) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Set(java.util.Set) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Executors(java.util.concurrent.Executors) GrpcLoggingService(org.apache.beam.runners.fnexecution.logging.GrpcLoggingService) Serializable(java.io.Serializable) ManagedChannelFactory(org.apache.beam.sdk.fn.channel.ManagedChannelFactory) MultimapSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.MultimapSideInputHandler) CountDownLatch(java.util.concurrent.CountDownLatch) CoderException(org.apache.beam.sdk.coders.CoderException) CompletionStage(java.util.concurrent.CompletionStage) 
ProtoOverrides(org.apache.beam.runners.core.construction.graph.ProtoOverrides) Assert.assertFalse(org.junit.Assert.assertFalse) KV(org.apache.beam.sdk.values.KV) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Impulse(org.apache.beam.sdk.transforms.Impulse) View(org.apache.beam.sdk.transforms.View) Optional(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Optional) GrpcStateService(org.apache.beam.runners.fnexecution.state.GrpcStateService) ArrayList(java.util.ArrayList) TimerSpec(org.apache.beam.sdk.state.TimerSpec) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) InProcessServerFactory(org.apache.beam.sdk.fn.server.InProcessServerFactory) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) Assert.assertTrue(org.junit.Assert.assertTrue) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) Test(org.junit.Test) SingleOutput(org.apache.beam.sdk.transforms.ParDo.SingleOutput) ExecutionException(java.util.concurrent.ExecutionException) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Matcher(org.hamcrest.Matcher) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) IsEmptyIterable(org.hamcrest.collection.IsEmptyIterable) StateSpec(org.apache.beam.sdk.state.StateSpec) IsIterableContainingInOrder(org.hamcrest.collection.IsIterableContainingInOrder) 
ScheduledFuture(java.util.concurrent.ScheduledFuture) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ChannelSplit(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitResponse.ChannelSplit) Urns(org.apache.beam.runners.core.metrics.MonitoringInfoConstants.Urns) GreedyPipelineFuser(org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser) ExperimentalOptions.addExperiment(org.apache.beam.sdk.options.ExperimentalOptions.addExperiment) PCollectionList(org.apache.beam.sdk.values.PCollectionList) GrpcContextHeaderAccessorProvider(org.apache.beam.sdk.fn.server.GrpcContextHeaderAccessorProvider) ResetDateTimeProvider(org.apache.beam.sdk.testing.ResetDateTimeProvider) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) After(org.junit.After) Assert.fail(org.junit.Assert.fail) ProcessBundleSplitResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitResponse) ThreadFactory(java.util.concurrent.ThreadFactory) Flatten(org.apache.beam.sdk.transforms.Flatten) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) IterableSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.IterableSideInputHandler) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) OutboundObserverFactory(org.apache.beam.sdk.fn.stream.OutboundObserverFactory) UUID(java.util.UUID) ThreadFactoryBuilder(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ThreadFactoryBuilder) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Timer(org.apache.beam.sdk.state.Timer) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Entry(java.util.Map.Entry) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) FnHarness(org.apache.beam.fn.harness.FnHarness) 
Slf4jLogWriter(org.apache.beam.runners.fnexecution.logging.Slf4jLogWriter) DistributionData(org.apache.beam.runners.core.metrics.DistributionData) BundleProcessor(org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor) DateTimeUtils(org.joda.time.DateTimeUtils) SideInputHandlerFactory(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.SideInputHandlerFactory) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) ExecutionStateSampler(org.apache.beam.runners.core.metrics.ExecutionStateSampler) PipelineTranslation(org.apache.beam.runners.core.construction.PipelineTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Function(java.util.function.Function) ConcurrentMap(java.util.concurrent.ConcurrentMap) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) MonitoringInfoConstants(org.apache.beam.runners.core.metrics.MonitoringInfoConstants) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) TypeUrns(org.apache.beam.runners.core.metrics.MonitoringInfoConstants.TypeUrns) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) MonitoringInfo(org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo) GrpcFnServer(org.apache.beam.sdk.fn.server.GrpcFnServer) GroupByKey(org.apache.beam.sdk.transforms.GroupByKey) WithKeys(org.apache.beam.sdk.transforms.WithKeys) Iterator(java.util.Iterator) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) Matchers(org.hamcrest.Matchers) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) TimeUnit(java.util.concurrent.TimeUnit) SplittableParDoExpander(org.apache.beam.runners.core.construction.graph.SplittableParDoExpander) 
BagState(org.apache.beam.sdk.state.BagState) StateSpecs(org.apache.beam.sdk.state.StateSpecs) Rule(org.junit.Rule) MonitoringInfoMatchers(org.apache.beam.runners.core.metrics.MonitoringInfoMatchers) Caches(org.apache.beam.fn.harness.Caches) SplitResult(org.apache.beam.sdk.transforms.splittabledofn.SplitResult) Collections(java.util.Collections) BagUserStateHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.BagUserStateHandler) ReadableState(org.apache.beam.sdk.state.ReadableState) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BundleProcessor(org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ExecutableProcessBundleDescriptor(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) Pipeline(org.apache.beam.sdk.Pipeline) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) CoderException(org.apache.beam.sdk.coders.CoderException) Test(org.junit.Test)

Example 10 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class ProcessBundleDescriptors method addSideInputs.

/**
 * Builds a {@link SideInputSpec} for every side input of {@code stage} and, along the way,
 * re-registers each side input PCollection in {@code components} with a length-prefixed coder.
 *
 * @param stage the executable stage whose side inputs are processed
 * @param components the components builder to update; it is modified in place
 * @return the specs keyed first by consuming transform id and then by side input local name
 * @throws IOException declared by this method; NOTE(review): presumably raised while the wire
 *     coder is instantiated — confirm against the helpers called here
 */
private static Map<String, Map<String, SideInputSpec>> addSideInputs(ExecutableStage stage, Components.Builder components) throws IOException {
    ImmutableTable.Builder<String, String, SideInputSpec> specsByTransformAndName = ImmutableTable.builder();
    for (SideInputReference sideInput : stage.getSideInputs()) {
        String transformId = sideInput.transform().getId();
        String localName = sideInput.localName();
        PCollectionNode collectionNode = sideInput.collection();
        PCollection originalCollection = collectionNode.getPCollection();

        // Side input coders are length prefixed so that the SDK and the Runner agree on how
        // to encode/decode the key, window, and values.
        String prefixedCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(originalCollection.getCoderId(), components, false);
        PCollection prefixedCollection = originalCollection.toBuilder().setCoderId(prefixedCoderId).build();
        components.putPcollections(collectionNode.getId(), prefixedCollection);

        // The components are rebuilt on every iteration because the builder was just updated.
        FullWindowedValueCoder<KV<?, ?>> wireCoder = (FullWindowedValueCoder) WireCoders.instantiateRunnerWireCoder(collectionNode, components.build());
        SideInputSpec spec = SideInputSpec.of(transformId, localName, getAccessPattern(sideInput), wireCoder.getValueCoder(), wireCoder.getWindowCoder());
        specsByTransformAndName.put(transformId, localName, spec);
    }
    return specsByTransformAndName.build().rowMap();
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) SideInputReference(org.apache.beam.runners.core.construction.graph.SideInputReference) KV(org.apache.beam.sdk.values.KV) ImmutableTable(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableTable) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)45 Test (org.junit.Test)45 Pipeline (org.apache.beam.sdk.Pipeline)25 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)24 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)22 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)22 Map (java.util.Map)21 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)21 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)21 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)17 ArrayList (java.util.ArrayList)16 HashMap (java.util.HashMap)14 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)13 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 PCollection (org.apache.beam.sdk.values.PCollection)12 Coder (org.apache.beam.sdk.coders.Coder)11 KV (org.apache.beam.sdk.values.KV)11 Collection (java.util.Collection)10 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)10 IOException (java.io.IOException)9