Search in sources :

Example 6 with ExecutableStage

use of org.apache.beam.runners.core.construction.graph.ExecutableStage in project beam by apache.

the class ExecutableStageTranslationTest method testOperatorNameGeneration.

/**
 * Test for generating readable operator names during translation.
 *
 * <p>Builds a pipeline of an Impulse followed by three no-op ParDos (one unnamed, one named
 * "MyName", one named "Composite/Nested/ParDo"), fuses it, and checks the name generated from the
 * first fused stage's payload.
 */
@Test
public void testOperatorNameGeneration() throws Exception {
    // No-op DoFn applied without an explicit transform name.
    DoFn<byte[], String> unnamedFn = new DoFn<byte[], String>() {

        @ProcessElement
        public void processElement(ProcessContext processContext, OutputReceiver<String> outputReceiver) {
        }
    };
    // No-op DoFn applied under the explicit name "MyName".
    DoFn<String, Integer> namedFn = new DoFn<String, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext processContext, OutputReceiver<Integer> outputReceiver) {
        }
    };
    // No-op DoFn applied under a slash-separated name; avoids a nested anonymous ParDo.
    DoFn<Integer, Integer> nestedFn = new DoFn<Integer, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext processContext, OutputReceiver<Integer> outputReceiver) {
        }
    };

    Pipeline pipeline = Pipeline.create();
    pipeline
        .apply(Impulse.create())
        .apply(ParDo.of(unnamedFn))
        .apply("MyName", ParDo.of(namedFn))
        .apply("Composite/Nested/ParDo", ParDo.of(nestedFn));

    // Fuse the pipeline and take the first fused stage.
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
    ExecutableStage fusedStage = GreedyPipelineFuser.fuse(pipelineProto)
        .getFusedStages()
        .stream()
        .findFirst()
        .get();

    // Round-trip the stage through its PTransform form to obtain the stage payload.
    RunnerApi.PTransform stageTransform = fusedStage.toPTransform("foo");
    RunnerApi.ExecutableStagePayload stagePayload =
        RunnerApi.ExecutableStagePayload.parseFrom(stageTransform.getSpec().getPayload());

    String generatedName = ExecutableStageTranslation.generateNameFromStagePayload(stagePayload);
    assertThat(generatedName, is("[3]{ParDo(Anonymous), MyName, Composite}"));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 7 with ExecutableStage

use of org.apache.beam.runners.core.construction.graph.ExecutableStage in project beam by apache.

the class FlinkExecutableStageFunction method getBundleCheckpointHandler.

/**
 * Creates the {@link BundleCheckpointHandler} for the given stage and (re)initializes the
 * SDF-related timer and state fields accordingly.
 *
 * <p>When {@code hasSDF(executableStage)} is false, both {@code sdfTimerInternals} and {@code
 * sdfStateInternals} are cleared and the returned handler rejects every checkpoint response.
 * Otherwise fresh in-memory timer and state internals are created and handed to a {@link
 * BundleCheckpointHandlers.StateAndTimerBundleCheckpointHandler}.
 *
 * @param executableStage the stage to build a checkpoint handler for
 * @return a handler that throws {@link UnsupportedOperationException} when the stage has no
 *     splittable DoFn, or a state-and-timer backed handler when it does
 */
private BundleCheckpointHandler getBundleCheckpointHandler(ExecutableStage executableStage) {
    if (!hasSDF(executableStage)) {
        // Clear BOTH SDF fields. Previously sdfStateInternals was nulled twice and
        // sdfTimerInternals was left holding whatever value it had before this call.
        sdfTimerInternals = null;
        sdfStateInternals = null;
        return response -> {
            throw new UnsupportedOperationException("Self-checkpoint is only supported on splittable DoFn.");
        };
    }
    sdfTimerInternals = new InMemoryTimerInternals();
    sdfStateInternals = InMemoryStateInternals.forKey("sdf_state");
    // The lambdas ignore the key and always hand back the single shared instance created above.
    return new BundleCheckpointHandlers.StateAndTimerBundleCheckpointHandler(key -> sdfTimerInternals, key -> sdfStateInternals, inputCoder, windowCoder);
}
Also used : RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) LoggerFactory(org.slf4j.LoggerFactory) TimerInternals(org.apache.beam.runners.core.TimerInternals) BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Locale(java.util.Locale) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) EnumMap(java.util.EnumMap) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) GuardedBy(javax.annotation.concurrent.GuardedBy) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Preconditions(org.apache.flink.util.Preconditions) List(java.util.List) MapPartitionFunction(org.apache.flink.api.common.functions.MapPartitionFunction) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) Coder(org.apache.beam.sdk.coders.Coder) StateTags(org.apache.beam.runners.core.StateTags) 
BundleCheckpointHandlers(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandlers) ArrayList(java.util.ArrayList) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Collector(org.apache.flink.util.Collector) InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) StateInternals(org.apache.beam.runners.core.StateInternals) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Logger(org.slf4j.Logger) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) PipelineTranslatorUtils(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Configuration(org.apache.flink.configuration.Configuration) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) IOException(java.io.IOException) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) FileSystems(org.apache.beam.sdk.io.FileSystems) ExecutableStageContext(org.apache.beam.runners.fnexecution.control.ExecutableStageContext) AbstractRichFunction(org.apache.flink.api.common.functions.AbstractRichFunction) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals)

Example 8 with ExecutableStage

use of org.apache.beam.runners.core.construction.graph.ExecutableStage in project beam by apache.

the class DoFnOp method open.

/**
 * Prepares this operator for processing: resets the watermark fields, reads the pipeline options
 * from the Samza execution context, and wires up the bundle manager, timer internals factory,
 * side-input handler, DoFn runner (portable or classic) and DoFn invoker.
 *
 * @param config the Samza config passed in by the caller (not read in this method)
 * @param context the Samza context supplying the application container and task contexts
 * @param timerRegistry scheduler handed to the bundle manager and the timer internals factory
 * @param emitter emitter wrapped by the output manager given to the DoFn runner
 */
@Override
@SuppressWarnings("unchecked")
public void open(Config config, Context context, Scheduler<KeyedTimerData<Void>> timerRegistry, OpEmitter<OutT> emitter) {
    // Watermark bookkeeping starts at the extremes.
    this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
    this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
    this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;

    final DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());
    final SamzaExecutionContext executionContext =
        (SamzaExecutionContext) context.getApplicationContainerContext();
    this.samzaPipelineOptions = executionContext.getPipelineOptions();
    // A max bundle size of one (or less) turns bundling off.
    this.bundleDisabled = samzaPipelineOptions.getMaxBundleSize() <= 1;

    final String pardoStateId = "pardo-" + transformId;
    final SamzaStoreStateInternals.Factory<?> nonKeyedStateFactory =
        SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
            pardoStateId, context.getTaskContext(), samzaPipelineOptions);
    final FutureCollector<OutT> futureCollector = createFutureCollector();

    this.bundleManager =
        new BundleManager<>(
            createBundleProgressListener(),
            futureCollector,
            samzaPipelineOptions.getMaxBundleSize(),
            samzaPipelineOptions.getMaxBundleTimeMs(),
            timerRegistry,
            bundleCheckTimerId);

    this.timerInternalsFactory =
        SamzaTimerInternalsFactory.createTimerInternalFactory(
            keyCoder,
            (Scheduler) timerRegistry,
            getTimerStateId(doFnSignature),
            nonKeyedStateFactory,
            windowingStrategy,
            isBounded,
            samzaPipelineOptions);

    this.sideInputHandler =
        new SideInputHandler(sideInputs, nonKeyedStateFactory.stateInternalsForKey(null));

    if (!isPortable) {
        // Classic path: run the DoFn directly.
        this.fnRunner =
            SamzaDoFnRunners.create(
                samzaPipelineOptions,
                doFn,
                windowingStrategy,
                transformFullName,
                pardoStateId,
                context,
                mainOutputTag,
                sideInputHandler,
                timerInternalsFactory,
                keyCoder,
                outputManagerFactory.create(emitter, futureCollector),
                inputCoder,
                sideOutputTags,
                outputCoders,
                doFnSchemaInformation,
                (Map<String, PCollectionView<?>>) sideInputMapping);
    } else {
        // Portable path: rebuild the executable stage from its payload and obtain a bundle factory.
        final ExecutableStage portableStage = ExecutableStage.fromPayload(stagePayload);
        stageContext = SamzaExecutableStageContextFactory.getInstance().get(jobInfo);
        stageBundleFactory = stageContext.getStageBundleFactory(portableStage);
        this.fnRunner =
            SamzaDoFnRunners.createPortable(
                transformId,
                bundleStateId,
                windowedValueCoder,
                portableStage,
                sideInputMapping,
                sideInputHandler,
                nonKeyedStateFactory,
                timerInternalsFactory,
                samzaPipelineOptions,
                outputManagerFactory.create(emitter, futureCollector),
                stageBundleFactory,
                mainOutputTag,
                idToTupleTagMap,
                context,
                transformFullName);
    }

    this.pushbackFnRunner =
        SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
    this.pushbackValues = new ArrayList<>();

    final Iterator<SamzaDoFnInvokerRegistrar> registrars =
        ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
    if (registrars.hasNext()) {
        // A registrar was discovered via ServiceLoader; exactly one is expected.
        doFnInvoker =
            Iterators.getOnlyElement(registrars).invokerSetupFor(doFn, samzaPipelineOptions, context);
    } else {
        // use the default invoker here
        doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, samzaPipelineOptions);
    }
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) Scheduler(org.apache.samza.operators.Scheduler) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Map(java.util.Map) HashMap(java.util.HashMap) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 9 with ExecutableStage

use of org.apache.beam.runners.core.construction.graph.ExecutableStage in project beam by apache.

the class StateRequestHandlersTest method testUserStateCacheTokenGeneration.

/**
 * Checks that the bag-user-state handler exposes a single cache token that stays the same while
 * handlers for additional user states are created on demand.
 */
@Test
public void testUserStateCacheTokenGeneration() throws Exception {
    ExecutableStage stage = buildExecutableStage("state1", "state2");
    ProcessBundleDescriptors.ExecutableProcessBundleDescriptor bundleDescriptor =
        ProcessBundleDescriptors.fromExecutableStage(
            "id", stage, Endpoints.ApiServiceDescriptor.getDefaultInstance());

    // No handlers exist before any state request has been sent.
    InMemoryBagUserStateFactory stateFactory = new InMemoryBagUserStateFactory<>();
    assertThat(stateFactory.handlers.size(), is(0));

    StateRequestHandler handler =
        StateRequestHandlers.forBagUserStateHandlerFactory(bundleDescriptor, stateFactory);
    final BeamFnApi.ProcessBundleRequest.CacheToken initialToken = assertSingleCacheToken(handler);

    // Each first access to a user state adds one handler; the cache token must not change.
    final String[] userStateIds = {"state1", "state2"};
    for (int i = 0; i < userStateIds.length; i++) {
        sendGetRequest(handler, userStateIds[i]);
        assertThat(stateFactory.handlers.size(), is(i + 1));
        assertThat(assertSingleCacheToken(handler), is(initialToken));
    }
}
Also used : ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) Test(org.junit.Test)

Example 10 with ExecutableStage

use of org.apache.beam.runners.core.construction.graph.ExecutableStage in project beam by apache.

the class SingleEnvironmentInstanceJobBundleFactoryTest method closeShutsDownEnvironmentsWhenSomeFail.

/**
 * Checks that closing the factory closes every remote environment even when some of them throw
 * on close, and that one failure is thrown with the other attached as suppressed.
 */
@Test
public void closeShutsDownEnvironmentsWhenSomeFail() throws Exception {
    Pipeline pipeline = Pipeline.create();
    ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    pipeline.apply("Create", Create.of(1, 2, 3));

    // The first stage comes from fusing the pipeline; the other two reuse its payload with a
    // different environment swapped in.
    ExecutableStage stageOne = GreedyPipelineFuser.fuse(PipelineTranslation.toProto(pipeline))
        .getFusedStages()
        .stream()
        .findFirst()
        .get();
    ExecutableStagePayload payloadTemplate =
        ExecutableStagePayload.parseFrom(stageOne.toPTransform("foo").getSpec().getPayload());
    Environment envTwo = Environments.createDockerEnvironment("second_env");
    ExecutableStage stageTwo =
        ExecutableStage.fromPayload(payloadTemplate.toBuilder().setEnvironment(envTwo).build());
    Environment envThree = Environments.createDockerEnvironment("third_env");
    ExecutableStage stageThree =
        ExecutableStage.fromPayload(payloadTemplate.toBuilder().setEnvironment(envThree).build());

    RemoteEnvironment remoteOne = mock(RemoteEnvironment.class, "First Remote Env");
    RemoteEnvironment remoteTwo = mock(RemoteEnvironment.class, "Second Remote Env");
    RemoteEnvironment remoteThree = mock(RemoteEnvironment.class, "Third Remote Env");
    when(environmentFactory.createEnvironment(stageOne.getEnvironment(), GENERATED_ID))
        .thenReturn(remoteOne);
    when(environmentFactory.createEnvironment(stageTwo.getEnvironment(), GENERATED_ID))
        .thenReturn(remoteTwo);
    when(environmentFactory.createEnvironment(stageThree.getEnvironment(), GENERATED_ID))
        .thenReturn(remoteThree);
    when(remoteOne.getInstructionRequestHandler()).thenReturn(instructionRequestHandler);
    when(remoteTwo.getInstructionRequestHandler()).thenReturn(instructionRequestHandler);
    when(remoteThree.getInstructionRequestHandler()).thenReturn(instructionRequestHandler);

    factory.forStage(stageOne);
    factory.forStage(stageTwo);
    factory.forStage(stageThree);

    // The first and third environments fail on close; the second closes normally.
    IllegalStateException failureOne = new IllegalStateException("first stage");
    doThrow(failureOne).when(remoteOne).close();
    IllegalStateException failureThree = new IllegalStateException("third stage");
    doThrow(failureThree).when(remoteThree).close();

    IllegalStateException thrown;
    try {
        factory.close();
        fail("Factory close should have thrown");
        // Unreachable: fail() always throws. The return only satisfies definite assignment.
        return;
    } catch (IllegalStateException expected) {
        thrown = expected;
    }

    // Either failure may surface first; the other must be attached as suppressed.
    if (thrown == failureOne) {
        assertThat(ImmutableList.copyOf(thrown.getSuppressed()), contains(failureThree));
    } else if (thrown == failureThree) {
        assertThat(ImmutableList.copyOf(thrown.getSuppressed()), contains(failureOne));
    } else {
        throw thrown;
    }

    verify(remoteOne).close();
    verify(remoteTwo).close();
    verify(remoteThree).close();
}
Also used : RemoteEnvironment(org.apache.beam.runners.fnexecution.environment.RemoteEnvironment) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) RemoteEnvironment(org.apache.beam.runners.fnexecution.environment.RemoteEnvironment) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage)22 Test (org.junit.Test)17 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)16 Pipeline (org.apache.beam.sdk.Pipeline)15 Coder (org.apache.beam.sdk.coders.Coder)14 HashMap (java.util.HashMap)12 FusedPipeline (org.apache.beam.runners.core.construction.graph.FusedPipeline)12 KvCoder (org.apache.beam.sdk.coders.KvCoder)12 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)12 WindowedValue (org.apache.beam.sdk.util.WindowedValue)12 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)11 Map (java.util.Map)10 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)10 ExecutableProcessBundleDescriptor (org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor)10 BundleProcessor (org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor)10 BigEndianLongCoder (org.apache.beam.sdk.coders.BigEndianLongCoder)10 Collection (java.util.Collection)9 KV (org.apache.beam.sdk.values.KV)9 PCollection (org.apache.beam.sdk.values.PCollection)9 ArrayList (java.util.ArrayList)7