Search in sources :

Example 11 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class DefaultJobBundleFactoryTest method createsMultipleEnvironmentOfSingleType.

/**
 * Checks that two environments sharing one URN but carrying different payloads are each created
 * through the provider registered for that URN, while the provider registered for the other URN
 * is never asked for an environment factory.
 */
@Test
public void createsMultipleEnvironmentOfSingleType() throws Exception {
    ServerFactory sharedServerFactory = ServerFactory.createDefault();

    // Two environments with the same URN but different payload lengths, plus one with another URN.
    Environment firstEnvOfA = Environment.newBuilder()
            .setUrn("env:urn:a")
            .setPayload(ByteString.copyFrom(new byte[1]))
            .build();
    Environment secondEnvOfA = Environment.newBuilder()
            .setUrn("env:urn:a")
            .setPayload(ByteString.copyFrom(new byte[2]))
            .build();
    Environment envOfB = Environment.newBuilder()
            .setUrn("env:urn:b")
            .build();

    // URN "a": a single mocked factory answers for both payload variants.
    EnvironmentFactory factoryForA = mock(EnvironmentFactory.class);
    when(factoryForA.createEnvironment(eq(firstEnvOfA), any())).thenReturn(remoteEnvironment);
    when(factoryForA.createEnvironment(eq(secondEnvOfA), any())).thenReturn(remoteEnvironment);
    EnvironmentFactory.Provider providerForA = mock(EnvironmentFactory.Provider.class);
    when(providerForA.createEnvironmentFactory(any(), any(), any(), any(), any(), any()))
            .thenReturn(factoryForA);
    when(providerForA.getServerFactory()).thenReturn(sharedServerFactory);

    // URN "b": registered, but no stage in this test uses it.
    EnvironmentFactory factoryForB = mock(EnvironmentFactory.class);
    when(factoryForB.createEnvironment(eq(envOfB), any())).thenReturn(remoteEnvironment);
    EnvironmentFactory.Provider providerForB = mock(EnvironmentFactory.Provider.class);
    when(providerForB.createEnvironmentFactory(any(), any(), any(), any(), any(), any()))
            .thenReturn(factoryForB);
    when(providerForB.getServerFactory()).thenReturn(sharedServerFactory);

    Map<String, Provider> providersByUrn =
            ImmutableMap.of(firstEnvOfA.getUrn(), providerForA, envOfB.getUrn(), providerForB);

    try (DefaultJobBundleFactory bundleFactory = createDefaultJobBundleFactory(providersByUrn)) {
        // First stage: only the one-byte-payload environment has been created so far.
        bundleFactory.forStage(getExecutableStage(firstEnvOfA));
        verify(providerForA, Mockito.times(1))
                .createEnvironmentFactory(any(), any(), any(), any(), any(), any());
        verify(providerForB, Mockito.times(0))
                .createEnvironmentFactory(any(), any(), any(), any(), any(), any());
        verify(factoryForA, Mockito.times(1)).createEnvironment(eq(firstEnvOfA), any());
        verify(factoryForA, Mockito.times(0)).createEnvironment(eq(secondEnvOfA), any());

        // Second stage: same URN, different payload -> provider A is consulted a second time.
        bundleFactory.forStage(getExecutableStage(secondEnvOfA));
        verify(providerForA, Mockito.times(2))
                .createEnvironmentFactory(any(), any(), any(), any(), any(), any());
        verify(providerForB, Mockito.times(0))
                .createEnvironmentFactory(any(), any(), any(), any(), any(), any());
        verify(factoryForA, Mockito.times(1)).createEnvironment(eq(firstEnvOfA), any());
        verify(factoryForA, Mockito.times(1)).createEnvironment(eq(secondEnvOfA), any());
    }
}
Also used : EnvironmentFactory(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory) Provider(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider) RemoteEnvironment(org.apache.beam.runners.fnexecution.environment.RemoteEnvironment) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) ServerFactory(org.apache.beam.sdk.fn.server.ServerFactory) Matchers.containsString(org.hamcrest.Matchers.containsString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Provider(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider) Test(org.junit.Test)

Example 12 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class DockerEnvironmentFactory method createEnvironment.

/**
 * Runs the container image named in the environment's {@code DockerPayload} and waits until an
 * instruction handler for {@code workerId} can be taken from the client source, then returns a
 * {@link RemoteEnvironment} wrapping that container.
 *
 * <p>If a failure occurs after the container has been started, the container is killed (and
 * removed unless container retention is enabled) before the failure is rethrown; any error raised
 * by that cleanup is attached as a suppressed exception.
 */
@Override
public RemoteEnvironment createEnvironment(Environment environment, String workerId) throws Exception {
    Preconditions.checkState(
            environment.getUrn().equals(BeamUrns.getUrn(RunnerApi.StandardEnvironments.Environments.DOCKER)),
            "The passed environment does not contain a DockerPayload.");
    final RunnerApi.DockerPayload dockerPayload = RunnerApi.DockerPayload.parseFrom(environment.getPayload());

    // Gather everything needed for the docker invocation.
    String image = dockerPayload.getContainerImage();
    // TODO: https://issues.apache.org/jira/browse/BEAM-4148 The default service address will not
    // work for Docker for Mac.
    String provisionUrl = provisioningServiceServer.getApiServiceDescriptor().getUrl();
    ImmutableList.Builder<String> dockerOpts = ImmutableList.<String>builder()
            .addAll(gcsCredentialArgs())
            .add("--network=host")
            .add("--env=DOCKER_MAC_CONTAINER=" + System.getenv("DOCKER_MAC_CONTAINER"));
    final boolean keepContainer =
            pipelineOptions.as(ManualDockerEnvironmentOptions.class).getRetainDockerContainers();
    String semiPersistDir = pipelineOptions.as(RemoteEnvironmentOptions.class).getSemiPersistDir();
    ImmutableList.Builder<String> harnessArgs = ImmutableList.<String>builder()
            .add(String.format("--id=%s", workerId))
            .add(String.format("--provision_endpoint=%s", provisionUrl));
    if (semiPersistDir != null) {
        harnessArgs.add(String.format("--semi_persist_dir=%s", semiPersistDir));
    }

    LOG.debug("Creating Docker Container with ID {}", workerId);
    // Everything from launching the container to the blocking clientSource.take call is wrapped
    // so that a started container can be cleaned up if any step throws.
    String containerId = null;
    InstructionRequestHandler handler = null;
    try {
        containerId = docker.runImage(image, dockerOpts.build(), harnessArgs.build());
        LOG.debug("Created Docker Container with Container ID {}", containerId);
        // Poll for a client from the gRPC server; the handler starts out null, so the body always
        // runs at least once.
        do {
            try {
                // A container that has stopped running will never connect: give up.
                boolean running = docker.isContainerRunning(containerId);
                if (!running) {
                    IllegalStateException notRunning =
                            new IllegalStateException(String.format("No container running for id %s", containerId));
                    try {
                        String logs = docker.getContainerLogs(containerId);
                        LOG.error("Docker container {} logs:\n{}", containerId, logs);
                    } catch (Exception logFetchFailure) {
                        notRunning.addSuppressed(logFetchFailure);
                    }
                    throw notRunning;
                }
                handler = clientSource.take(workerId, Duration.ofSeconds(5));
            } catch (TimeoutException stillWaiting) {
                LOG.info("Still waiting for startup of environment {} for worker id {}", image, workerId);
            } catch (InterruptedException interrupted) {
                // Restore the interrupt flag before surfacing the failure.
                Thread.currentThread().interrupt();
                throw new RuntimeException(interrupted);
            }
        } while (handler == null);
    } catch (Exception failure) {
        if (containerId == null) {
            // The container was never launched, so there is nothing to clean up.
            throw failure;
        }
        // Kill the launched docker container if we can't retrieve a client for it.
        try {
            docker.killContainer(containerId);
            if (!keepContainer) {
                docker.removeContainer(containerId);
            }
        } catch (Exception cleanupFailure) {
            failure.addSuppressed(cleanupFailure);
        }
        throw failure;
    }
    return DockerContainerEnvironment.create(docker, environment, containerId, handler, keepContainer);
}
Also used : ManualDockerEnvironmentOptions(org.apache.beam.sdk.options.ManualDockerEnvironmentOptions) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) RemoteEnvironmentOptions(org.apache.beam.sdk.options.RemoteEnvironmentOptions) TimeoutException(java.util.concurrent.TimeoutException) InstructionRequestHandler(org.apache.beam.runners.fnexecution.control.InstructionRequestHandler) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) TimeoutException(java.util.concurrent.TimeoutException)

Example 13 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class ProcessEnvironmentFactory method createEnvironment.

/**
 * Starts the command named in the environment's {@code ProcessPayload} via the process manager
 * and waits until an instruction handler for {@code workerId} can be taken from the client
 * source, then returns a {@link RemoteEnvironment} backed by that process.
 *
 * <p>On any failure while starting or waiting, the process manager is asked to stop the process
 * for {@code workerId} before the failure is rethrown; an error raised by that stop is attached
 * as a suppressed exception.
 */
@Override
public RemoteEnvironment createEnvironment(Environment environment, String workerId) throws Exception {
    Preconditions.checkState(
            environment.getUrn().equals(BeamUrns.getUrn(RunnerApi.StandardEnvironments.Environments.PROCESS)),
            "The passed environment does not contain a ProcessPayload.");
    final RunnerApi.ProcessPayload processPayload = RunnerApi.ProcessPayload.parseFrom(environment.getPayload());

    String command = processPayload.getCommand();
    String provisionUrl = provisioningServiceServer.getApiServiceDescriptor().getUrl();
    String semiPersistDir = pipelineOptions.as(RemoteEnvironmentOptions.class).getSemiPersistDir();
    ImmutableList.Builder<String> harnessArgs = ImmutableList.<String>builder()
            .add(String.format("--id=%s", workerId))
            .add(String.format("--provision_endpoint=%s", provisionUrl));
    if (semiPersistDir != null) {
        harnessArgs.add(String.format("--semi_persist_dir=%s", semiPersistDir));
    }

    LOG.debug("Creating Process for worker ID {}", workerId);
    // Starting the process and the blocking clientSource.take call are wrapped so the process can
    // be stopped if either step throws.
    InstructionRequestHandler handler = null;
    try {
        ProcessManager.RunningProcess running =
                processManager.startProcess(workerId, command, harnessArgs.build(), processPayload.getEnvMap());
        // Poll for a client from the gRPC server; the handler starts out null, so the body always
        // runs at least once.
        do {
            try {
                // Abort as soon as the process is found to be dead.
                running.isAliveOrThrow();
                handler = clientSource.take(workerId, Duration.ofSeconds(5));
            } catch (TimeoutException stillWaiting) {
                LOG.info("Still waiting for startup of environment '{}' for worker id {}", command, workerId);
            } catch (InterruptedException interrupted) {
                // Restore the interrupt flag before surfacing the failure.
                Thread.currentThread().interrupt();
                throw new RuntimeException(interrupted);
            }
        } while (handler == null);
    } catch (Exception failure) {
        try {
            processManager.stopProcess(workerId);
        } catch (Exception stopFailure) {
            failure.addSuppressed(stopFailure);
        }
        throw failure;
    }
    return ProcessEnvironment.create(processManager, environment, workerId, handler);
}
Also used : ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) RemoteEnvironmentOptions(org.apache.beam.sdk.options.RemoteEnvironmentOptions) TimeoutException(java.util.concurrent.TimeoutException) InstructionRequestHandler(org.apache.beam.runners.fnexecution.control.InstructionRequestHandler) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) TimeoutException(java.util.concurrent.TimeoutException)

Example 14 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class FnHarnessTest method testLaunchFnHarnessAndTeardownCleanly.

/**
 * Runs {@code FnHarness.main} against in-process logging and control gRPC servers and checks the
 * order in which startup hooks, environment lookups, before-processing hooks and the instruction
 * response are observed.
 */
@Test
// The Future returned by ExecutorService.submit(...) in the control service below is deliberately
// ignored; a failure there will cause the test to time out.
@SuppressWarnings("FutureReturnValueIgnored")
public void testLaunchFnHarnessAndTeardownCleanly() throws Exception {
    // Stand-in for the environment-variable lookup handed to FnHarness.main.
    Function<String, String> environmentVariableMock = mock(Function.class);
    PipelineOptions options = PipelineOptionsFactory.create();
    when(environmentVariableMock.apply("HARNESS_ID")).thenReturn("id");
    when(environmentVariableMock.apply("PIPELINE_OPTIONS")).thenReturn(PipelineOptionsTranslation.toJson(options));
    List<BeamFnApi.LogEntry> logEntries = new ArrayList<>();
    // A mock (not a real list) so the add(...) call can take part in the InOrder verification below.
    List<BeamFnApi.InstructionResponse> instructionResponses = mock(List.class);
    // Logging service: collects every received log entry into logEntries.
    BeamFnLoggingGrpc.BeamFnLoggingImplBase loggingService = new BeamFnLoggingGrpc.BeamFnLoggingImplBase() {

        @Override
        public StreamObserver<BeamFnApi.LogEntry.List> logging(StreamObserver<LogControl> responseObserver) {
            return TestStreams.withOnNext((BeamFnApi.LogEntry.List entries) -> logEntries.addAll(entries.getLogEntriesList())).withOnCompleted(responseObserver::onCompleted).build();
        }
    };
    // Control service: sends a single INSTRUCTION_REQUEST, then completes the stream once a
    // response has been recorded or the inbound stream has completed.
    BeamFnControlGrpc.BeamFnControlImplBase controlService = new BeamFnControlGrpc.BeamFnControlImplBase() {

        @Override
        public StreamObserver<InstructionResponse> control(StreamObserver<InstructionRequest> responseObserver) {
            CountDownLatch waitForResponses = new CountDownLatch(1);
            // Send the request from an executor thread and block that thread on the latch.
            options.as(GcsOptions.class).getExecutorService().submit(() -> {
                responseObserver.onNext(INSTRUCTION_REQUEST);
                Uninterruptibles.awaitUninterruptibly(waitForResponses);
                responseObserver.onCompleted();
            });
            // The latch is released either by the first response or by completion of the stream.
            return TestStreams.withOnNext((InstructionResponse t) -> {
                instructionResponses.add(t);
                waitForResponses.countDown();
            }).withOnCompleted(waitForResponses::countDown).build();
        }
    };
    // forPort(0): presumably lets the server choose a free port; the actual port is read back
    // through getPort() when the service descriptors are built.
    Server loggingServer = ServerBuilder.forPort(0).addService(loggingService).build();
    loggingServer.start();
    try {
        Server controlServer = ServerBuilder.forPort(0).addService(controlService).build();
        controlServer.start();
        try {
            Endpoints.ApiServiceDescriptor loggingDescriptor = Endpoints.ApiServiceDescriptor.newBuilder().setUrl("localhost:" + loggingServer.getPort()).build();
            Endpoints.ApiServiceDescriptor controlDescriptor = Endpoints.ApiServiceDescriptor.newBuilder().setUrl("localhost:" + controlServer.getPort()).build();
            // The descriptors are only known after both servers have started, so these two
            // lookups are stubbed here rather than with the others above.
            when(environmentVariableMock.apply("LOGGING_API_SERVICE_DESCRIPTOR")).thenReturn(TextFormat.printToString(loggingDescriptor));
            when(environmentVariableMock.apply("CONTROL_API_SERVICE_DESCRIPTOR")).thenReturn(TextFormat.printToString(controlDescriptor));
            FnHarness.main(environmentVariableMock);
        } finally {
            // Servers are shut down in reverse order of startup, even if the harness throws.
            controlServer.shutdownNow();
        }
    } finally {
        loggingServer.shutdownNow();
    }
    // Verify that we first run onStartup functions before even reading the environment, and that
    // we then call beforeProcessing functions before executing instructions.
    InOrder inOrder = inOrder(onStartupMock, beforeProcessingMock, environmentVariableMock, instructionResponses);
    inOrder.verify(onStartupMock).run();
    inOrder.verify(environmentVariableMock, atLeastOnce()).apply(any());
    inOrder.verify(beforeProcessingMock).accept(any());
    inOrder.verify(instructionResponses).add(INSTRUCTION_RESPONSE);
}
Also used : StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) InOrder(org.mockito.InOrder) BeamFnLoggingGrpc(org.apache.beam.model.fnexecution.v1.BeamFnLoggingGrpc) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ArrayList(java.util.ArrayList) InstructionResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse) CountDownLatch(java.util.concurrent.CountDownLatch) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ArrayList(java.util.ArrayList) List(java.util.List) BeamFnControlGrpc(org.apache.beam.model.fnexecution.v1.BeamFnControlGrpc) Test(org.junit.Test)

Example 15 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class DataflowWorkerHarnessHelper method getPipelineFromEnv.

// TODO: make env logic private to main() so it is never done outside of initializing the process
/**
 * Loads the portable pipeline proto from the file named by the {@code PIPELINE_PATH} environment
 * variable.
 *
 * @return the parsed pipeline, or {@code null} (after logging a warning) when the environment
 *     variable is not set or the file it names does not exist
 * @throws IOException if the file cannot be opened or read
 */
public static RunnerApi.@Nullable Pipeline getPipelineFromEnv() throws IOException {
    String pipelinePath = System.getenv(PIPELINE_PATH);
    if (pipelinePath == null) {
        LOG.warn("Missing pipeline environment variable '{}'", PIPELINE_PATH);
        return null;
    }
    // Reuse the value that was just null-checked instead of reading the environment a second
    // time, so the path that was validated is the path that is opened.
    File pipelineFile = new File(pipelinePath);
    if (!pipelineFile.exists()) {
        LOG.warn("Pipeline path '{}' does not exist", pipelineFile);
        return null;
    }
    try (FileInputStream inputStream = new FileInputStream(pipelineFile)) {
        RunnerApi.Pipeline pipelineProto = RunnerApi.Pipeline.parseFrom(inputStream);
        LOG.info("Found portable pipeline:\n{}", TextFormat.printToString(pipelineProto));
        return pipelineProto;
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) File(java.io.File) FileInputStream(java.io.FileInputStream)

Aggregations

Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)33 Test (org.junit.Test)28 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)17 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)14 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)13 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)12 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)11 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)8 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)8 Map (java.util.Map)7 RemoteEnvironment (org.apache.beam.runners.fnexecution.environment.RemoteEnvironment)7 Pipeline (org.apache.beam.sdk.Pipeline)7 IOException (java.io.IOException)6 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)5 EnvironmentFactory (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory)5 Provider (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider)5 ServerFactory (org.apache.beam.sdk.fn.server.ServerFactory)5 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 ArrayList (java.util.ArrayList)4