Search in sources :

Example 1 with DockerPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload in project beam by apache.

the class DockerEnvironmentFactory method createEnvironment.

/**
 * Creates a new, active {@link RemoteEnvironment} backed by a local Docker container.
 *
 * <p>The method checks that the environment's URN is the Docker URN, parses the {@code
 * DockerPayload} from the environment payload, starts the container image through {@code docker},
 * and then polls {@code clientSource} until an {@link InstructionRequestHandler} for {@code
 * workerId} is available. If anything fails after the container was started, the container is
 * killed (and removed unless containers are configured to be retained) before the exception is
 * rethrown.
 *
 * @param environment the environment description; its URN must be the Docker environment URN and
 *     its payload must parse as a {@code DockerPayload}
 * @param workerId the id passed to the container as {@code --id} and used to look up the
 *     instruction handler
 * @return the environment wrapping the started container and its instruction handler
 * @throws Exception if the payload cannot be parsed, the container cannot be started, the
 *     container is found not to be running while waiting, or the waiting thread is interrupted
 */
@Override
public RemoteEnvironment createEnvironment(Environment environment, String workerId) throws Exception {
    // Fail fast when the caller hands in a non-Docker environment.
    Preconditions.checkState(environment.getUrn().equals(BeamUrns.getUrn(RunnerApi.StandardEnvironments.Environments.DOCKER)), "The passed environment does not contain a DockerPayload.");
    final RunnerApi.DockerPayload dockerPayload = RunnerApi.DockerPayload.parseFrom(environment.getPayload());
    // Prepare docker invocation.
    String containerImage = dockerPayload.getContainerImage();
    // TODO: https://issues.apache.org/jira/browse/BEAM-4148 The default service address will not
    // work for Docker for Mac.
    String provisionEndpoint = provisioningServiceServer.getApiServiceDescriptor().getUrl();
    // Options for the docker command itself (as opposed to arguments for the container).
    // NOTE(review): System.getenv returns null when DOCKER_MAC_CONTAINER is unset, so string
    // concatenation yields the literal text "--env=DOCKER_MAC_CONTAINER=null" - confirm that this
    // is what the container side expects.
    ImmutableList.Builder<String> dockerOptsBuilder = ImmutableList.<String>builder().addAll(gcsCredentialArgs()).add("--network=host").add("--env=DOCKER_MAC_CONTAINER=" + System.getenv("DOCKER_MAC_CONTAINER"));
    final boolean retainDockerContainer = pipelineOptions.as(ManualDockerEnvironmentOptions.class).getRetainDockerContainers();
    String semiPersistDir = pipelineOptions.as(RemoteEnvironmentOptions.class).getSemiPersistDir();
    // Arguments handed to the container: the worker id and the provisioning endpoint.
    ImmutableList.Builder<String> argsBuilder = ImmutableList.<String>builder().add(String.format("--id=%s", workerId)).add(String.format("--provision_endpoint=%s", provisionEndpoint));
    // The semi-persistent directory argument is only passed when one is configured.
    if (semiPersistDir != null) {
        argsBuilder.add(String.format("--semi_persist_dir=%s", semiPersistDir));
    }
    LOG.debug("Creating Docker Container with ID {}", workerId);
    // Wrap the blocking call to clientSource.take in case an exception is thrown.
    // Both variables are declared outside the try so that the catch block can tell whether a
    // container was already started and the return statement can see the handler.
    String containerId = null;
    InstructionRequestHandler instructionHandler = null;
    try {
        containerId = docker.runImage(containerImage, dockerOptsBuilder.build(), argsBuilder.build());
        LOG.debug("Created Docker Container with Container ID {}", containerId);
        // Wait on a client from the gRPC server.
        // There is no overall deadline here: the loop repeats for as long as the container is
        // reported running and each take attempt times out.
        while (instructionHandler == null) {
            try {
                // If the docker is not alive anymore, we abort.
                if (!docker.isContainerRunning(containerId)) {
                    IllegalStateException illegalStateException = new IllegalStateException(String.format("No container running for id %s", containerId));
                    // Best effort: log the container output to help diagnose the failure. A
                    // failure to fetch the logs is attached as suppressed rather than replacing
                    // the primary exception.
                    try {
                        String containerLogs = docker.getContainerLogs(containerId);
                        LOG.error("Docker container {} logs:\n{}", containerId, containerLogs);
                    } catch (Exception getLogsException) {
                        illegalStateException.addSuppressed(getLogsException);
                    }
                    throw illegalStateException;
                }
                // Wait up to five seconds per attempt so the liveness check above runs again
                // between attempts.
                instructionHandler = clientSource.take(workerId, Duration.ofSeconds(5));
            } catch (TimeoutException timeoutEx) {
                // Not an error: log progress and go around the loop again.
                LOG.info("Still waiting for startup of environment {} for worker id {}", dockerPayload.getContainerImage(), workerId);
            } catch (InterruptedException interruptEx) {
                // Restore the interrupt flag, then abort; the outer catch performs the cleanup.
                Thread.currentThread().interrupt();
                throw new RuntimeException(interruptEx);
            }
        }
    } catch (Exception e) {
        // containerId is still null when runImage itself failed; there is nothing to clean up then.
        if (containerId != null) {
            // Kill the launched docker container if we can't retrieve a client for it.
            try {
                docker.killContainer(containerId);
                if (!retainDockerContainer) {
                    docker.removeContainer(containerId);
                }
            } catch (Exception dockerException) {
                // Keep the original failure as the primary exception.
                e.addSuppressed(dockerException);
            }
        }
        throw e;
    }
    return DockerContainerEnvironment.create(docker, environment, containerId, instructionHandler, retainDockerContainer);
}
Also used : ManualDockerEnvironmentOptions(org.apache.beam.sdk.options.ManualDockerEnvironmentOptions) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) RemoteEnvironmentOptions(org.apache.beam.sdk.options.RemoteEnvironmentOptions) TimeoutException(java.util.concurrent.TimeoutException) InstructionRequestHandler(org.apache.beam.runners.fnexecution.control.InstructionRequestHandler) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi)

Example 2 with DockerPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload in project beam by apache.

the class DataflowPipelineTranslatorTest method testSetWorkerHarnessContainerImageInPipelineProto.

/**
 * Verifies the effect of the (deprecated) {@link
 * DataflowPipelineOptions#setWorkerHarnessContainerImage(String)} pipeline option on the model
 * pipeline proto: after translation the proto must hold exactly one {@link Environment}, and the
 * {@link DockerPayload#getContainerImage()} of that environment must equal the image that {@link
 * DataflowRunner} reports for the job.
 */
@Test
public void testSetWorkerHarnessContainerImageInPipelineProto() throws Exception {
    // Arrange: options carrying the deprecated worker harness image setting.
    final DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
    dataflowOptions.as(DataflowPipelineOptions.class).setWorkerHarnessContainerImage("gcr.io/image:foo");

    final Pipeline pipeline = Pipeline.create(dataflowOptions);
    final SdkComponents components = createSdkComponents(dataflowOptions);
    final RunnerApi.Pipeline untranslatedProto = PipelineTranslation.toProto(pipeline, components, true);

    // Act: run the Dataflow translation and pull out the resulting components.
    final DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(dataflowOptions);
    final DataflowRunner runner = DataflowRunner.fromOptions(dataflowOptions);
    final JobSpecification jobSpec = translator.translate(pipeline, untranslatedProto, components, runner, Collections.emptyList());
    final RunnerApi.Components translatedComponents = jobSpec.getPipelineProto().getComponents();

    // Assert: a single environment whose Docker payload names the expected image.
    assertEquals(1, translatedComponents.getEnvironmentsCount());
    final Environment onlyEnvironment = Iterables.getOnlyElement(translatedComponents.getEnvironmentsMap().values());
    final String actualImage = DockerPayload.parseFrom(onlyEnvironment.getPayload()).getContainerImage();
    assertEquals(DataflowRunner.getContainerImageForJob(dataflowOptions), actualImage);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) DockerPayload(org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 3 with DockerPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload in project beam by apache.

the class DataflowPipelineTranslatorTest method testSetSdkContainerImageInPipelineProto.

/**
 * Verifies the effect of the {@link DataflowPipelineOptions#setSdkContainerImage(String)} pipeline
 * option on the model pipeline proto: after translation the proto must hold exactly one {@link
 * Environment}, and the {@link DockerPayload#getContainerImage()} of that environment must equal
 * the image that {@link DataflowRunner} reports for the job.
 */
@Test
public void testSetSdkContainerImageInPipelineProto() throws Exception {
    // Arrange: options carrying the SDK container image setting.
    final DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
    dataflowOptions.as(DataflowPipelineOptions.class).setSdkContainerImage("gcr.io/image:foo");

    final Pipeline pipeline = Pipeline.create(dataflowOptions);
    final SdkComponents components = createSdkComponents(dataflowOptions);
    final RunnerApi.Pipeline untranslatedProto = PipelineTranslation.toProto(pipeline, components, true);

    // Act: run the Dataflow translation and pull out the resulting components.
    final DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(dataflowOptions);
    final DataflowRunner runner = DataflowRunner.fromOptions(dataflowOptions);
    final JobSpecification jobSpec = translator.translate(pipeline, untranslatedProto, components, runner, Collections.emptyList());
    final RunnerApi.Components translatedComponents = jobSpec.getPipelineProto().getComponents();

    // Assert: a single environment whose Docker payload names the expected image.
    assertEquals(1, translatedComponents.getEnvironmentsCount());
    final Environment onlyEnvironment = Iterables.getOnlyElement(translatedComponents.getEnvironmentsMap().values());
    final String actualImage = DockerPayload.parseFrom(onlyEnvironment.getPayload()).getContainerImage();
    assertEquals(DataflowRunner.getContainerImageForJob(dataflowOptions), actualImage);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) DockerPayload(org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 4 with DockerPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload in project beam by apache.

the class DataflowRunner method applySdkEnvironmentOverrides.

/**
 * Applies the configured SDK harness container image overrides to every Docker environment of the
 * given pipeline.
 *
 * <p>The override option is a comma-separated list read as consecutive (pattern, replacement)
 * pairs. For each environment whose URN is the Docker URN, every pair is applied in order to the
 * container image via {@link String#replaceAll(String, String)}, and the environment payload is
 * replaced by a Docker payload carrying the resulting image. Environments with other URNs are
 * copied over unchanged.
 *
 * @param pipeline the pipeline proto whose environments are to be rewritten
 * @param options the options supplying the override list
 * @return {@code pipeline} itself when the override option is null or empty, otherwise a rebuilt
 *     pipeline with the rewritten environments
 * @throws RuntimeException if the override list has an odd number of elements or a Docker payload
 *     cannot be parsed
 */
protected RunnerApi.Pipeline applySdkEnvironmentOverrides(RunnerApi.Pipeline pipeline, DataflowPipelineDebugOptions options) {
    final String overrideSpec = options.getSdkHarnessContainerImageOverrides();
    if (Strings.isNullOrEmpty(overrideSpec)) {
        // Nothing configured: hand the input back untouched.
        return pipeline;
    }

    // Flat layout: [pattern0, replacement0, pattern1, replacement1, ...]. The -1 limit keeps
    // trailing empty strings, so a dangling comma still counts as an element.
    final String[] pairs = overrideSpec.split(",", -1);
    if (pairs.length % 2 == 1) {
        throw new RuntimeException("invalid syntax for SdkHarnessContainerImageOverrides: " + options.getSdkHarnessContainerImageOverrides());
    }

    final String dockerUrn = BeamUrns.getUrn(RunnerApi.StandardEnvironments.Environments.DOCKER);
    final RunnerApi.Pipeline.Builder result = pipeline.toBuilder();
    final RunnerApi.Components.Builder components = result.getComponentsBuilder();
    // Start from an empty environment map; entries are re-added below from the original pipeline.
    components.clearEnvironments();

    for (Map.Entry<String, RunnerApi.Environment> idAndEnvironment : pipeline.getComponents().getEnvironmentsMap().entrySet()) {
        final RunnerApi.Environment.Builder rewritten = idAndEnvironment.getValue().toBuilder();
        if (dockerUrn.equals(rewritten.getUrn())) {
            final String originalImage;
            try {
                originalImage = RunnerApi.DockerPayload.parseFrom(rewritten.getPayload()).getContainerImage();
            } catch (InvalidProtocolBufferException e) {
                throw new RuntimeException("Error parsing environment docker payload.", e);
            }
            // Apply each (pattern, replacement) pair in the order given.
            String image = originalImage;
            for (int next = 1; next < pairs.length; next += 2) {
                image = image.replaceAll(pairs[next - 1], pairs[next]);
            }
            final RunnerApi.DockerPayload updatedPayload = RunnerApi.DockerPayload.newBuilder().setContainerImage(image).build();
            rewritten.setPayload(updatedPayload.toByteString());
        }
        components.putEnvironments(idAndEnvironment.getKey(), rewritten.build());
    }
    return result.build();
}
Also used : InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Pipeline(org.apache.beam.sdk.Pipeline) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)4 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)3 Pipeline (org.apache.beam.sdk.Pipeline)3 DockerPayload (org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload)2 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)2 JobSpecification (org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification)2 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)2 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)2 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)2 Test (org.junit.Test)2 Map (java.util.Map)1 TimeoutException (java.util.concurrent.TimeoutException)1 InstructionRequestHandler (org.apache.beam.runners.fnexecution.control.InstructionRequestHandler)1 ManualDockerEnvironmentOptions (org.apache.beam.sdk.options.ManualDockerEnvironmentOptions)1 RemoteEnvironmentOptions (org.apache.beam.sdk.options.RemoteEnvironmentOptions)1 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)1 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)1 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)1 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)1