Search in sources :

Example 1 with DataflowWorkerHarnessOptions

use of org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions in project beam by apache.

the class DataflowRunnerHarness method main.

/**
 * Fetches and processes work units from the Dataflow service.
 *
 * <p>Resolves the control, logging and (optional) status API descriptors, initializes logging and
 * pipeline options, and then hosts the Fn API services on servers created by a
 * {@link ServerFactory}: control, data and state share the control endpoint, logging gets its own
 * endpoint, and the worker status service is only hosted when a status descriptor is available.
 * Once {@code start(...)} returns, the servers are shut down and each is given up to 30 seconds
 * to terminate.
 *
 * @param unusedArgs command-line arguments; not read by this method
 * @throws Exception if any step of initialization, execution or shutdown fails
 */
public static void main(String[] unusedArgs) throws Exception {
    RunnerApi.@Nullable Pipeline pipeline = DataflowWorkerHarnessHelper.getPipelineFromEnv();
    // This descriptor is used for all services except logging. They are isolated to keep
    // critical traffic protected from best effort traffic.
    ApiServiceDescriptor controlApiService = DataflowWorkerHarnessHelper.getControlDescriptor();
    ApiServiceDescriptor loggingApiService = DataflowWorkerHarnessHelper.getLoggingDescriptor();
    // The status descriptor is treated as optional: it is null-checked before use below.
    ApiServiceDescriptor statusApiService = DataflowWorkerHarnessHelper.getStatusDescriptor();
    // NOTE(review): this message is emitted before initializeLogging(...) is called on the next
    // line — confirm that it actually reaches the worker log.
    LOG.info("{} started, using port {} for control, {} for logging.", DataflowRunnerHarness.class, controlApiService, loggingApiService);
    DataflowWorkerHarnessHelper.initializeLogging(DataflowRunnerHarness.class);
    DataflowWorkerHarnessOptions pipelineOptions = DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions(DataflowRunnerHarness.class);
    DataflowWorkerHarnessHelper.configureLogging(pipelineOptions);
    // Initialize registered file systems.
    FileSystems.setDefaultPipelineOptions(pipelineOptions);
    DataflowPipelineDebugOptions dataflowOptions = pipelineOptions.as(DataflowPipelineDebugOptions.class);
    // Choose the server transport from the job's experiments; the domain-socket experiment takes
    // precedence over the plain epoll experiment when both are set.
    ServerFactory serverFactory;
    if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll_domain_socket")) {
        serverFactory = ServerFactory.createEpollDomainSocket();
    } else if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll")) {
        serverFactory = ServerFactory.createEpollSocket();
    } else {
        serverFactory = ServerFactory.createDefault();
    }
    ServerStreamObserverFactory streamObserverFactory = ServerStreamObserverFactory.fromOptions(pipelineOptions);
    // Declared outside the try block so that the finally block can force-stop whichever servers
    // were actually created.
    Server servicesServer = null;
    Server loggingServer = null;
    Server statusServer = null;
    // The services are try-with-resources resources, so they are closed when the try body exits
    // (normally or exceptionally) and before the finally block runs.
    try (BeamFnLoggingService beamFnLoggingService = new BeamFnLoggingService(loggingApiService, DataflowWorkerLoggingInitializer.getSdkLoggingHandler()::publish, streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        BeamFnControlService beamFnControlService = new BeamFnControlService(controlApiService, streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        BeamFnDataGrpcService beamFnDataService = new BeamFnDataGrpcService(pipelineOptions, controlApiService, streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        BeamWorkerStatusGrpcService beamWorkerStatusGrpcService = statusApiService == null ? null : BeamWorkerStatusGrpcService.create(statusApiService, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        GrpcStateService beamFnStateService = GrpcStateService.create()) {
        // Control, data and state are served together on the control endpoint; logging is served
        // on its own endpoint.
        servicesServer = serverFactory.create(ImmutableList.of(beamFnControlService, beamFnDataService, beamFnStateService), controlApiService);
        loggingServer = serverFactory.create(ImmutableList.of(beamFnLoggingService), loggingApiService);
        // gRPC server for obtaining SDK harness runtime status information.
        if (beamWorkerStatusGrpcService != null) {
            statusServer = serverFactory.create(ImmutableList.of(beamWorkerStatusGrpcService), statusApiService);
        }
        start(pipeline, pipelineOptions, beamFnControlService, beamFnDataService, controlApiService, beamFnStateService, beamWorkerStatusGrpcService);
        // Request shutdown of every server first, then wait on each one in turn.
        if (statusServer != null) {
            statusServer.shutdown();
        }
        servicesServer.shutdown();
        loggingServer.shutdown();
        // wait 30 secs for outstanding requests to finish.
        if (statusServer != null) {
            statusServer.awaitTermination(30, TimeUnit.SECONDS);
        }
        servicesServer.awaitTermination(30, TimeUnit.SECONDS);
        loggingServer.awaitTermination(30, TimeUnit.SECONDS);
    } finally {
        // Force-stop any server that was created but has not terminated, whether the try body
        // threw or the 30-second wait elapsed.
        if (statusServer != null && !statusServer.isTerminated()) {
            statusServer.shutdownNow();
        }
        if (servicesServer != null && !servicesServer.isTerminated()) {
            servicesServer.shutdownNow();
        }
        if (loggingServer != null && !loggingServer.isTerminated()) {
            loggingServer.shutdownNow();
        }
    }
}
Also used : GrpcStateService(org.apache.beam.runners.fnexecution.state.GrpcStateService) ApiServiceDescriptor(org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) ServerFactory(org.apache.beam.sdk.fn.server.ServerFactory) BeamFnControlService(org.apache.beam.runners.dataflow.worker.fn.BeamFnControlService) ServerStreamObserverFactory(org.apache.beam.runners.dataflow.worker.fn.stream.ServerStreamObserverFactory) BeamFnDataGrpcService(org.apache.beam.runners.dataflow.worker.fn.data.BeamFnDataGrpcService) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BeamFnLoggingService(org.apache.beam.runners.dataflow.worker.fn.logging.BeamFnLoggingService) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) BeamWorkerStatusGrpcService(org.apache.beam.runners.fnexecution.status.BeamWorkerStatusGrpcService) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 2 with DataflowWorkerHarnessOptions

use of org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions in project beam by apache.

the class DataflowBatchWorkerHarness method main.

/**
 * Entry point of the batch worker: builds a {@link DataflowBatchWorkerHarness} from the worker's
 * pipeline options and runs it.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if initialization or the harness run fails
 */
public static void main(String[] args) throws Exception {
    // User-defined initialization has to happen immediately upon starting, as is guaranteed in
    // JvmInitializer, so it precedes every other step.
    JvmInitializers.runOnStartup();

    final Class<DataflowBatchWorkerHarness> harnessClass = DataflowBatchWorkerHarness.class;
    DataflowWorkerHarnessHelper.initializeLogging(harnessClass);

    final DataflowWorkerHarnessOptions options =
        DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions(harnessClass);
    // The harness is constructed before logging is configured from the options; keep this order.
    final DataflowBatchWorkerHarness harness = new DataflowBatchWorkerHarness(options);
    DataflowWorkerHarnessHelper.configureLogging(options);

    JvmInitializers.runBeforeProcessing(options);
    harness.run();
}
Also used : DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions)

Example 3 with DataflowWorkerHarnessOptions

use of org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions in project beam by apache.

the class WorkerPipelineOptionsFactory method createFromSystemProperties.

/**
 * Creates a set of Dataflow worker harness options based on a set of known system properties.
 * This is meant to only be used from the Dataflow worker harness as a method to bootstrap the
 * worker harness.
 *
 * <p>The serialized options are taken from the first source that is set:
 *
 * <ol>
 *   <li>the {@code sdk_pipeline_options} system property, holding the serialized options inline;
 *   <li>the {@code sdk_pipeline_options_file} system property, naming a UTF-8 file that holds
 *       the serialized options;
 *   <li>otherwise empty options are used.
 * </ol>
 *
 * <p>The {@code worker_id}, {@code job_id} and {@code worker_pool} system properties, when set,
 * are then copied onto the returned options.
 *
 * @return A {@link DataflowWorkerHarnessOptions} object configured for the Dataflow worker
 *     harness.
 * @throws IOException if the options file cannot be read or the serialized options cannot be
 *     deserialized
 */
public static DataflowWorkerHarnessOptions createFromSystemProperties() throws IOException {
    DataflowWorkerHarnessOptions options;
    // Each property is read exactly once and null-checked, so a property that is removed (or is
    // not a String) between a containsKey check and the read can never hand null to the parser.
    String inlineOptions = System.getProperty("sdk_pipeline_options");
    if (inlineOptions != null) {
        // TODO: remove this method of getting pipeline options, once migration is complete.
        options = parseSerializedOptions(inlineOptions);
    } else {
        String filePath = System.getProperty("sdk_pipeline_options_file");
        if (filePath != null) {
            LOG.info("Loading pipeline options from {}", filePath);
            String serializedOptions = new String(Files.readAllBytes(Paths.get(filePath)), StandardCharsets.UTF_8);
            options = parseSerializedOptions(serializedOptions);
        } else {
            LOG.info("Using empty PipelineOptions, as none were provided.");
            options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class);
        }
    }
    // These values will not be known at job submission time and must be provided.
    String workerId = System.getProperty("worker_id");
    if (workerId != null) {
        options.setWorkerId(workerId);
    }
    String jobId = System.getProperty("job_id");
    if (jobId != null) {
        options.setJobId(jobId);
    }
    String workerPool = System.getProperty("worker_pool");
    if (workerPool != null) {
        options.setWorkerPool(workerPool);
    }
    return options;
}

/**
 * Logs the serialized pipeline options and deserializes them into worker harness options.
 *
 * @param serializedOptions the serialized {@link PipelineOptions}; must not be null
 * @return the deserialized options viewed as {@link DataflowWorkerHarnessOptions}
 * @throws IOException if the serialized options cannot be deserialized
 */
private static DataflowWorkerHarnessOptions parseSerializedOptions(String serializedOptions) throws IOException {
    LOG.info("Worker harness starting with: {}", serializedOptions);
    return new ObjectMapper().readValue(serializedOptions, PipelineOptions.class).as(DataflowWorkerHarnessOptions.class);
}
Also used : DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 4 with DataflowWorkerHarnessOptions

use of org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions in project beam by apache.

the class WorkerPipelineOptionsFactoryTest method testCreationFromSystemProperties.

@Test
public void testCreationFromSystemProperties() throws Exception {
    // System properties the factory is expected to pick up.
    ImmutableMap<String, String> harnessProperties =
        ImmutableMap.<String, String>builder()
            .put("worker_id", "test_worker_id")
            .put("job_id", "test_job_id")
            .put("sdk_pipeline_options", "{\"options\":{\"numWorkers\":999}}")
            .build();
    System.getProperties().putAll(harnessProperties);

    // testing deprecated functionality
    @SuppressWarnings("deprecation")
    DataflowWorkerHarnessOptions parsed = WorkerPipelineOptionsFactory.createFromSystemProperties();

    // Both the directly supplied identifiers and the deserialized option must be visible.
    assertEquals("test_worker_id", parsed.getWorkerId());
    assertEquals("test_job_id", parsed.getJobId());
    assertEquals(999, parsed.getNumWorkers());
}
Also used : DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) Test(org.junit.Test)

Example 5 with DataflowWorkerHarnessOptions

use of org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions in project beam by apache.

the class DataflowWorkerHarnessHelper method initializeGlobalStateAndPipelineOptions.

/**
 * Builds the worker's pipeline options from system properties and applies the process-wide
 * setup derived from them.
 *
 * <p>The app name is set to the simple name of {@code workerHarnessClass}, the job and worker
 * ids are published to {@link DataflowWorkerLoggingMDC}, and, when the Conscrypt experiment is
 * enabled, an {@link OpenSSLProvider} is inserted as the first security provider.
 *
 * @param workerHarnessClass the harness class whose simple name becomes the app name
 * @return the initialized pipeline options
 * @throws Exception if the pipeline options cannot be created
 */
public static DataflowWorkerHarnessOptions initializeGlobalStateAndPipelineOptions(Class<?> workerHarnessClass) throws Exception {
    // Extract pipeline options.
    final DataflowWorkerHarnessOptions options = WorkerPipelineOptionsFactory.createFromSystemProperties();
    options.setAppName(workerHarnessClass.getSimpleName());

    // Configure logging with job-specific properties.
    DataflowWorkerLoggingMDC.setJobId(options.getJobId());
    DataflowWorkerLoggingMDC.setWorkerId(options.getWorkerId());

    final ExperimentContext experiments = ExperimentContext.parseFrom(options);
    final String conscryptExperiment = Experiment.EnableConscryptSecurityProvider.getName();

    if (!experiments.isEnabled(Experiment.EnableConscryptSecurityProvider)) {
        LOG.info(
            "Not using conscrypt SSL. Note this is the default Java behavior, but may " + "have reduced performance. To use conscrypt SSL pass pipeline option " + "--experiments={}",
            conscryptExperiment);
        return options;
    }

    // Enable fast SSL provider.
    LOG.info(
        "Dataflow runner is using conscrypt SSL. To disable this feature, " + "remove the pipeline option --experiments={}",
        conscryptExperiment);
    Security.insertProviderAt(new OpenSSLProvider(), 1);
    return options;
}
Also used : DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) OpenSSLProvider(org.conscrypt.OpenSSLProvider)

Aggregations

DataflowWorkerHarnessOptions (org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions)8 Test (org.junit.Test)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 File (java.io.File)2 ApiServiceDescriptor (org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor)1 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)1 MetricsLogger (org.apache.beam.runners.core.metrics.MetricsLogger)1 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)1 BeamFnControlService (org.apache.beam.runners.dataflow.worker.fn.BeamFnControlService)1 BeamFnDataGrpcService (org.apache.beam.runners.dataflow.worker.fn.data.BeamFnDataGrpcService)1 BeamFnLoggingService (org.apache.beam.runners.dataflow.worker.fn.logging.BeamFnLoggingService)1 ServerStreamObserverFactory (org.apache.beam.runners.dataflow.worker.fn.stream.ServerStreamObserverFactory)1 GrpcStateService (org.apache.beam.runners.fnexecution.state.GrpcStateService)1 BeamWorkerStatusGrpcService (org.apache.beam.runners.fnexecution.status.BeamWorkerStatusGrpcService)1 ServerFactory (org.apache.beam.sdk.fn.server.ServerFactory)1 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)1 Server (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server)1 Nullable (org.checkerframework.checker.nullness.qual.Nullable)1 OpenSSLProvider (org.conscrypt.OpenSSLProvider)1