Search in sources :

Example 41 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class NestedPayloadKafkaTable method transformOutput.

/**
 * Builds the Kafka record to publish for a single output row.
 *
 * <p>The row is first cast to this table's schema. The message key, event timestamp and headers
 * are copied from the row only when the schema declares the corresponding field; otherwise the
 * key and timestamp stay {@code null} and the header list stays empty. The payload is taken as
 * raw bytes when no payload serializer is configured, and is otherwise produced by serializing
 * the nested payload row.
 *
 * @param row the row to convert
 * @return a record addressed to the table's only topic, with no explicit partition
 */
// Suppress nullability warnings: ProducerRecord is supposed to accept null arguments.
@SuppressWarnings("argument.type.incompatible")
@VisibleForTesting
ProducerRecord<byte[], byte[]> transformOutput(Row row) {
    Row castedRow = castRow(row, row.getSchema(), schema);
    String targetTopic = Iterables.getOnlyElement(getTopics());

    // Key is optional: absent from the schema means a null key on the record.
    byte[] messageKey =
        schema.hasField(Schemas.MESSAGE_KEY_FIELD)
            ? castedRow.getBytes(Schemas.MESSAGE_KEY_FIELD)
            : null;

    // Timestamp is optional both at the schema level and at the value level.
    Long eventMillis = null;
    if (schema.hasField(Schemas.EVENT_TIMESTAMP_FIELD)) {
        ReadableDateTime eventTime = castedRow.getDateTime(Schemas.EVENT_TIMESTAMP_FIELD);
        if (eventTime != null) {
            eventMillis = eventTime.getMillis();
        }
    }

    // Each header row carries one key and several values; emit one Kafka header per value.
    List<Header> recordHeaders = ImmutableList.of();
    if (schema.hasField(Schemas.HEADERS_FIELD)) {
        Collection<Row> headerRows = checkArgumentNotNull(castedRow.getArray(Schemas.HEADERS_FIELD));
        ImmutableList.Builder<Header> collected = ImmutableList.builder();
        for (Row headerRow : headerRows) {
            String headerName = checkArgumentNotNull(headerRow.getString(Schemas.HEADERS_KEY_FIELD));
            Collection<byte[]> headerValues = checkArgumentNotNull(headerRow.getArray(Schemas.HEADERS_VALUES_FIELD));
            for (byte[] headerValue : headerValues) {
                collected.add(new RecordHeader(headerName, headerValue));
            }
        }
        recordHeaders = collected.build();
    }

    byte[] body =
        payloadSerializer == null
            ? castedRow.getBytes(Schemas.PAYLOAD_FIELD)
            : payloadSerializer.serialize(checkArgumentNotNull(castedRow.getRow(Schemas.PAYLOAD_FIELD)));

    return new ProducerRecord<>(targetTopic, null, eventMillis, messageKey, body, recordHeaders);
}
Also used : ReadableDateTime(org.joda.time.ReadableDateTime) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) RecordHeader(org.apache.kafka.common.header.internals.RecordHeader) Header(org.apache.kafka.common.header.Header) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Cast.castRow(org.apache.beam.sdk.schemas.transforms.Cast.castRow) Row(org.apache.beam.sdk.values.Row) RecordHeader(org.apache.kafka.common.header.internals.RecordHeader) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 42 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class BatchDataflowWorker method doWork.

/**
 * Performs the given work; returns true if successful.
 *
 * <p>Builds an executor for the work item (either a map task or a source operation), runs it
 * with a progress updater, and reports the outcome through {@code workItemStatusClient}. Any
 * {@link Throwable} raised while setting up or executing the work is reported via
 * {@code reportError} and turned into a {@code false} return value rather than propagated.
 * The executor, if one was created, is always closed, and the SDK worker harness is always
 * handed back to the registry.
 *
 * @param workItem the unit of work to execute
 * @param workItemStatusClient the client used to report success or failure of the work item
 * @return {@code true} if the work ran and success was reported, {@code false} if an error was
 *     reported instead
 * @throws IOException Only if the WorkUnitClient fails.
 */
@VisibleForTesting
boolean doWork(WorkItem workItem, WorkItemStatusClient workItemStatusClient) throws IOException {
    LOG.debug("Executing: {}", workItem);
    DataflowWorkExecutor worker = null;
    // Acquired before the try block; released in the finally block below.
    SdkWorkerHarness sdkWorkerHarness = sdkHarnessRegistry.getAvailableWorkerAndAssignWork();
    try {
        // Populate PipelineOptions with data from work unit.
        options.setProject(workItem.getProjectId());
        // The stage name comes from whichever task kind the work item carries.
        final String stageName;
        if (workItem.getMapTask() != null) {
            stageName = workItem.getMapTask().getStageName();
        } else if (workItem.getSourceOperationTask() != null) {
            stageName = workItem.getSourceOperationTask().getStageName();
        } else {
            throw new RuntimeException("Unknown kind of work item: " + workItem.toString());
        }
        CounterSet counterSet = new CounterSet();
        BatchModeExecutionContext executionContext = BatchModeExecutionContext.create(counterSet, sideInputDataCache, sideInputWeakReferenceCache, readerRegistry, options, stageName, String.valueOf(workItem.getId()));
        // Create the executor matching the task kind.
        if (workItem.getMapTask() != null) {
            MutableNetwork<Node, Edge> network = mapTaskToNetwork.apply(workItem.getMapTask());
            // Guarded so the Graphviz rendering is only computed when debug logging is on.
            if (LOG.isDebugEnabled()) {
                LOG.debug("Network as Graphviz .dot: {}", Networks.toDot(network));
            }
            worker = mapTaskExecutorFactory.create(sdkWorkerHarness.getControlClientHandler(), sdkWorkerHarness.getGrpcDataFnServer(), sdkHarnessRegistry.beamFnDataApiServiceDescriptor(), sdkWorkerHarness.getGrpcStateFnServer(), network, options, stageName, readerRegistry, sinkRegistry, executionContext, counterSet, idGenerator);
        } else if (workItem.getSourceOperationTask() != null) {
            worker = SourceOperationExecutorFactory.create(options, workItem.getSourceOperationTask(), counterSet, executionContext, stageName);
        } else {
            // NOTE(review): the same condition already throws RuntimeException when the stage
            // name is resolved above, so this branch looks unreachable unless the getters
            // return different values between calls — confirm.
            throw new IllegalStateException("Work Item was neither a MapTask nor a SourceOperation");
        }
        workItemStatusClient.setWorker(worker, executionContext);
        DataflowWorkProgressUpdater progressUpdater = new DataflowWorkProgressUpdater(workItemStatusClient, workItem, worker, options);
        executeWork(worker, progressUpdater);
        workItemStatusClient.reportSuccess();
        return true;
    } catch (Throwable e) {
        // Every failure, including the setup exceptions thrown above, is reported as a work
        // item error instead of escaping this method.
        workItemStatusClient.reportError(e);
        return false;
    } finally {
        // The worker is null when the failure happened before an executor was created.
        if (worker != null) {
            try {
                worker.close();
            } catch (Exception exn) {
                // Close failures are logged only; the outcome has already been reported.
                LOG.warn("Uncaught exception while closing worker. All work has already committed or " + "been marked for retry.", exn);
            }
        }
        // Hand the harness back to the registry whether the work succeeded or failed.
        if (sdkWorkerHarness != null) {
            sdkHarnessRegistry.completeWork(sdkWorkerHarness);
        }
    }
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) SdkWorkerHarness(org.apache.beam.runners.dataflow.worker.SdkHarnessRegistry.SdkWorkerHarness) IOException(java.io.IOException) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 43 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class DataflowBatchWorkerHarness method processWork.

/**
 * Runs the batch worker on a pool of worker threads and blocks until all of them have finished.
 *
 * <p>One {@code WorkerThread} task is created per thread chosen by
 * {@code chooseNumberOfThreads}, and all tasks are submitted together to the executor service
 * obtained from the pipeline options. Returning from the blocking call is treated as an error
 * condition and logged as such.
 *
 * @param pipelineOptions options that supply the thread count and the executor service
 * @param worker the worker shared by every task
 * @param sleeper the sleeper handed to every task
 * @throws InterruptedException if interrupted while waiting for the tasks to complete
 */
@VisibleForTesting
static void processWork(DataflowWorkerHarnessOptions pipelineOptions, final BatchDataflowWorker worker, Sleeper sleeper) throws InterruptedException {
    final int threadCount = chooseNumberOfThreads(pipelineOptions);
    final ExecutorService workerPool = pipelineOptions.getExecutorService();

    LOG.debug("Starting {} worker threads", threadCount);
    // One task per thread; every task shares the same worker and sleeper.
    final List<Callable<Boolean>> workerTasks = new ArrayList<>();
    while (workerTasks.size() < threadCount) {
        workerTasks.add(new WorkerThread(worker, sleeper));
    }

    LOG.debug("Waiting for {} worker threads", threadCount);
    // Blocks until every task completes, which is only expected when something went badly wrong.
    workerPool.invokeAll(workerTasks);
    LOG.error("All threads died.");
}
Also used : ExecutorService(java.util.concurrent.ExecutorService) ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 44 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class ParDoTranslation method fromProto.

/**
 * Rebuilds a Java {@code StateSpec} from its portable proto representation.
 *
 * <p>Value, bag, map and set specs are reconstructed from the coders registered in
 * {@code components} under the ids named by the proto. A combining spec additionally requires
 * a Java-serialized {@code CombineFn}, which is deserialized from the spec payload.
 *
 * @param stateSpec the proto state spec to convert
 * @param components the rehydrated components used to look up coders by id
 * @return the equivalent Java state spec
 * @throws IOException if a coder cannot be obtained from {@code components}
 * @throws UnsupportedOperationException if a combining spec carries a non-Java combine function
 * @throws IllegalArgumentException if the spec case is unset or not recognized
 */
@VisibleForTesting
static StateSpec<?> fromProto(RunnerApi.StateSpec stateSpec, RehydratedComponents components) throws IOException {
    switch (stateSpec.getSpecCase()) {
        case READ_MODIFY_WRITE_SPEC: {
            String valueCoderId = stateSpec.getReadModifyWriteSpec().getCoderId();
            return StateSpecs.value(components.getCoder(valueCoderId));
        }
        case BAG_SPEC: {
            String elementCoderId = stateSpec.getBagSpec().getElementCoderId();
            return StateSpecs.bag(components.getCoder(elementCoderId));
        }
        case COMBINING_SPEC: {
            FunctionSpec combineFnProto = stateSpec.getCombiningSpec().getCombineFn();
            boolean isJavaCombineFn = combineFnProto.getUrn().equals(CombineTranslation.JAVA_SERIALIZED_COMBINE_FN_URN);
            if (!isJavaCombineFn) {
                throw new UnsupportedOperationException(String.format("Cannot create %s from non-Java %s: %s", StateSpec.class.getSimpleName(), Combine.CombineFn.class.getSimpleName(), combineFnProto.getUrn()));
            }
            byte[] serializedFn = combineFnProto.getPayload().toByteArray();
            Combine.CombineFn<?, ?, ?> deserializedFn = (Combine.CombineFn<?, ?, ?>) SerializableUtils.deserializeFromByteArray(serializedFn, Combine.CombineFn.class.getSimpleName());
            String accumulatorCoderId = stateSpec.getCombiningSpec().getAccumulatorCoderId();
            // Raw Coder cast: the accumulator coder is taken to be a valid accumulator coder
            // for the CombineFn, by construction.
            return StateSpecs.combining((Coder) components.getCoder(accumulatorCoderId), deserializedFn);
        }
        case MAP_SPEC: {
            String keyCoderId = stateSpec.getMapSpec().getKeyCoderId();
            String valueCoderId = stateSpec.getMapSpec().getValueCoderId();
            return StateSpecs.map(components.getCoder(keyCoderId), components.getCoder(valueCoderId));
        }
        case SET_SPEC: {
            String elementCoderId = stateSpec.getSetSpec().getElementCoderId();
            return StateSpecs.set(components.getCoder(elementCoderId));
        }
        case SPEC_NOT_SET:
        default:
            break;
    }
    // Reached only for an unset or unrecognized spec case.
    throw new IllegalArgumentException(String.format("Unknown %s: %s", RunnerApi.StateSpec.class.getName(), stateSpec));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Combine(org.apache.beam.sdk.transforms.Combine) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 45 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class DataflowRunner method validateWorkerSettings.

/**
 * Validates the worker placement options and migrates the deprecated {@code zone} option.
 *
 * <p>After validating the SDK container image options, this rejects any combination of two of
 * {@code zone}, {@code workerRegion} and {@code workerZone}, and rejects an experiment whose
 * name starts with {@code worker_region} alongside either {@code workerRegion} or
 * {@code workerZone}. If {@code zone} is set, a deprecation warning is logged, its value is
 * copied into {@code workerZone}, and {@code zone} is cleared.
 *
 * @param workerOptions the worker pool options to validate; may be modified as described above
 * @throws IllegalArgumentException if any of the mutually exclusive combinations is present
 */
@VisibleForTesting
static void validateWorkerSettings(DataflowPipelineWorkerPoolOptions workerOptions) {
    DataflowPipelineOptions dataflowOptions = workerOptions.as(DataflowPipelineOptions.class);
    validateSdkContainerImageOptions(workerOptions);
    GcpOptions gcpOptions = workerOptions.as(GcpOptions.class);

    // At most one of zone / workerRegion / workerZone may be set.
    Preconditions.checkArgument(
        !(gcpOptions.getZone() != null && gcpOptions.getWorkerRegion() != null),
        "Cannot use option zone with workerRegion. Prefer either workerZone or workerRegion.");
    Preconditions.checkArgument(
        !(gcpOptions.getZone() != null && gcpOptions.getWorkerZone() != null),
        "Cannot use option zone with workerZone. Prefer workerZone.");
    Preconditions.checkArgument(
        !(gcpOptions.getWorkerRegion() != null && gcpOptions.getWorkerZone() != null),
        "workerRegion and workerZone options are mutually exclusive.");

    // The worker_region experiment conflicts with both explicit placement options.
    boolean usesWorkerRegionExperiment =
        dataflowOptions.getExperiments() != null
            && dataflowOptions.getExperiments().stream()
                .anyMatch(experiment -> experiment.startsWith("worker_region"));
    Preconditions.checkArgument(
        !(usesWorkerRegionExperiment && gcpOptions.getWorkerRegion() != null),
        "Experiment worker_region and option workerRegion are mutually exclusive.");
    Preconditions.checkArgument(
        !(usesWorkerRegionExperiment && gcpOptions.getWorkerZone() != null),
        "Experiment worker_region and option workerZone are mutually exclusive.");

    if (gcpOptions.getZone() == null) {
        return;
    }
    // Carry the deprecated option over to its replacement, then clear it.
    LOG.warn("Option --zone is deprecated. Please use --workerZone instead.");
    gcpOptions.setWorkerZone(gcpOptions.getZone());
    gcpOptions.setZone(null);
}
Also used : GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

VisibleForTesting (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)81 ArrayList (java.util.ArrayList)18 IOException (java.io.IOException)17 ParameterizedType (java.lang.reflect.ParameterizedType)15 Type (java.lang.reflect.Type)15 Parameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter)14 BundleFinalizerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter)14 PipelineOptionsParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter)14 RestrictionParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter)14 RestrictionTrackerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter)14 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)14 StateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter)14 TimerFamilyParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter)14 TimerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter)14 WatermarkEstimatorParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter)14 WatermarkEstimatorStateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter)14 WindowParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter)14 TypeParameter (org.apache.beam.sdk.values.TypeParameter)14 DoFn (org.apache.beam.sdk.transforms.DoFn)10 Map (java.util.Map)7