Search in sources :

Example 46 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class DataflowPipelineJob method waitUntilFinish.

/**
 * Polls the job until it reports a terminal state or the backoff is exhausted.
 *
 * <p>Each iteration first fetches the job state, then processes job messages, and only then
 * checks whether the fetched state is terminal.
 *
 * @param duration The time to wait for the job to finish. Provide a value less than 1 ms for an
 *     infinite wait.
 * @param messageHandler If non null this handler will be invoked for each batch of messages
 *     received.
 * @param sleeper A sleeper to use to sleep between attempts.
 * @param nanoClock A nanoClock used to time the total time taken.
 * @param monitor The monitoring utility handed to {@code processJobMessages} along with the
 *     message handler.
 * @return The final state of the job, or null if no terminal state was observed before the
 *     backoff ran out (timeout or repeated failures).
 * @throws IOException If there is a persistent problem getting job information.
 * @throws InterruptedException if the thread is interrupted.
 */
@VisibleForTesting
@Nullable
State waitUntilFinish(Duration duration, MonitoringUtil.@Nullable JobMessagesHandler messageHandler, Sleeper sleeper, NanoClock nanoClock, MonitoringUtil monitor) throws IOException, InterruptedException {
    BackOff backoff = getMessagesBackoff(duration);
    // The wall-clock limit is measured from the *first request*. A backoff instance can at best
    // measure the time since the first attempt at one particular request, so the cumulative
    // elapsed time is tracked here instead.
    long startNanos = nanoClock.nanoTime();
    State state = State.UNKNOWN;
    Exception lastFailure;
    do {
        lastFailure = null;
        try {
            // Fetch the state before listing messages, so that messages are always fetched once
            // more after the job has finished and none of them are missed.
            state = getStateWithRetries(BackOffAdapter.toGcpBackOff(STATUS_BACKOFF_FACTORY.withMaxRetries(0).backoff()), sleeper);
        } catch (IOException e) {
            lastFailure = e;
            LOG.warn("Failed to get job state: {}", e.getMessage());
            LOG.debug("Failed to get job state.", e);
        }
        // Messages are only processed when the state fetch succeeded.
        if (lastFailure == null) {
            lastFailure = processJobMessages(messageHandler, monitor);
        }
        // A failure in either step skips straight to the next backoff attempt.
        if (lastFailure == null) {
            if (state.isTerminal()) {
                // The job is done; nothing more to wait for.
                logTerminalState(state);
                return state;
            }
            // Successful round: reset the attempt count and account for the time already spent.
            backoff = resetBackoff(duration, nanoClock, startNanos);
        }
    } while (BackOffUtils.next(sleeper, backoff));
    if (lastFailure != null) {
        LOG.error("Failed to fetch job metadata.", lastFailure);
    } else {
        LOG.warn("No terminal state was returned within allotted timeout. State value {}", state);
    }
    return null;
}
Also used : IOException(java.io.IOException) BackOff(com.google.api.client.util.BackOff) GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) SocketTimeoutException(java.net.SocketTimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 47 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class FlinkExecutionEnvironments method createStreamExecutionEnvironment.

/**
 * Builds and configures the Flink {@link StreamExecutionEnvironment} for the given options.
 *
 * <p>The Flink master setting selects the environment: {@code [local]} creates a local
 * environment, {@code [auto]} uses whatever {@code getExecutionEnvironment()} returns (replaced
 * by a freshly configured local environment if that turns out to be local), and anything else
 * is parsed as {@code host[:port]} of a remote cluster.
 *
 * <p>As a side effect, the resolved parallelism is written back into {@code options}.
 *
 * @param options pipeline options driving the environment configuration
 * @param filesToStage files handed to the remote environment; only used for a remote master
 * @param confDir directory passed to {@code getFlinkConfiguration}; may be null
 * @return the configured streaming environment
 */
@VisibleForTesting
static StreamExecutionEnvironment createStreamExecutionEnvironment(FlinkPipelineOptions options, List<String> filesToStage, @Nullable String confDir) {
    LOG.info("Creating a Streaming Environment.");
    // Flink talks REST, but expects the master address without an http scheme.
    String master = stripHttpSchema(options.getFlinkMaster());
    Configuration conf = getFlinkConfiguration(confDir);
    StreamExecutionEnvironment env;
    // Pick the environment flavour based on the master setting.
    if ("[local]".equals(master)) {
        setManagedMemoryByFraction(conf);
        disableClassLoaderLeakCheck(conf);
        env = StreamExecutionEnvironment.createLocalEnvironment(getDefaultLocalParallelism(), conf);
    } else if ("[auto]".equals(master)) {
        StreamExecutionEnvironment detected = StreamExecutionEnvironment.getExecutionEnvironment();
        if (detected instanceof LocalStreamEnvironment) {
            // Replace the detected local environment with one built from our configuration.
            disableClassLoaderLeakCheck(conf);
            env = StreamExecutionEnvironment.createLocalEnvironment(getDefaultLocalParallelism(), conf);
        } else {
            env = detected;
        }
    } else {
        // Remote master: fall back to the configured REST port when none is given explicitly.
        int defaultPort = conf.getInteger(RestOptions.PORT);
        HostAndPort hostAndPort = HostAndPort.fromString(master).withDefaultPort(defaultPort);
        String host = hostAndPort.getHost();
        int port = hostAndPort.getPort();
        conf.setInteger(RestOptions.PORT, port);
        final SavepointRestoreSettings savepointRestoreSettings =
                options.getSavepointPath() != null
                        ? SavepointRestoreSettings.forPath(options.getSavepointPath(), options.getAllowNonRestoredState())
                        : SavepointRestoreSettings.none();
        env = new RemoteStreamEnvironment(host, port, conf, filesToStage.toArray(new String[filesToStage.size()]), null, savepointRestoreSettings);
        LOG.info("Using Flink Master URL {}:{}.", host, port);
    }
    // Set the parallelism, required by UnboundedSourceWrapper to generate consistent splits.
    final int resolvedParallelism = determineParallelism(options.getParallelism(), env.getParallelism(), conf);
    env.setParallelism(resolvedParallelism);
    if (options.getMaxParallelism() > 0) {
        env.setMaxParallelism(options.getMaxParallelism());
    }
    // Some execution code reads the parallelism back from the options.
    options.setParallelism(resolvedParallelism);
    if (!options.getObjectReuse()) {
        env.getConfig().disableObjectReuse();
    } else {
        env.getConfig().enableObjectReuse();
    }
    // Event time is the default time characteristic.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // For the next two settings, -1 means "leave Flink's configured default untouched".
    int executionRetries = options.getNumberOfExecutionRetries();
    if (executionRetries != -1) {
        env.setNumberOfExecutionRetries(executionRetries);
    }
    long executionRetryDelay = options.getExecutionRetryDelay();
    if (executionRetryDelay != -1) {
        env.getConfig().setExecutionRetryDelay(executionRetryDelay);
    }
    configureCheckpointing(options, env);
    applyLatencyTrackingInterval(env.getConfig(), options);
    if (options.getAutoWatermarkInterval() != null) {
        env.getConfig().setAutoWatermarkInterval(options.getAutoWatermarkInterval());
    }
    configureStateBackend(options, env);
    return env;
}
Also used : HostAndPort(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort) LocalStreamEnvironment(org.apache.flink.streaming.api.environment.LocalStreamEnvironment) Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) RemoteStreamEnvironment(org.apache.flink.streaming.api.environment.RemoteStreamEnvironment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 48 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class QueryablePipeline method getPrimitiveTransformIds.

/**
 * Returns the ids of the leaf transforms reachable from every primitive transform in the given
 * {@link RunnerApi.Components}.
 *
 * <p>A transform accepted by {@code isPrimitiveTransform} that has no subtransforms contributes
 * its own id. One that does have subtransforms contributes the ids of its leaf descendants
 * instead.
 *
 * @param components the pipeline components whose transforms are inspected
 * @return the collected ids, without duplicates, in the order they were first encountered
 * @throws IllegalArgumentException if a subtransform id is not present in the transforms map
 */
@VisibleForTesting
static Collection<String> getPrimitiveTransformIds(RunnerApi.Components components) {
    // Fetch the map once instead of on every lookup.
    Map<String, PTransform> transformsById = components.getTransformsMap();
    Collection<String> ids = new LinkedHashSet<>();
    for (Map.Entry<String, PTransform> transformEntry : transformsById.entrySet()) {
        if (!isPrimitiveTransform(transformEntry.getValue())) {
            continue;
        }
        // Sometimes "primitive" transforms have sub-transforms (and even deeper-nested
        // descendants), due to runners either rewriting them in terms of runner-specific
        // transforms, or SDKs constructing them in terms of other underlying transforms (see
        // https://issues.apache.org/jira/browse/BEAM-5441).
        // We consider any "leaf" descendants of these "primitive" transforms to be the true
        // "primitives" that we preserve here; in the common case, this is just the "primitive"
        // itself, which has no descendants.
        Deque<String> transforms = new ArrayDeque<>();
        transforms.push(transformEntry.getKey());
        while (!transforms.isEmpty()) {
            String id = transforms.pop();
            PTransform next = transformsById.get(id);
            if (next == null) {
                // A dangling reference would otherwise surface as a message-less NPE below.
                throw new IllegalArgumentException("Transform '" + id + "', reached from primitive transform '" + transformEntry.getKey() + "', is not present in the components");
            }
            List<String> subtransforms = next.getSubtransformsList();
            if (subtransforms.isEmpty()) {
                ids.add(id);
            } else {
                transforms.addAll(subtransforms);
            }
        }
    }
    return ids;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Map(java.util.Map) ArrayDeque(java.util.ArrayDeque) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 49 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class WindmillStateInternals method encodeKey.

/**
 * Encodes the given namespace and address as {@code <namespace>+<address>}.
 *
 * @param namespace the state namespace, written first
 * @param address the state tag, written after the {@code '+'} separator
 * @return the UTF-8 encoded key
 * @throws RuntimeException wrapping any {@link IOException} raised while writing the key
 */
@VisibleForTesting
static ByteString encodeKey(StateNamespace namespace, StateTag<?> address) {
    // Write into a ByteString.Output instead of using concatenation or String.format: these
    // keys are built very often and this performs better. See associated benchmarks.
    ByteString.Output keyBytes = ByteString.newOutput();
    OutputStreamWriter keyWriter = new OutputStreamWriter(keyBytes, StandardCharsets.UTF_8);
    try {
        // stringKey starts and ends with a slash. It is separated from the StateTag ID by a
        // '+' (which is guaranteed not to be in the stringKey) because the ID comes from the
        // user.
        namespace.appendTo(keyWriter);
        keyWriter.write('+');
        address.appendTo(keyWriter);
        // Push everything buffered in the writer into keyBytes before snapshotting it.
        keyWriter.flush();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return keyBytes.toByteString();
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 50 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class WorkItemStatusClient method populateMetricUpdates.

/**
 * Sets the metric updates on the given status.
 *
 * <p>When an execution context with an execution state tracker is available, a single
 * {@code "internal"} update named {@code "state-sampler"} is attached, carrying the tracker's
 * transition count, the milliseconds since its last transition and, if there is a current
 * state, that state's description. Otherwise an empty list is set.
 *
 * @param status the work item status that receives the metric updates
 */
@VisibleForTesting
synchronized void populateMetricUpdates(WorkItemStatus status) {
    List<MetricUpdate> metricUpdates = new ArrayList<>();
    boolean trackerAvailable = executionContext != null && executionContext.getExecutionStateTracker() != null;
    if (trackerAvailable) {
        ExecutionStateTracker stateTracker = executionContext.getExecutionStateTracker();

        MetricStructuredName samplerName = new MetricStructuredName();
        samplerName.setName("state-sampler");

        // Payload of the internal metric; the state name is only present when a state is active.
        Map<String, Object> samplerValues = new HashMap<>();
        ExecutionState currentState = stateTracker.getCurrentState();
        if (currentState != null) {
            samplerValues.put("last-state-name", currentState.getDescription());
        }
        samplerValues.put("num-transitions", stateTracker.getNumTransitions());
        samplerValues.put("last-state-duration-ms", stateTracker.getMillisSinceLastTransition());

        MetricUpdate samplerUpdate = new MetricUpdate();
        samplerUpdate.setKind("internal");
        samplerUpdate.setName(samplerName);
        samplerUpdate.setInternal(samplerValues);
        metricUpdates.add(samplerUpdate);
    }
    status.setMetricUpdates(metricUpdates);
}
Also used : ExecutionState(org.apache.beam.runners.core.metrics.ExecutionStateTracker.ExecutionState) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MetricStructuredName(com.google.api.services.dataflow.model.MetricStructuredName) MetricUpdate(com.google.api.services.dataflow.model.MetricUpdate) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

VisibleForTesting (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)81 ArrayList (java.util.ArrayList)18 IOException (java.io.IOException)17 ParameterizedType (java.lang.reflect.ParameterizedType)15 Type (java.lang.reflect.Type)15 Parameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter)14 BundleFinalizerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter)14 PipelineOptionsParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter)14 RestrictionParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter)14 RestrictionTrackerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter)14 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)14 StateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter)14 TimerFamilyParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter)14 TimerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter)14 WatermarkEstimatorParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter)14 WatermarkEstimatorStateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter)14 WindowParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter)14 TypeParameter (org.apache.beam.sdk.values.TypeParameter)14 DoFn (org.apache.beam.sdk.transforms.DoFn)10 Map (java.util.Map)7