use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class DataflowPipelineJob method waitUntilFinish.
/**
 * Polls the job until it reaches a terminal state or the backoff governing the wait is
 * exhausted.
 *
 * <p>Every polling round fetches the job state first and only then processes the job messages,
 * so messages are always fetched once more after the job has finished.
 *
 * @param duration The time to wait for the job to finish. Provide a value less than 1 ms for an
 *     infinite wait.
 * @param messageHandler If non null this handler will be invoked for each batch of messages
 *     received.
 * @param sleeper A sleeper to use to sleep between attempts.
 * @param nanoClock A nanoClock used to time the total time taken.
 * @param monitor The monitoring utility handed to {@code processJobMessages} together with the
 *     message handler.
 * @return The final state of the job, or null if polling stopped before a terminal state was
 *     observed.
 * @throws IOException If there is a persistent problem getting job information.
 * @throws InterruptedException if the thread is interrupted.
 */
@VisibleForTesting
@Nullable
State waitUntilFinish(Duration duration, MonitoringUtil.@Nullable JobMessagesHandler messageHandler, Sleeper sleeper, NanoClock nanoClock, MonitoringUtil monitor) throws IOException, InterruptedException {
  BackOff backoff = getMessagesBackoff(duration);
  // The wall-clock limit is measured from the very first request. A backoff instance can, at
  // best, track the time since the first attempt of a single request, so the cumulative time is
  // tracked here instead.
  long firstRequestNanos = nanoClock.nanoTime();
  State state = State.UNKNOWN;
  Exception lastFailure;
  do {
    lastFailure = null;
    try {
      // Fetch the state before listing messages, so that a final round of messages is always
      // read after the job has finished.
      state = getStateWithRetries(BackOffAdapter.toGcpBackOff(STATUS_BACKOFF_FACTORY.withMaxRetries(0).backoff()), sleeper);
    } catch (IOException e) {
      lastFailure = e;
      LOG.warn("Failed to get job state: {}", e.getMessage());
      LOG.debug("Failed to get job state.", e);
    }
    if (lastFailure == null) {
      lastFailure = processJobMessages(messageHandler, monitor);
      if (lastFailure == null) {
        // A terminal state ends the wait.
        if (state.isTerminal()) {
          logTerminalState(state);
          return state;
        }
        // The round succeeded: reset the attempt count and account for the time already spent.
        backoff = resetBackoff(duration, nanoClock, firstRequestNanos);
      }
    }
    // A failed round falls through to the loop condition without resetting the backoff.
  } while (BackOffUtils.next(sleeper, backoff));
  if (lastFailure != null) {
    LOG.error("Failed to fetch job metadata.", lastFailure);
  } else {
    LOG.warn("No terminal state was returned within allotted timeout. State value {}", state);
  }
  return null;
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class FlinkExecutionEnvironments method createStreamExecutionEnvironment.
/**
 * Creates and configures the Flink streaming environment selected by the Flink master option.
 *
 * <p>A master of {@code [local]} yields a local environment; {@code [auto]} uses the
 * environment Flink provides, replacing it with a freshly configured local environment when it
 * turns out to be a {@link LocalStreamEnvironment}; any other value is parsed as a host and
 * port for a remote environment. The resulting environment is then configured from the
 * pipeline options (parallelism, object reuse, retries, checkpointing, latency tracking,
 * watermark interval and state backend).
 *
 * @param options the Flink pipeline options; the effective parallelism is written back to them
 * @param filesToStage the files handed to the remote environment
 * @param confDir the directory passed to {@code getFlinkConfiguration}, may be null
 * @return the configured streaming environment
 */
@VisibleForTesting
static StreamExecutionEnvironment createStreamExecutionEnvironment(FlinkPipelineOptions options, List<String> filesToStage, @Nullable String confDir) {
  LOG.info("Creating a Streaming Environment.");
  // Flink talks REST but expects the address without an http scheme.
  String flinkMaster = stripHttpSchema(options.getFlinkMaster());
  Configuration flinkConf = getFlinkConfiguration(confDir);
  StreamExecutionEnvironment streamEnv;
  // Pick the environment type that matches the master setting.
  if ("[local]".equals(flinkMaster)) {
    setManagedMemoryByFraction(flinkConf);
    disableClassLoaderLeakCheck(flinkConf);
    streamEnv = StreamExecutionEnvironment.createLocalEnvironment(getDefaultLocalParallelism(), flinkConf);
  } else if ("[auto]".equals(flinkMaster)) {
    streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    if (streamEnv instanceof LocalStreamEnvironment) {
      disableClassLoaderLeakCheck(flinkConf);
      streamEnv = StreamExecutionEnvironment.createLocalEnvironment(getDefaultLocalParallelism(), flinkConf);
    }
  } else {
    // Fall back to the configured REST port when the master address carries none.
    int configuredPort = flinkConf.getInteger(RestOptions.PORT);
    HostAndPort masterAddress = HostAndPort.fromString(flinkMaster).withDefaultPort(configuredPort);
    flinkConf.setInteger(RestOptions.PORT, masterAddress.getPort());
    final SavepointRestoreSettings restoreSettings =
        options.getSavepointPath() != null
            ? SavepointRestoreSettings.forPath(options.getSavepointPath(), options.getAllowNonRestoredState())
            : SavepointRestoreSettings.none();
    String[] stagedFiles = filesToStage.toArray(new String[filesToStage.size()]);
    streamEnv = new RemoteStreamEnvironment(masterAddress.getHost(), masterAddress.getPort(), flinkConf, stagedFiles, null, restoreSettings);
    LOG.info("Using Flink Master URL {}:{}.", masterAddress.getHost(), masterAddress.getPort());
  }

  // UnboundedSourceWrapper needs the parallelism to be set to generate consistent splits.
  final int effectiveParallelism = determineParallelism(options.getParallelism(), streamEnv.getParallelism(), flinkConf);
  streamEnv.setParallelism(effectiveParallelism);
  if (options.getMaxParallelism() > 0) {
    streamEnv.setMaxParallelism(options.getMaxParallelism());
  }
  // Some execution code reads the parallelism back from the options.
  options.setParallelism(effectiveParallelism);

  if (!options.getObjectReuse()) {
    streamEnv.getConfig().disableObjectReuse();
  } else {
    streamEnv.getConfig().enableObjectReuse();
  }

  // Event time is the default time characteristic.
  streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

  // For the next two settings, -1 leaves the value from the Flink configuration in effect.
  int executionRetries = options.getNumberOfExecutionRetries();
  if (executionRetries != -1) {
    streamEnv.setNumberOfExecutionRetries(executionRetries);
  }
  long executionRetryDelay = options.getExecutionRetryDelay();
  if (executionRetryDelay != -1) {
    streamEnv.getConfig().setExecutionRetryDelay(executionRetryDelay);
  }

  configureCheckpointing(options, streamEnv);
  applyLatencyTrackingInterval(streamEnv.getConfig(), options);
  if (options.getAutoWatermarkInterval() != null) {
    streamEnv.getConfig().setAutoWatermarkInterval(options.getAutoWatermarkInterval());
  }
  configureStateBackend(options, streamEnv);
  return streamEnv;
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class QueryablePipeline method getPrimitiveTransformIds.
/**
 * Collects the ids of the primitive transforms found in the given {@link RunnerApi.Components}.
 *
 * <p>For every transform that {@code isPrimitiveTransform} accepts, the transform's subtree is
 * walked and the ids of its leaves (transforms without subtransforms) are collected. When the
 * accepted transform has no subtransforms, its own id is collected.
 *
 * @param components the components whose transforms are inspected
 * @return the collected ids, without duplicates, in the order they were first encountered
 */
@VisibleForTesting
static Collection<String> getPrimitiveTransformIds(RunnerApi.Components components) {
  Collection<String> primitiveIds = new LinkedHashSet<>();
  for (Map.Entry<String, PTransform> entry : components.getTransformsMap().entrySet()) {
    if (!isPrimitiveTransform(entry.getValue())) {
      continue;
    }
    // A "primitive" transform may still have sub-transforms (and deeper-nested descendants),
    // either because a runner rewrote it in terms of runner-specific transforms or because an
    // SDK built it from other underlying transforms
    // (see https://issues.apache.org/jira/browse/BEAM-5441).
    // The leaf descendants are treated as the true primitives; in the common case that is
    // simply the "primitive" itself, which has no descendants.
    Deque<String> pending = new ArrayDeque<>();
    pending.addFirst(entry.getKey());
    while (!pending.isEmpty()) {
      String currentId = pending.removeFirst();
      List<String> children = components.getTransformsMap().get(currentId).getSubtransformsList();
      if (!children.isEmpty()) {
        pending.addAll(children);
      } else {
        primitiveIds.add(currentId);
      }
    }
  }
  return primitiveIds;
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class WindmillStateInternals method encodeKey.
/**
 * Builds the key for a namespace/address pair in the form {@code <namespace>+<address>},
 * encoded as UTF-8.
 *
 * @param namespace the state namespace, written first
 * @param address the state tag, written after the {@code '+'} separator
 * @return the encoded key
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
@VisibleForTesting
static ByteString encodeKey(StateNamespace namespace, StateTag<?> address) {
  // Keys are built very frequently; writing straight into a ByteString.Output performs better
  // than string concatenation or String.format. See associated benchmarks.
  ByteString.Output keyBytes = ByteString.newOutput();
  OutputStreamWriter keyWriter = new OutputStreamWriter(keyBytes, StandardCharsets.UTF_8);
  try {
    // The namespace's string key starts and ends with a slash. The StateTag ID comes from the
    // user, so it is separated by a '+', which is guaranteed not to occur in the string key.
    namespace.appendTo(keyWriter);
    keyWriter.write('+');
    address.appendTo(keyWriter);
    // Push everything buffered in the writer into the output before snapshotting it.
    keyWriter.flush();
    return keyBytes.toByteString();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class WorkItemStatusClient method populateMetricUpdates.
/**
 * Sets the metric updates on the given status.
 *
 * <p>When an execution context with an execution state tracker is available, a single
 * {@code "internal"} update named {@code "state-sampler"} is reported, carrying the tracker's
 * transition count, the milliseconds since the last transition and, if there is a current
 * state, its description. Otherwise an empty list is set.
 *
 * @param status the work item status that receives the metric updates
 */
@VisibleForTesting
synchronized void populateMetricUpdates(WorkItemStatus status) {
  List<MetricUpdate> metricUpdates = new ArrayList<>();
  boolean trackerAvailable = executionContext != null && executionContext.getExecutionStateTracker() != null;
  if (trackerAvailable) {
    ExecutionStateTracker stateTracker = executionContext.getExecutionStateTracker();

    // Gather the sampler values from the tracker.
    Map<String, Object> samplerValues = new HashMap<>();
    ExecutionState currentState = stateTracker.getCurrentState();
    if (currentState != null) {
      samplerValues.put("last-state-name", currentState.getDescription());
    }
    samplerValues.put("num-transitions", stateTracker.getNumTransitions());
    samplerValues.put("last-state-duration-ms", stateTracker.getMillisSinceLastTransition());

    // Wrap them in an internal metric update.
    MetricStructuredName samplerName = new MetricStructuredName();
    samplerName.setName("state-sampler");
    MetricUpdate samplerUpdate = new MetricUpdate();
    samplerUpdate.setKind("internal");
    samplerUpdate.setName(samplerName);
    samplerUpdate.setInternal(samplerValues);
    metricUpdates.add(samplerUpdate);
  }
  status.setMetricUpdates(metricUpdates);
}
Aggregations