Search in sources :

Example 21 with State

use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.

the class ExecutorServiceParallelExecutor method start.

/**
 * Seeds every root transform of {@code graph} with its initial input bundles, initializes the
 * evaluation context with them, and submits a self-rescheduling task that drives execution until
 * the driver reports a terminal state.
 *
 * @param graph the graph whose root transforms are seeded and then executed
 * @param rootProviderRegistry source of the initial input bundles for each root transform
 */
@Override
// TODO: [BEAM-4563] Pass Future back to consumer to check for async errors
@SuppressWarnings("FutureReturnValueIgnored")
public void start(DirectGraph graph, RootProviderRegistry rootProviderRegistry) {
    // Request at least three splits from each root, even when the target parallelism is lower.
    int splitCount = Math.max(3, targetParallelism);
    ImmutableMap.Builder<AppliedPTransform<?, ?, ?>, Queue<CommittedBundle<?>>> rootBundles = ImmutableMap.builder();
    for (AppliedPTransform<?, ?, ?> rootTransform : graph.getRootTransforms()) {
        Queue<CommittedBundle<?>> queued = Queues.newArrayDeque();
        try {
            queued.addAll(rootProviderRegistry.getInitialInputs(rootTransform, splitCount));
        } catch (Exception e) {
            // Any failure while producing initial inputs is rethrown wrapped as a UserCodeException.
            throw UserCodeException.wrap(e);
        }
        rootBundles.put(rootTransform, queued);
    }
    evaluationContext.initialize(rootBundles.build());
    final ExecutionDriver driver = QuiescenceDriver.create(evaluationContext, graph, this, visibleUpdates, rootBundles.build());
    // Must remain an anonymous class: the task resubmits itself through `this`.
    executorService.submit(new Runnable() {

        @Override
        public void run() {
            DriverState outcome = driver.drive();
            if (!outcome.isTermainal()) {
                // Not finished yet: schedule another driving pass.
                executorService.submit(this);
                return;
            }
            State finalState;
            switch(outcome) {
                case FAILED:
                    finalState = State.FAILED;
                    break;
                case SHUTDOWN:
                    finalState = State.DONE;
                    break;
                case CONTINUE:
                    throw new IllegalStateException(String.format("%s should not be a terminal state", DriverState.CONTINUE));
                default:
                    throw new IllegalArgumentException(String.format("Unknown %s %s", DriverState.class.getSimpleName(), outcome));
            }
            shutdownIfNecessary(finalState);
        }
    });
}
Also used : ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) UserCodeException(org.apache.beam.sdk.util.UserCodeException) ExecutionDriver(org.apache.beam.runners.local.ExecutionDriver) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) DriverState(org.apache.beam.runners.local.ExecutionDriver.DriverState) State(org.apache.beam.sdk.PipelineResult.State) DriverState(org.apache.beam.runners.local.ExecutionDriver.DriverState) BlockingQueue(java.util.concurrent.BlockingQueue) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Queue(java.util.Queue)

Example 22 with State

use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.

the class TestDataflowRunner method waitForStreamingJobTermination.

/**
 * Waits for a streaming job to reach a terminal state, cancelling it if it does not finish within
 * the configured test timeout.
 *
 * <p>A {@code CancelOnError} task is submitted alongside the wait so that the job can be stopped
 * early instead of running until the timeout.
 *
 * @param job the Dataflow job to wait on
 * @param messageHandler handler passed to the wait call and consulted afterwards for observed
 *     errors
 * @return {@code true} if the job succeeded or {@code false} if it terminated in any other manner,
 *     including a timeout or an interruption of the waiting thread
 * @throws RuntimeException wrapping any {@link IOException} raised while waiting for or cancelling
 *     the job
 */
// Job status checked via job.waitUntilFinish
@SuppressWarnings("FutureReturnValueIgnored")
@SuppressFBWarnings("RV_RETURN_VALUE_IGNORED_BAD_PRACTICE")
private boolean waitForStreamingJobTermination(final DataflowPipelineJob job, ErrorMonitorMessagesHandler messageHandler) {
    // In streaming, there are infinite retries, so rather than timeout
    // we try to terminate early by polling and canceling if we see
    // an error message
    options.getExecutorService().submit(new CancelOnError(job, messageHandler));
    // Whether we canceled or not, this gets the final state of the job or times out
    State finalState;
    try {
        finalState = job.waitUntilFinish(Duration.standardSeconds(options.getTestTimeoutSeconds()), messageHandler);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up the stack can observe it.
        // Thread.interrupted() would clear the flag and silently swallow the interruption.
        Thread.currentThread().interrupt();
        return false;
    }
    // A null or non-terminal state means the wait timed out.
    // This cancellation may be the second
    if (finalState == null || !finalState.isTerminal()) {
        LOG.info("Dataflow job {} took longer than {} seconds to complete, cancelling.", job.getJobId(), options.getTestTimeoutSeconds());
        try {
            job.cancel();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return false;
    } else {
        // Success requires both a DONE state and no error seen by the message handler.
        return finalState == State.DONE && !messageHandler.hasSeenError();
    }
}
Also used : State(org.apache.beam.sdk.PipelineResult.State) IOException(java.io.IOException) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)

Example 23 with State

use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.

the class TestDataflowRunner method checkForPAssertSuccess.

/**
 * Determines from the job's metrics whether the PAssert expectations were met.
 *
 * <p>Only tentative counters are tallied. When no PAsserts are expected and no failure is
 * reported, the success threshold is trivially reached and the result is a success.
 *
 * @param job the Dataflow job whose metrics and state are inspected
 * @return Optional.of(false) if at least one PAssert failure was counted. Optional.of(true) if no
 *     failure was counted and the success count reaches the expected number of assertions.
 *     Optional.absent() otherwise, including when metrics are unavailable or the job ended
 *     FAILED/CANCELLED without a counted assertion failure.
 */
@VisibleForTesting
Optional<Boolean> checkForPAssertSuccess(DataflowPipelineJob job) {
    JobMetrics jobMetrics = getJobMetrics(job);
    if (jobMetrics == null || jobMetrics.getMetrics() == null) {
        LOG.warn("Metrics not present for Dataflow job {}.", job.getJobId());
        return Optional.absent();
    }

    int passed = 0;
    int failed = 0;
    for (MetricUpdate update : jobMetrics.getMetrics()) {
        // Count only the tentative version of each metric so nothing is double counted.
        boolean tentative = update.getName() != null
            && update.getName().getContext() != null
            && update.getName().getContext().containsKey(TENTATIVE_COUNTER);
        if (!tentative) {
            continue;
        }
        if (PAssert.SUCCESS_COUNTER.equals(update.getName().getName())) {
            passed += ((BigDecimal) update.getScalar()).intValue();
        } else if (PAssert.FAILURE_COUNTER.equals(update.getName().getName())) {
            failed += ((BigDecimal) update.getScalar()).intValue();
        }
    }

    // Any counted failure is conclusive, regardless of how many assertions succeeded.
    if (failed > 0) {
        LOG.info("Failure result for Dataflow job {}. Found {} success, {} failures out of {} expected assertions.", job.getJobId(), passed, failed, expectedNumberOfAssertions);
        return Optional.of(false);
    }
    if (passed >= expectedNumberOfAssertions) {
        LOG.info("Success result for Dataflow job {}. Found {} success, {} failures out of {} expected assertions.", job.getJobId(), passed, failed, expectedNumberOfAssertions);
        return Optional.of(true);
    }

    // Too few successes and no failures: the outcome is inconclusive either way. The job state
    // only selects which message is logged.
    State jobState = job.getState();
    if (jobState == State.FAILED || jobState == State.CANCELLED) {
        LOG.info("Dataflow job {} terminated in failure state {} without reporting a failed assertion", job.getJobId(), jobState);
    } else {
        LOG.info("Inconclusive results for Dataflow job {}. Found {} success, {} failures out of {} expected assertions.", job.getJobId(), passed, failed, expectedNumberOfAssertions);
    }
    return Optional.absent();
}
Also used : State(org.apache.beam.sdk.PipelineResult.State) MetricUpdate(com.google.api.services.dataflow.model.MetricUpdate) JobMetrics(com.google.api.services.dataflow.model.JobMetrics) BigDecimal(java.math.BigDecimal) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

State (org.apache.beam.sdk.PipelineResult.State)23 Test (org.junit.Test)18 PipelineResult (org.apache.beam.sdk.PipelineResult)14 Row (org.apache.beam.sdk.values.Row)12 BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv)10 IOException (java.io.IOException)4 Job (com.google.api.services.dataflow.model.Job)3 EntityToRow (org.apache.beam.sdk.io.gcp.datastore.EntityToRow)3 ByteString (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.avatica.util.ByteString)3 File (java.io.File)2 Pipeline (org.apache.beam.sdk.Pipeline)2 BeamPushDownIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel)2 BeamRelNode (org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode)2 Dataflow (com.google.api.services.dataflow.Dataflow)1 Messages (com.google.api.services.dataflow.Dataflow.Projects.Locations.Jobs.Messages)1 JobMetrics (com.google.api.services.dataflow.model.JobMetrics)1 MetricUpdate (com.google.api.services.dataflow.model.MetricUpdate)1 Key (com.google.datastore.v1.Key)1 DatastoreHelper.makeKey (com.google.datastore.v1.client.DatastoreHelper.makeKey)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1