Search in sources :

Example 1 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class DataflowElementExecutionTrackerTest method setUp.

/**
 * Prepares the fixtures shared by every test: debug options with the
 * time-per-element experiment listed, an empty counter set, and the tracker
 * created from both.
 */
@Before
public void setUp() {
    counters = new CounterSet();

    // List the experiment named by TIME_PER_ELEMENT_EXPERIMENT on the options handed to the tracker.
    options = PipelineOptionsFactory.as(DataflowPipelineDebugOptions.class);
    options.setExperiments(
        Lists.newArrayList(
            DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));

    tracker = DataflowElementExecutionTracker.create(counters, options);
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) Before(org.junit.Before)

Example 2 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class BatchDataflowWorker method doWork.

/**
 * Performs the given work; returns true if successful.
 *
 * <p>The work item must carry either a MapTask or a SourceOperationTask. An executor is built
 * for whichever is present, run via {@code executeWork}, and the outcome is reported through
 * {@code workItemStatusClient}. Any {@link Throwable} raised while preparing or executing the
 * work is reported with {@code reportError} and turned into a {@code false} return value rather
 * than being propagated. The executor (if one was created) is always closed and the SDK harness
 * is always handed back to the registry, whether the work succeeded or not.
 *
 * @param workItem the unit of work to execute
 * @param workItemStatusClient client used to report success or failure of the work item
 * @return {@code true} if the work ran and success was reported; {@code false} if a failure was
 *     caught and reported
 * @throws IOException Only if the WorkUnitClient fails.
 */
@VisibleForTesting
boolean doWork(WorkItem workItem, WorkItemStatusClient workItemStatusClient) throws IOException {
    LOG.debug("Executing: {}", workItem);
    // Declared outside the try so the finally block can close it if it was created.
    DataflowWorkExecutor worker = null;
    // Acquired before the try; released in the finally block via completeWork.
    SdkWorkerHarness sdkWorkerHarness = sdkHarnessRegistry.getAvailableWorkerAndAssignWork();
    try {
        // Populate PipelineOptions with data from work unit.
        options.setProject(workItem.getProjectId());
        // The stage name comes from whichever task kind the work item carries.
        final String stageName;
        if (workItem.getMapTask() != null) {
            stageName = workItem.getMapTask().getStageName();
        } else if (workItem.getSourceOperationTask() != null) {
            stageName = workItem.getSourceOperationTask().getStageName();
        } else {
            // Thrown inside the try, so it is caught below and reported as a work item error.
            throw new RuntimeException("Unknown kind of work item: " + workItem.toString());
        }
        // A fresh counter set per work item, shared by the execution context and the executor.
        CounterSet counterSet = new CounterSet();
        BatchModeExecutionContext executionContext = BatchModeExecutionContext.create(counterSet, sideInputDataCache, sideInputWeakReferenceCache, readerRegistry, options, stageName, String.valueOf(workItem.getId()));
        if (workItem.getMapTask() != null) {
            // Convert the MapTask into a graph before building the executor from it.
            MutableNetwork<Node, Edge> network = mapTaskToNetwork.apply(workItem.getMapTask());
            // Guarded so the Graphviz rendering is only computed when debug logging is on.
            if (LOG.isDebugEnabled()) {
                LOG.debug("Network as Graphviz .dot: {}", Networks.toDot(network));
            }
            worker = mapTaskExecutorFactory.create(sdkWorkerHarness.getControlClientHandler(), sdkWorkerHarness.getGrpcDataFnServer(), sdkHarnessRegistry.beamFnDataApiServiceDescriptor(), sdkWorkerHarness.getGrpcStateFnServer(), network, options, stageName, readerRegistry, sinkRegistry, executionContext, counterSet, idGenerator);
        } else if (workItem.getSourceOperationTask() != null) {
            worker = SourceOperationExecutorFactory.create(options, workItem.getSourceOperationTask(), counterSet, executionContext, stageName);
        } else {
            // NOTE(review): appears unreachable, since the stage-name check above already throws
            // when neither task is set (assuming the getters return stable values) - confirm.
            throw new IllegalStateException("Work Item was neither a MapTask nor a SourceOperation");
        }
        // Hand the executor and context to the status client before execution starts.
        workItemStatusClient.setWorker(worker, executionContext);
        DataflowWorkProgressUpdater progressUpdater = new DataflowWorkProgressUpdater(workItemStatusClient, workItem, worker, options);
        executeWork(worker, progressUpdater);
        workItemStatusClient.reportSuccess();
        return true;
    } catch (Throwable e) {
        // Every failure (including Errors) is reported against the work item instead of rethrown.
        workItemStatusClient.reportError(e);
        return false;
    } finally {
        if (worker != null) {
            try {
                worker.close();
            } catch (Exception exn) {
                // The outcome has already been reported above, so a close failure is only logged.
                LOG.warn("Uncaught exception while closing worker. All work has already committed or " + "been marked for retry.", exn);
            }
        }
        // NOTE(review): the harness is dereferenced without a null check in the MapTask branch
        // above; presumably it can be null only for source operations - confirm.
        if (sdkWorkerHarness != null) {
            sdkHarnessRegistry.completeWork(sdkWorkerHarness);
        }
    }
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) SdkWorkerHarness(org.apache.beam.runners.dataflow.worker.SdkHarnessRegistry.SdkWorkerHarness) IOException(java.io.IOException) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 3 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class TestOperationContext method create.

/**
 * Creates a {@code TestOperationContext} from a newly constructed {@code CounterSet} and the
 * name context supplied by {@code NameContextsForTests.nameContextForTest()}, delegating to the
 * two-argument {@code create} overload.
 */
public static TestOperationContext create() {
    return create(new CounterSet(), NameContextsForTests.nameContextForTest());
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext)

Example 4 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class UserParDoFnFactoryTest method testFactoryDoesNotReuseAfterAborted.

/**
 * Checks that aborting a ParDoFn leaves its DoFn torn down, and that a second ParDoFn created
 * from a clone of the same cloud object runs a different DoFn instance.
 */
@Test
public void testFactoryDoesNotReuseAfterAborted() throws Exception {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    CounterSet counterSet = new CounterSet();
    TestDoFn seedFn = new TestDoFn(Collections.<TupleTag<String>>emptyList());
    CloudObject fnSpec = getCloudObject(seedFn);

    // First ParDoFn: start a bundle, process one element, then abort.
    ParDoFn firstParDoFn =
        factory.create(
            pipelineOptions,
            fnSpec,
            null,
            MAIN_OUTPUT,
            ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
            BatchModeExecutionContext.forTesting(pipelineOptions, "testStage"),
            TestOperationContext.create(counterSet));
    Receiver receiver = new OutputReceiver();
    firstParDoFn.startBundle(receiver);
    firstParDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
    TestDoFn firstFn = (TestDoFn) ((SimpleParDoFn) firstParDoFn).getDoFnInfo().getDoFn();
    firstParDoFn.abort();

    // Aborting is expected to tear the first fn down.
    assertThat(firstFn.state, equalTo(TestDoFn.State.TORN_DOWN));

    // Second ParDoFn, built from a clone of the same spec.
    ParDoFn nextParDoFn =
        factory.create(
            pipelineOptions,
            fnSpec.clone(),
            null,
            MAIN_OUTPUT,
            ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
            BatchModeExecutionContext.forTesting(pipelineOptions, "testStage"),
            TestOperationContext.create(counterSet));
    nextParDoFn.startBundle(receiver);
    nextParDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
    TestDoFn nextFn = (TestDoFn) ((SimpleParDoFn) nextParDoFn).getDoFnInfo().getDoFn();

    // The aborted fn must not be handed out again: a distinct instance is now processing,
    // while the first one stays torn down.
    assertThat(nextFn, not(theInstance(firstFn)));
    assertThat(firstFn.state, equalTo(TestDoFn.State.TORN_DOWN));
    assertThat(nextFn.state, equalTo(TestDoFn.State.PROCESSING));
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) Test(org.junit.Test)

Example 5 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class UserParDoFnFactoryTest method testCleanupRegistered.

/**
 * Checks that processing an element in a fixed window with a stateful DoFn makes the ParDoFn
 * set a state cleanup timer on the step context, one millisecond past the window's maximum
 * timestamp.
 */
@Test
public void testCleanupRegistered() throws Exception {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    CounterSet counterSet = new CounterSet();
    DoFn<?, ?> statefulFn = new TestStatefulDoFn();
    CloudObject fnSpec =
        getCloudObject(
            statefulFn,
            WindowingStrategy.globalDefault().withWindowFn(FixedWindows.of(Duration.millis(10))));

    // Mocked step context that hands out mocked timer internals.
    TimerInternals mockTimers = mock(TimerInternals.class);
    DataflowStepContext mockStepContext = mock(DataflowStepContext.class);
    when(mockStepContext.timerInternals()).thenReturn(mockTimers);

    // Mocked execution context that returns the step context above and an empty side input reader.
    DataflowExecutionContext<DataflowStepContext> mockExecutionContext = mock(DataflowExecutionContext.class);
    TestOperationContext opContext = TestOperationContext.create(counterSet);
    when(mockExecutionContext.getStepContext(opContext)).thenReturn(mockStepContext);
    when(mockExecutionContext.getSideInputReader(any(), any(), any()))
        .thenReturn(NullSideInputReader.empty());

    ParDoFn parDoFn =
        factory.create(
            pipelineOptions,
            fnSpec,
            Collections.emptyList(),
            MAIN_OUTPUT,
            ImmutableMap.of(MAIN_OUTPUT, 0),
            mockExecutionContext,
            opContext);
    Receiver receiver = new OutputReceiver();
    parDoFn.startBundle(receiver);

    // One element at t=1 inside the window [0, 10).
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
    parDoFn.processElement(WindowedValue.of("foo", new Instant(1), window, PaneInfo.NO_FIRING));

    // The same instant is expected for both timestamp arguments of the cleanup timer.
    Instant expectedCleanup = window.maxTimestamp().plus(Duration.millis(1L));
    verify(mockStepContext)
        .setStateCleanupTimer(
            SimpleParDoFn.CLEANUP_TIMER_ID,
            window,
            IntervalWindow.getCoder(),
            expectedCleanup,
            expectedCleanup);
}
Also used : Instant(org.joda.time.Instant) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DataflowStepContext(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext) TimerInternals(org.apache.beam.runners.core.TimerInternals) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

CounterSet (org.apache.beam.runners.dataflow.worker.counters.CounterSet)22 Test (org.junit.Test)14 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)7 ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn)7 ExecutionStateTracker (org.apache.beam.runners.core.metrics.ExecutionStateTracker)6 DataflowExecutionStateTracker (org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker)6 OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver)6 Receiver (org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver)5 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)5 Instant (org.joda.time.Instant)4 CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate)3 WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus)3 Closeable (java.io.Closeable)3 IOException (java.io.IOException)3 CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName)2 NameAndKind (com.google.api.services.dataflow.model.NameAndKind)2 ArrayList (java.util.ArrayList)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)2 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)2