Search in sources :

Example 16 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class IntrinsicMapTaskExecutorTest method testGetOutputCounters.

/**
 * Builds an executor over three operations ("o1", "o2", "o3") and checks that the counters it
 * exposes report exactly three long sums, named "test-oN-ElementCount" with value N for
 * N = 1, 2, 3, and nothing else.
 */
@Test
@SuppressWarnings("unchecked")
public void testGetOutputCounters() throws Exception {
    List<Operation> ops = Arrays.asList(new Operation[] {
        createOperation("o1", 1),
        createOperation("o2", 2),
        createOperation("o3", 3)
    });
    ExecutionStateTracker tracker = ExecutionStateTracker.newForTest();
    // `counterSet` below is declared outside this method (not visible in this snippet).
    try (IntrinsicMapTaskExecutor mapTaskExecutor =
            IntrinsicMapTaskExecutor.withSharedCounterSet(ops, counterSet, tracker)) {
        // Named distinctly from the outer counterSet handed to the factory above.
        CounterSet outputCounters = mapTaskExecutor.getOutputCounters();
        CounterUpdateExtractor<?> extractor = Mockito.mock(CounterUpdateExtractor.class);
        outputCounters.extractUpdates(false, extractor);
        // One ElementCount sum is expected per operation; the delta flag is not constrained.
        for (int i = 1; i <= 3; i++) {
            verify(extractor).longSum(eq(named("test-o" + i + "-ElementCount")), anyBoolean(), eq((long) i));
        }
        verifyNoMoreInteractions(extractor);
    }
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation) Test(org.junit.Test)

Example 17 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class SimpleParDoFnTest method testUndeclaredSideOutputs.

/**
 * Creates a SimpleParDoFn whose output map only contains MAIN_OUTPUT and the "declared" tag,
 * while the TestDoFn is constructed with three additional tags ("undecl1".."undecl3").
 * Processing an element is expected to fail with a UserCodeException caused by an
 * IllegalArgumentException whose message contains "Unknown output tag".
 */
@Test
public void testUndeclaredSideOutputs() throws Exception {
    TestDoFn doFn = new TestDoFn(
        ImmutableList.of(
            new TupleTag<>("declared"),
            new TupleTag<>("undecl1"),
            new TupleTag<>("undecl2"),
            new TupleTag<>("undecl3")));
    DoFnInfo<?, ?> doFnInfo = DoFnInfo.forFn(
        doFn,
        WindowingStrategy.globalDefault(),
        null /* side input views */,
        null /* input coder */,
        MAIN_OUTPUT,
        DoFnSchemaInformation.create(),
        Collections.emptyMap());
    CounterSet testCounters = new CounterSet();
    TestOperationContext opContext = TestOperationContext.create(testCounters);
    ParDoFn parDoFnUnderTest = new SimpleParDoFn<>(
        options,
        DoFnInstanceManagers.cloningPool(doFnInfo, options),
        NullSideInputReader.empty(),
        MAIN_OUTPUT,
        // Only two outputs are wired to receiver indices.
        ImmutableMap.of(MAIN_OUTPUT, 0, new TupleTag<String>("declared"), 1),
        BatchModeExecutionContext.forTesting(options, "testStage").getStepContext(opContext),
        opContext,
        DoFnSchemaInformation.create(),
        Collections.emptyMap(),
        SimpleDoFnRunnerFactory.INSTANCE);
    parDoFnUnderTest.startBundle(new TestReceiver(), new TestReceiver());

    // Expectations must be registered before the call that is supposed to throw.
    thrown.expect(UserCodeException.class);
    thrown.expectCause(instanceOf(IllegalArgumentException.class));
    thrown.expectMessage("Unknown output tag");
    parDoFnUnderTest.processElement(WindowedValue.valueInGlobalWindow(5));
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) TupleTag(org.apache.beam.sdk.values.TupleTag) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) Test(org.junit.Test)

Example 18 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class StreamingModeExecutionContextTest method setUp.

/**
 * Prepares the fixture: initializes Mockito-annotated members, creates the worker harness
 * options, and builds the StreamingModeExecutionContext under test with a fresh CounterSet
 * that is shared between the context and its DataflowExecutionStateTracker.
 */
@Before
public void setUp() {
    MockitoAnnotations.initMocks(this);
    options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class);

    CounterSet counters = new CounterSet();
    // Maps the test name context's user name to the state family "testStateFamily".
    ConcurrentHashMap<String, String> stateFamilies = new ConcurrentHashMap<>();
    stateFamilies.put(NameContextsForTests.nameContextForTest().userName(), "testStateFamily");
    ReaderCache readerCache = new ReaderCache(Duration.standardMinutes(1), Executors.newCachedThreadPool());

    executionContext = new StreamingModeExecutionContext(
        counters,
        "computationId",
        readerCache,
        stateFamilies,
        new WindmillStateCache(options.getWorkerCacheMb()).forComputation("comp"),
        StreamingStepMetricsContainer.createRegistry(),
        new DataflowExecutionStateTracker(
            ExecutionStateSampler.newForTest(),
            executionStateRegistry.getState(NameContext.forStage("stage"), "other", null, NoopProfileScope.NOOP),
            counters,
            PipelineOptionsFactory.create(),
            "test-work-item-id"),
        executionStateRegistry,
        Long.MAX_VALUE);
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Before(org.junit.Before)

Example 19 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class UserParDoFnFactoryTest method testCleanupWorks.

/**
 * Checks that firing cleanup timers through {@code processTimers()} empties the state of the
 * window each timer belongs to, one window at a time.
 *
 * <p>State is written for two windows; a cleanup timer for the first window is fired and only
 * the first window's state is expected to be gone, then a cleanup timer for the second window
 * is fired and both are expected to be empty.
 */
@Test
public void testCleanupWorks() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    CounterSet counters = new CounterSet();
    DoFn<?, ?> initialFn = new TestStatefulDoFn();
    CloudObject cloudObject = getCloudObject(initialFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));
    // A single in-memory state backend, returned by both mocked step contexts below.
    StateInternals stateInternals = InMemoryStateInternals.forKey("dummy");
    // The overarching step context that only ParDoFn gets
    DataflowStepContext stepContext = mock(DataflowStepContext.class);
    // The user step context that the DoFnRunner gets a handle on
    DataflowStepContext userStepContext = mock(DataflowStepContext.class);
    when(stepContext.namespacedToUser()).thenReturn(userStepContext);
    when(stepContext.stateInternals()).thenReturn(stateInternals);
    when(userStepContext.stateInternals()).thenReturn((StateInternals) stateInternals);
    DataflowExecutionContext<DataflowStepContext> executionContext = mock(DataflowExecutionContext.class);
    TestOperationContext operationContext = TestOperationContext.create(counters);
    when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
    when(executionContext.getSideInputReader(any(), any(), any())).thenReturn(NullSideInputReader.empty());
    ParDoFn parDoFn = factory.create(options, cloudObject, Collections.emptyList(), MAIN_OUTPUT, ImmutableMap.of(MAIN_OUTPUT, 0), executionContext, operationContext);
    Receiver rcvr = new OutputReceiver();
    parDoFn.startBundle(rcvr);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    StateNamespace firstWindowNamespace = StateNamespaces.window(windowCoder, firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(windowCoder, secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));
    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We mustn't care how it is accomplished.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");
    // The user step context never reports a fired timer; the overarching step context reports
    // one cleanup timer for the first window (1 ms past its max timestamp) and then null.
    when(userStepContext.getNextFiredTimer(windowCoder)).thenReturn(null);
    when(stepContext.getNextFiredTimer(windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, firstWindowNamespace, firstWindow.maxTimestamp().plus(Duration.millis(1L)), firstWindow.maxTimestamp().plus(Duration.millis(1L)), TimeDomain.EVENT_TIME)).thenReturn(null);
    // This should fire the timer to clean up the first window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
    // Re-stub the step context: one cleanup timer for the second window, then null.
    when(stepContext.getNextFiredTimer((Coder) windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, secondWindowNamespace, secondWindow.maxTimestamp().plus(Duration.millis(1L)), secondWindow.maxTimestamp().plus(Duration.millis(1L)), TimeDomain.EVENT_TIME)).thenReturn(null);
    // And this should clean up the second window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Instant(org.joda.time.Instant) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DataflowStepContext(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext) StateNamespace(org.apache.beam.runners.core.StateNamespace) ValueState(org.apache.beam.sdk.state.ValueState) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 20 with CounterSet

use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.

the class UserParDoFnFactoryTest method testFactorySimultaneousUse.

/**
 * Creates two ParDoFns from the same CloudObject, runs a bundle through each while the other
 * is still unfinished, and checks that they are backed by different TestDoFn instances which
 * both end in state FINISHED.
 */
@Test
public void testFactorySimultaneousUse() throws Exception {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    CounterSet sharedCounters = new CounterSet();
    TestDoFn prototypeFn = new TestDoFn(Collections.<TupleTag<String>>emptyList());
    CloudObject fnSpec = getCloudObject(prototypeFn);

    ParDoFn firstParDoFn = factory.create(
        pipelineOptions,
        fnSpec,
        null,
        MAIN_OUTPUT,
        ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
        BatchModeExecutionContext.forTesting(pipelineOptions, "testStage"),
        TestOperationContext.create(sharedCounters));
    // The fn should not be reused while the first ParDoFn is not finished
    ParDoFn secondParDoFn = factory.create(
        pipelineOptions,
        fnSpec,
        null,
        MAIN_OUTPUT,
        ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
        BatchModeExecutionContext.forTesting(pipelineOptions, "testStage"),
        TestOperationContext.create(sharedCounters));

    Receiver receiver = new OutputReceiver();

    firstParDoFn.startBundle(receiver);
    firstParDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
    // Must be after the first call to process element for reallyStartBundle to have been called
    TestDoFn firstDoFn = (TestDoFn) ((SimpleParDoFn) firstParDoFn).getDoFnInfo().getDoFn();

    secondParDoFn.startBundle(receiver);
    secondParDoFn.processElement(WindowedValue.valueInGlobalWindow("spam"));
    // Must be after the first call to process element for reallyStartBundle to have been called
    TestDoFn secondDoFn = (TestDoFn) ((SimpleParDoFn) secondParDoFn).getDoFnInfo().getDoFn();

    // Both bundles are finished only after both fns have been obtained.
    firstParDoFn.finishBundle();
    secondParDoFn.finishBundle();

    assertThat(firstDoFn, not(theInstance(secondDoFn)));
    assertThat(firstDoFn.state, equalTo(TestDoFn.State.FINISHED));
    assertThat(secondDoFn.state, equalTo(TestDoFn.State.FINISHED));
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) Test(org.junit.Test)

Aggregations

CounterSet (org.apache.beam.runners.dataflow.worker.counters.CounterSet)22 Test (org.junit.Test)14 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)7 ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn)7 ExecutionStateTracker (org.apache.beam.runners.core.metrics.ExecutionStateTracker)6 DataflowExecutionStateTracker (org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker)6 OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver)6 Receiver (org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver)5 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)5 Instant (org.joda.time.Instant)4 CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate)3 WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus)3 Closeable (java.io.Closeable)3 IOException (java.io.IOException)3 CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName)2 NameAndKind (com.google.api.services.dataflow.model.NameAndKind)2 ArrayList (java.util.ArrayList)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)2 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)2