Search in sources :

Example 1 with DataflowExecutionStateTracker

use of org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker in project beam by apache.

the class MapTaskExecutorTest method testPerElementProcessingTimeCounters.

/**
 * Verify counts for the per-element-output-time counter are correct.
 */
@Test
public void testPerElementProcessingTimeCounters() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.as(DataflowPipelineDebugOptions.class).setExperiments(Lists.newArrayList(DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));
    ExecutionStateSampler stateSampler = ExecutionStateSampler.newForTest();
    DataflowExecutionStateTracker stateTracker = new DataflowExecutionStateTracker(stateSampler, new TestDataflowExecutionState(NameContext.forStage("test-stage"), "other", null, /* requestingStepName */
    null, /* sideInputIndex */
    null, /* metricsContainer */
    NoopProfileScope.NOOP), counterSet, options, "test-work-item-id");
    NameContext parDoName = nameForStep("s1");
    // Wire a read operation with 3 elements to a ParDoOperation and assert that we count
    // the correct number of elements.
    ReadOperation read = ReadOperation.forTest(new TestReader("a", "b", "c"), new OutputReceiver(), TestOperationContext.create(counterSet, nameForStep("s0"), null, stateTracker));
    ParDoOperation parDo = new ParDoOperation(new NoopParDoFn(), new OutputReceiver[0], TestOperationContext.create(counterSet, parDoName, null, stateTracker));
    parDo.attachInput(read, 0);
    List<Operation> operations = Lists.newArrayList(read, parDo);
    try (MapTaskExecutor executor = new MapTaskExecutor(operations, counterSet, stateTracker)) {
        executor.execute();
    }
    stateSampler.doSampling(100L);
    CounterName counterName = CounterName.named("per-element-processing-time").withOriginalName(parDoName);
    Counter<Long, CounterDistribution> counter = (Counter<Long, CounterDistribution>) counterSet.getExistingCounter(counterName);
    assertThat(counter.getAggregate().getCount(), equalTo(3L));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) TestReader(org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader) TestDataflowExecutionState(org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) ExecutionStateSampler(org.apache.beam.runners.core.metrics.ExecutionStateSampler) Test(org.junit.Test)

Example 2 with DataflowExecutionStateTracker

use of org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker in project beam by apache.

the class IntrinsicMapTaskExecutorTest method testGetMetricContainers.

@Test
@SuppressWarnings("unchecked")
public /**
 * This test makes sure that any metrics reported within an operation are part of the metric
 * containers returned by {@link getMetricContainers}.
 */
void testGetMetricContainers() throws Exception {
    ExecutionStateTracker stateTracker = new DataflowExecutionStateTracker(ExecutionStateSampler.newForTest(), new TestDataflowExecutionState(NameContext.forStage("testStage"), "other", null, /* requestingStepName */
    null, /* sideInputIndex */
    null, /* metricsContainer */
    NoopProfileScope.NOOP), new CounterSet(), PipelineOptionsFactory.create(), "test-work-item-id");
    final String o1 = "o1";
    TestOperationContext context1 = createContext(o1, stateTracker);
    final String o2 = "o2";
    TestOperationContext context2 = createContext(o2, stateTracker);
    final String o3 = "o3";
    TestOperationContext context3 = createContext(o3, stateTracker);
    List<Operation> operations = Arrays.asList(new Operation(new OutputReceiver[] {}, context1) {

        @Override
        public void start() throws Exception {
            super.start();
            try (Closeable scope = context.enterStart()) {
                Metrics.counter("TestMetric", "MetricCounter").inc(1L);
            }
        }
    }, new Operation(new OutputReceiver[] {}, context2) {

        @Override
        public void start() throws Exception {
            super.start();
            try (Closeable scope = context.enterStart()) {
                Metrics.counter("TestMetric", "MetricCounter").inc(2L);
            }
        }
    }, new Operation(new OutputReceiver[] {}, context3) {

        @Override
        public void start() throws Exception {
            super.start();
            try (Closeable scope = context.enterStart()) {
                Metrics.counter("TestMetric", "MetricCounter").inc(3L);
            }
        }
    });
    try (IntrinsicMapTaskExecutor executor = IntrinsicMapTaskExecutor.withSharedCounterSet(operations, counterSet, stateTracker)) {
        // Call execute so that we run all the counters
        executor.execute();
        assertThat(context1.metricsContainer().getUpdates().counterUpdates(), contains(metricUpdate("TestMetric", "MetricCounter", o1, 1L)));
        assertThat(context2.metricsContainer().getUpdates().counterUpdates(), contains(metricUpdate("TestMetric", "MetricCounter", o2, 2L)));
        assertThat(context3.metricsContainer().getUpdates().counterUpdates(), contains(metricUpdate("TestMetric", "MetricCounter", o3, 3L)));
    }
}
Also used : Closeable(java.io.Closeable) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) TestDataflowExecutionState(org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation) ExpectedException(org.junit.rules.ExpectedException) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) Test(org.junit.Test)

Example 3 with DataflowExecutionStateTracker

use of org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker in project beam by apache.

the class WorkerCustomSourcesTest method testReadUnboundedReader.

@Test
public void testReadUnboundedReader() throws Exception {
    CounterSet counterSet = new CounterSet();
    StreamingModeExecutionStateRegistry executionStateRegistry = new StreamingModeExecutionStateRegistry(null);
    ReaderCache readerCache = new ReaderCache(Duration.standardMinutes(1), Runnable::run);
    StreamingModeExecutionContext context = new StreamingModeExecutionContext(counterSet, "computationId", readerCache, /*stateNameMap=*/
    ImmutableMap.of(), /*stateCache=*/
    null, StreamingStepMetricsContainer.createRegistry(), new DataflowExecutionStateTracker(ExecutionStateSampler.newForTest(), executionStateRegistry.getState(NameContext.forStage("stageName"), "other", null, NoopProfileScope.NOOP), counterSet, PipelineOptionsFactory.create(), "test-work-item-id"), executionStateRegistry, Long.MAX_VALUE);
    options.setNumWorkers(5);
    int maxElements = 10;
    DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
    debugOptions.setUnboundedReaderMaxElements(maxElements);
    ByteString state = ByteString.EMPTY;
    for (int i = 0; i < 10 * maxElements; ) /* Incremented in inner loop */
    {
        // Initialize streaming context with state from previous iteration.
        context.start("key", Windmill.WorkItem.newBuilder().setKey(// key is zero-padded index.
        ByteString.copyFromUtf8("0000000000000001")).setWorkToken(// Must be increasing across activations for cache to be used.
        i).setCacheToken(1).setSourceState(// Source state.
        Windmill.SourceState.newBuilder().setState(state).build()).build(), // input watermark
        new Instant(0), // output watermark
        null, // synchronized processing time
        null, // StateReader
        null, // StateFetcher
        null, Windmill.WorkItemCommitRequest.newBuilder());
        @SuppressWarnings({ "unchecked", "rawtypes" }) NativeReader<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>> reader = (NativeReader) WorkerCustomSources.create((CloudObject) serializeToCloudSource(new TestCountingSource(Integer.MAX_VALUE), options).getSpec(), options, context);
        // Verify data.
        Instant beforeReading = Instant.now();
        int numReadOnThisIteration = 0;
        for (WindowedValue<ValueWithRecordId<KV<Integer, Integer>>> value : ReaderUtils.readAllFromReader(reader)) {
            assertEquals(KV.of(0, i), value.getValue().getValue());
            assertArrayEquals(encodeToByteArray(KvCoder.of(VarIntCoder.of(), VarIntCoder.of()), KV.of(0, i)), value.getValue().getId());
            assertThat(value.getWindows(), contains((BoundedWindow) GlobalWindow.INSTANCE));
            assertEquals(i, value.getTimestamp().getMillis());
            i++;
            numReadOnThisIteration++;
        }
        Instant afterReading = Instant.now();
        long maxReadSec = debugOptions.getUnboundedReaderMaxReadTimeSec();
        assertThat(new Duration(beforeReading, afterReading).getStandardSeconds(), lessThanOrEqualTo(maxReadSec + 1));
        assertThat(numReadOnThisIteration, lessThanOrEqualTo(debugOptions.getUnboundedReaderMaxElements()));
        // Extract and verify state modifications.
        context.flushState();
        state = context.getOutputBuilder().getSourceStateUpdates().getState();
        // CountingSource's watermark is the last record + 1.  i is now one past the last record,
        // so the expected watermark is i millis.
        assertEquals(TimeUnit.MILLISECONDS.toMicros(i), context.getOutputBuilder().getSourceWatermark());
        assertEquals(1, context.getOutputBuilder().getSourceStateUpdates().getFinalizeIdsList().size());
        assertNotNull(readerCache.acquireReader(context.getComputationKey(), context.getWork().getCacheToken(), context.getWorkToken() + 1));
        assertEquals(7L, context.getBacklogBytes());
    }
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Instant(org.joda.time.Instant) StreamingModeExecutionStateRegistry(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionStateRegistry) Duration(org.joda.time.Duration) KV(org.apache.beam.sdk.values.KV) ValueWithRecordId(org.apache.beam.sdk.values.ValueWithRecordId) NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TestCountingSource(org.apache.beam.runners.dataflow.worker.testing.TestCountingSource) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) Test(org.junit.Test)

Example 4 with DataflowExecutionStateTracker

use of org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker in project beam by apache.

the class IntrinsicMapTaskExecutorTest method testPerElementProcessingTimeCounters.

/**
 * Verify counts for the per-element-output-time counter are correct.
 */
@Test
public void testPerElementProcessingTimeCounters() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.as(DataflowPipelineDebugOptions.class).setExperiments(Lists.newArrayList(DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));
    DataflowExecutionStateTracker stateTracker = new DataflowExecutionStateTracker(ExecutionStateSampler.newForTest(), new TestDataflowExecutionState(NameContext.forStage("test-stage"), "other", null, /* requestingStepName */
    null, /* sideInputIndex */
    null, /* metricsContainer */
    NoopProfileScope.NOOP), counterSet, options, "test-work-item-id");
    NameContext parDoName = nameForStep("s1");
    // Wire a read operation with 3 elements to a ParDoOperation and assert that we count
    // the correct number of elements.
    ReadOperation read = ReadOperation.forTest(new TestReader("a", "b", "c"), new OutputReceiver(), TestOperationContext.create(counterSet, nameForStep("s0"), null, stateTracker));
    ParDoOperation parDo = new ParDoOperation(new NoopParDoFn(), new OutputReceiver[0], TestOperationContext.create(counterSet, parDoName, null, stateTracker));
    parDo.attachInput(read, 0);
    List<Operation> operations = Lists.newArrayList(read, parDo);
    try (IntrinsicMapTaskExecutor executor = IntrinsicMapTaskExecutor.withSharedCounterSet(operations, counterSet, stateTracker)) {
        executor.execute();
    }
    CounterName counterName = CounterName.named("per-element-processing-time").withOriginalName(parDoName);
    Counter<Long, CounterDistribution> counter = (Counter<Long, CounterDistribution>) counterSet.getExistingCounter(counterName);
    assertThat(counter.getAggregate().getCount(), equalTo(3L));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) TestReader(org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) TestDataflowExecutionState(org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) Test(org.junit.Test)

Example 5 with DataflowExecutionStateTracker

use of org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker in project beam by apache.

the class StreamingModeExecutionContextTest method setUp.

@Before
public void setUp() {
    MockitoAnnotations.initMocks(this);
    options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class);
    CounterSet counterSet = new CounterSet();
    ConcurrentHashMap<String, String> stateNameMap = new ConcurrentHashMap<>();
    stateNameMap.put(NameContextsForTests.nameContextForTest().userName(), "testStateFamily");
    executionContext = new StreamingModeExecutionContext(counterSet, "computationId", new ReaderCache(Duration.standardMinutes(1), Executors.newCachedThreadPool()), stateNameMap, new WindmillStateCache(options.getWorkerCacheMb()).forComputation("comp"), StreamingStepMetricsContainer.createRegistry(), new DataflowExecutionStateTracker(ExecutionStateSampler.newForTest(), executionStateRegistry.getState(NameContext.forStage("stage"), "other", null, NoopProfileScope.NOOP), counterSet, PipelineOptionsFactory.create(), "test-work-item-id"), executionStateRegistry, Long.MAX_VALUE);
}
Also used : CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Before(org.junit.Before)

Aggregations

DataflowExecutionStateTracker (org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker)6 Test (org.junit.Test)5 TestDataflowExecutionState (org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState)4 CounterSet (org.apache.beam.runners.dataflow.worker.counters.CounterSet)4 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)3 Closeable (java.io.Closeable)2 ExecutionStateTracker (org.apache.beam.runners.core.metrics.ExecutionStateTracker)2 Counter (org.apache.beam.runners.dataflow.worker.counters.Counter)2 CounterDistribution (org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution)2 CounterName (org.apache.beam.runners.dataflow.worker.counters.CounterName)2 NameContext (org.apache.beam.runners.dataflow.worker.counters.NameContext)2 TestReader (org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader)2 Operation (org.apache.beam.runners.dataflow.worker.util.common.worker.Operation)2 OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver)2 ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation)2 ReadOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation)2 TestOutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver)2 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)2 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)2 ExpectedException (org.junit.rules.ExpectedException)2