Search in sources :

Example 6 with CounterName

use of org.apache.beam.runners.dataflow.worker.counters.CounterName in project beam by apache.

the class SimpleParDoFnTest method testOutputsPerElementCounterDisabledViaExperiment.

// TODO: Remove once Distributions has shipped.
/**
 * Verifies that no {@code per-element-output-count} distribution update is reported when
 * {@link SimpleParDoFn#OUTPUTS_PER_ELEMENT_EXPERIMENT} has been removed from the experiments
 * list before {@code executeParDoFnCounterTest} is invoked.
 */
@Test
public void testOutputsPerElementCounterDisabledViaExperiment() throws Exception {
    // Take the experiment out of the shared options and write the list back.
    DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
    List<String> experiments = debugOptions.getExperiments();
    experiments.remove(SimpleParDoFn.OUTPUTS_PER_ELEMENT_EXPERIMENT);
    debugOptions.setExperiments(experiments);

    List<CounterUpdate> counterUpdates = executeParDoFnCounterTest(0);
    CounterName expectedName =
            CounterName.named("per-element-output-count")
                    .withOriginalName(stepContext.getNameContext());

    // Check each update on its own. not(contains(m)) only rejects a list whose single element
    // matches m, so it would pass for an empty list or for a list that holds the forbidden
    // counter next to any other update.
    for (CounterUpdate update : counterUpdates) {
        assertThat(update, not(hasStructuredName(expectedName, "DISTRIBUTION")));
    }
}
Also used : CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) Test(org.junit.Test)

Example 7 with CounterName

use of org.apache.beam.runners.dataflow.worker.counters.CounterName in project beam by apache.

the class SimpleParDoFnTest method testOutputsPerElementCounter.

/**
 * Runs {@code executeParDoFnCounterTest} with the inputs {@code {1, 2, 3, 4, 5}} and expects
 * exactly one counter update: the {@code per-element-output-count} distribution carrying the
 * aggregate built below.
 */
@Test
public void testOutputsPerElementCounter() throws Exception {
    List<CounterUpdate> updates = executeParDoFnCounterTest(new int[] { 1, 2, 3, 4, 5 });

    CounterName distributionName =
            CounterName.named("per-element-output-count")
                    .withOriginalName(stepContext.getNameContext());

    // Aggregate of the five samples 1..5: min/max, count, sum, sum of squares and buckets.
    CounterDistribution distribution =
            CounterDistribution.builder()
                    .minMax(1, 5)
                    .count(5)
                    .sum(1 + 2 + 3 + 4 + 5)
                    .sumOfSquares(1 + 4 + 9 + 16 + 25)
                    .buckets(1, Lists.newArrayList(1L, 3L, 1L))
                    .build();

    assertThat(
            updates,
            contains(
                    allOf(
                            hasStructuredName(distributionName, "DISTRIBUTION"),
                            hasDistribution(distribution))));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) Test(org.junit.Test)

Example 8 with CounterName

use of org.apache.beam.runners.dataflow.worker.counters.CounterName in project beam by apache.

the class IsmSideInputReaderTest method testIterableSideInputReadCounter.

/**
 * Reads an iterable side input through an {@link IsmSideInputReader} while a second execution
 * state is entered, then checks that the {@code read-sideinput-msecs} update is reported and
 * that a {@code read-sideinput-byte-count} counter exists in the counter factory.
 */
@Test
public void testIterableSideInputReadCounter() throws Exception {
    // Expected msec counter update, built piece by piece.
    CounterStructuredName msecName =
            new CounterStructuredName()
                    .setOrigin("SYSTEM")
                    .setName("read-sideinput-msecs")
                    .setOriginalStepName("originalName")
                    .setExecutionStepName("stageName")
                    .setOriginalRequestingStepName("originalName2")
                    .setInputIndex(1);
    CounterMetadata msecMetadata = new CounterMetadata().setKind(Kind.SUM.toString());
    CounterStructuredNameAndMetadata msecNameAndMetadata =
            new CounterStructuredNameAndMetadata().setMetadata(msecMetadata).setName(msecName);
    CounterUpdate expectedSideInputMsecUpdate =
            new CounterUpdate()
                    .setStructuredNameAndMetadata(msecNameAndMetadata)
                    .setCumulative(true)
                    .setInteger(new SplitInt64().setHighBits(0).setLowBits(0L));

    // Name of the byte counter that must exist after the read.
    CounterName expectedCounterName =
            CounterName.named("read-sideinput-byte-count")
                    .withOriginalName(operationContext.nameContext())
                    .withOrigin("SYSTEM")
                    .withOriginalRequestingStepName("originalName2")
                    .withInputIndex(1);

    // Coders for the ISM records written to the two input files.
    Coder<WindowedValue<Long>> valueCoder =
            WindowedValue.getFullCoder(VarLongCoder.of(), GLOBAL_WINDOW_CODER);
    IsmRecordCoder<WindowedValue<Long>> ismCoder =
            IsmRecordCoder.of(
                    1,
                    0,
                    ImmutableList.of(GLOBAL_WINDOW_CODER, BigEndianLongCoder.of()),
                    valueCoder);

    // A second state, standing in for the step that receives the side input.
    NameContext requestingStep =
            NameContext.create("stageName", "originalName2", "systemName2", "userName2");
    DataflowExecutionState state2 =
            executionContext
                    .getExecutionStateRegistry()
                    .getState(requestingStep, "process", null, NoopProfileScope.NOOP);

    final List<KV<Long, WindowedValue<Long>>> firstElements =
            Arrays.asList(KV.of(0L, valueInGlobalWindow(0L)));
    final List<KV<Long, WindowedValue<Long>>> secondElements = new ArrayList<>();
    for (long key = 0; key < 100; key++) {
        secondElements.add(KV.of(key, valueInGlobalWindow(key * 10)));
    }

    final PCollectionView<Iterable<Long>> view =
            Pipeline.create().apply(Create.empty(VarLongCoder.of())).apply(View.asIterable());
    Source sourceA = initInputFile(fromKvsForList(firstElements), ismCoder);
    Source sourceB = initInputFile(fromKvsForList(secondElements), ismCoder);

    try (Closeable enteredState = executionContext.getExecutionStateTracker().enterState(state2)) {
        final IsmSideInputReader reader =
                serialSideInputReader(view.getTagInternal().getId(), sourceA, sourceB);

        // Hold a strong reference to the result so the logical reference cache is not
        // cleared while the test runs.
        Iterable<Long> value = reader.get(view, GlobalWindow.INSTANCE);
        verifyIterable(toValueList(concat(firstElements, secondElements)), value);

        // A second lookup must hand back the very same instance, i.e. the cached value.
        assertSame(reader.get(view, GlobalWindow.INSTANCE), value);

        Iterable<CounterUpdate> counterUpdates =
                executionContext.getExecutionStateRegistry().extractUpdates(true);
        assertThat(counterUpdates, hasItem(expectedSideInputMsecUpdate));

        Counter<?, ?> expectedCounter = counterFactory.getExistingCounter(expectedCounterName);
        assertNotNull(expectedCounter);
    }
}
Also used : CounterMetadata(com.google.api.services.dataflow.model.CounterMetadata) CounterStructuredName(com.google.api.services.dataflow.model.CounterStructuredName) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) SplitInt64(com.google.api.services.dataflow.model.SplitInt64) KV(org.apache.beam.sdk.values.KV) Source(com.google.api.services.dataflow.model.Source) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) DataflowExecutionState(org.apache.beam.runners.dataflow.worker.DataflowOperationContext.DataflowExecutionState) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CounterStructuredNameAndMetadata(com.google.api.services.dataflow.model.CounterStructuredNameAndMetadata) Test(org.junit.Test)

Example 9 with CounterName

use of org.apache.beam.runners.dataflow.worker.counters.CounterName in project beam by apache.

the class IntrinsicMapTaskExecutorTest method testPerElementProcessingTimeCounters.

/**
 * Runs a three-element read wired into a no-op ParDo and checks that the ParDo step's
 * {@code per-element-processing-time} counter has aggregated three samples.
 */
@Test
public void testPerElementProcessingTimeCounters() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
    debugOptions.setExperiments(
            Lists.newArrayList(DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));

    ExecutionStateSampler sampler = ExecutionStateSampler.newForTest();
    TestDataflowExecutionState otherState =
            new TestDataflowExecutionState(
                    NameContext.forStage("test-stage"),
                    "other",
                    /* requestingStepName */ null,
                    /* sideInputIndex */ null,
                    /* metricsContainer */ null,
                    NoopProfileScope.NOOP);
    DataflowExecutionStateTracker stateTracker =
            new DataflowExecutionStateTracker(
                    sampler, otherState, counterSet, options, "test-work-item-id");

    NameContext parDoName = nameForStep("s1");

    // Read operation producing "a", "b", "c", attached as input 0 of the ParDo operation.
    ReadOperation read =
            ReadOperation.forTest(
                    new TestReader("a", "b", "c"),
                    new OutputReceiver(),
                    TestOperationContext.create(counterSet, nameForStep("s0"), null, stateTracker));
    ParDoOperation parDo =
            new ParDoOperation(
                    new NoopParDoFn(),
                    new OutputReceiver[0],
                    TestOperationContext.create(counterSet, parDoName, null, stateTracker));
    parDo.attachInput(read, 0);

    List<Operation> operations = Lists.newArrayList(read, parDo);
    try (IntrinsicMapTaskExecutor executor =
            IntrinsicMapTaskExecutor.withSharedCounterSet(operations, counterSet, stateTracker)) {
        executor.execute();
    }

    // One sample per element read, so the distribution's count must be 3.
    CounterName processingTimeName =
            CounterName.named("per-element-processing-time").withOriginalName(parDoName);
    Counter<Long, CounterDistribution> processingTime =
            (Counter<Long, CounterDistribution>) counterSet.getExistingCounter(processingTimeName);
    CounterDistribution aggregate = processingTime.getAggregate();
    assertThat(aggregate.getCount(), equalTo(3L));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) TestReader(org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) TestDataflowExecutionState(org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) Test(org.junit.Test)

Example 10 with CounterName

use of org.apache.beam.runners.dataflow.worker.counters.CounterName in project beam by apache.

the class DataflowExecutionStateTrackerTest method assertElementProcessingTimeCounter.

/**
 * Asserts that the element-processing-time counter for {@code step} exists and that its
 * aggregate equals a distribution holding exactly one sample of {@code millis}.
 *
 * @param step name context used to qualify {@code ElementExecutionTracker.COUNTER_NAME}
 * @param millis the single expected sample; used as min, max and sum of the distribution
 * @param bucketOffset offset passed to {@code buckets(...)} for the one populated bucket
 */
private void assertElementProcessingTimeCounter(NameContext step, int millis, int bucketOffset) {
    CounterName counterName = ElementExecutionTracker.COUNTER_NAME.withOriginalName(step);
    Counter<?, CounterDistribution> counter =
            (Counter<?, CounterDistribution>) counterSet.getExistingCounter(counterName);
    assertNotNull(counter);

    // Square in long arithmetic: an int millis * millis wraps once millis exceeds 46340.
    long squaredMillis = (long) millis * millis;
    CounterDistribution expected =
            CounterDistribution.builder()
                    .minMax(millis, millis)
                    .count(1)
                    .sum(millis)
                    .sumOfSquares(squaredMillis)
                    .buckets(bucketOffset, Lists.newArrayList(1L))
                    .build();

    assertThat(counter.getAggregate(), equalTo(expected));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) CounterFactory(org.apache.beam.runners.dataflow.worker.counters.CounterFactory)

Aggregations

CounterName (org.apache.beam.runners.dataflow.worker.counters.CounterName)12 Test (org.junit.Test)9 CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate)4 NameContextsForTests.nameContextForTest (org.apache.beam.runners.dataflow.worker.NameContextsForTests.nameContextForTest)4 CounterDistribution (org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution)4 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)3 Counter (org.apache.beam.runners.dataflow.worker.counters.Counter)3 DataflowExecutionStateTracker (org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker)2 DataflowExecutionState (org.apache.beam.runners.dataflow.worker.DataflowOperationContext.DataflowExecutionState)2 TestDataflowExecutionState (org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState)2 NameContext (org.apache.beam.runners.dataflow.worker.counters.NameContext)2 TestReader (org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader)2 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)2 CounterMetadata (com.google.api.services.dataflow.model.CounterMetadata)1 CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName)1 CounterStructuredNameAndMetadata (com.google.api.services.dataflow.model.CounterStructuredNameAndMetadata)1 Source (com.google.api.services.dataflow.model.Source)1 SplitInt64 (com.google.api.services.dataflow.model.SplitInt64)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 Closeable (java.io.Closeable)1