Search in sources :

Example 1 with Counter

use of org.apache.beam.runners.dataflow.worker.counters.Counter in project beam by apache.

the class MapTaskExecutorTest method testPerElementProcessingTimeCounters.

/**
 * Runs a three-element read wired into a no-op ParDo and checks that the ParDo's
 * "per-element-processing-time" counter ends up with a count of 3.
 */
@Test
public void testPerElementProcessingTimeCounters() throws Exception {
    // Turn on the per-element timing experiment on freshly created options.
    PipelineOptions options = PipelineOptionsFactory.create();
    DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
    debugOptions.setExperiments(Lists.newArrayList(DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));

    // The sampler is kept in a local because it is triggered by hand after execution.
    ExecutionStateSampler sampler = ExecutionStateSampler.newForTest();
    TestDataflowExecutionState otherState = new TestDataflowExecutionState(
        NameContext.forStage("test-stage"),
        "other",
        null /* requestingStepName */,
        null /* sideInputIndex */,
        null /* metricsContainer */,
        NoopProfileScope.NOOP);
    DataflowExecutionStateTracker tracker = new DataflowExecutionStateTracker(sampler, otherState, counterSet, options, "test-work-item-id");

    NameContext parDoName = nameForStep("s1");

    // Read step "s0" produces "a", "b", "c"; ParDo step "s1" consumes them.
    TestReader threeElementReader = new TestReader("a", "b", "c");
    OutputReceiver readReceiver = new OutputReceiver();
    ReadOperation readOp = ReadOperation.forTest(threeElementReader, readReceiver, TestOperationContext.create(counterSet, nameForStep("s0"), null, tracker));
    ParDoOperation parDoOp = new ParDoOperation(new NoopParDoFn(), new OutputReceiver[0], TestOperationContext.create(counterSet, parDoName, null, tracker));
    parDoOp.attachInput(readOp, 0);

    List<Operation> pipeline = Lists.newArrayList(readOp, parDoOp);
    try (MapTaskExecutor mapTaskExecutor = new MapTaskExecutor(pipeline, counterSet, tracker)) {
        mapTaskExecutor.execute();
    }
    sampler.doSampling(100L);

    // Look the counter up under the ParDo's name and check how many values it recorded.
    CounterName processingTimeName = CounterName.named("per-element-processing-time").withOriginalName(parDoName);
    Counter<Long, CounterDistribution> processingTime = (Counter<Long, CounterDistribution>) counterSet.getExistingCounter(processingTimeName);
    CounterDistribution distribution = processingTime.getAggregate();
    assertThat(distribution.getCount(), equalTo(3L));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) TestReader(org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader) TestDataflowExecutionState(org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) ExecutionStateSampler(org.apache.beam.runners.core.metrics.ExecutionStateSampler) Test(org.junit.Test)

Example 2 with Counter

use of org.apache.beam.runners.dataflow.worker.counters.Counter in project beam by apache.

the class IntrinsicMapTaskExecutorFactoryTest method testCreateMapTaskExecutor.

/**
 * Builds a MapTask of Read -> two ParDos -> Flatten -> Write, creates an executor for it through
 * the factory, and checks both the resulting operation wiring and the counters that exist before
 * the executor has been run.
 */
@Test
public void testCreateMapTaskExecutor() throws Exception {
    List<ParallelInstruction> parallelInstructions = Arrays.asList(
        createReadInstruction("Read"),
        createParDoInstruction(0, 0, "DoFn1"),
        createParDoInstruction(0, 0, "DoFnWithContext"),
        createFlattenInstruction(1, 0, 2, 0, "Flatten"),
        createWriteInstruction(3, 0, "Write"));

    MapTask task = new MapTask();
    task.setStageName(STAGE);
    task.setSystemName("systemName");
    task.setInstructions(parallelInstructions);
    task.setFactory(Transport.getJsonFactory());

    try (DataflowMapTaskExecutor mapTaskExecutor = mapTaskExecutorFactory.create(
        null /* beamFnControlClientHandler */,
        null /* GrpcFnServer<GrpcDataService> */,
        null /* ApiServiceDescriptor */,
        null /* GrpcFnServer<GrpcStateService> */,
        mapTaskToNetwork.apply(task),
        options,
        STAGE,
        readerRegistry,
        sinkRegistry,
        BatchModeExecutionContext.forTesting(options, counterSet, "testStage"),
        counterSet,
        idGenerator)) {
        // Safe covariant cast not expressible without rawtypes.
        // TODO(https://issues.apache.org/jira/browse/BEAM-10556)
        @SuppressWarnings({"rawtypes", "unchecked"})
        List<Object> createdOps = (List) mapTaskExecutor.operations;
        assertThat(createdOps, hasItems(instanceOf(ReadOperation.class), instanceOf(ParDoOperation.class), instanceOf(ParDoOperation.class), instanceOf(FlattenOperation.class), instanceOf(WriteOperation.class)));

        // The read must fan out to two receivers.
        ReadOperation read = Iterables.getOnlyElement(Iterables.filter(createdOps, ReadOperation.class));
        assertEquals(2, read.receivers[0].getReceiverCount());

        // Every ParDo must feed the single flatten.
        FlattenOperation flatten = Iterables.getOnlyElement(Iterables.filter(createdOps, FlattenOperation.class));
        for (ParDoOperation parDo : Iterables.filter(createdOps, ParDoOperation.class)) {
            assertSame(flatten, parDo.receivers[0].getOnlyReceiver());
        }

        // The flatten must feed the single write.
        WriteOperation write = Iterables.getOnlyElement(Iterables.filter(createdOps, WriteOperation.class));
        assertSame(write, flatten.receivers[0].getOnlyReceiver());
    }

    // "other" state only got created upon MapTaskExecutor.execute(), which this test never calls,
    // so its msecs counter must not exist.
    @SuppressWarnings("unchecked")
    Counter<Long, ?> otherStateMsecs = (Counter<Long, ?>) counterSet.getExistingCounter("test-other-msecs");
    assertNull(otherStateMsecs);

    // Extract the counter updates and check exactly which ones were reported.
    counterSet.extractUpdates(false, updateExtractor);
    verifyOutputCounters(updateExtractor, "read_output_name", "DoFn1_output", "DoFnWithContext_output", "flatten_output_name");
    verify(updateExtractor).longSum(eq(named("Read-ByteCount")), anyBoolean(), anyLong());
    verify(updateExtractor).longSum(eq(named("Write-ByteCount")), anyBoolean(), anyLong());
    verifyNoMoreInteractions(updateExtractor);
}
Also used : ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) FlattenOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) WriteOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation) MapTask(com.google.api.services.dataflow.model.MapTask) Matchers.anyLong(org.mockito.Matchers.anyLong) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 3 with Counter

use of org.apache.beam.runners.dataflow.worker.counters.Counter in project beam by apache.

the class StreamingDataflowWorker method sendWorkerUpdatesToDataflowService.

/**
 * Sends counter updates to Dataflow backend.
 *
 * <p>Collects counter updates (per-stage, cumulative, delta and, when the {@code beam_fn_api}
 * experiment is set, pending FnAPI monitoring infos), drains the pending failure stack traces,
 * and reports everything under {@code WINDMILL_COUNTER_UPDATE_WORK_ID}. Counter updates that
 * share a key are spread over several reports so that each report carries a key at most once.
 *
 * @param deltaCounters counter set whose modified delta updates are extracted and reported
 * @param cumulativeCounters counter set whose updates are extracted with {@code false} as the
 *     first {@code extractUpdates} argument
 * @throws IOException declared by this method; presumably raised while reporting to the service
 *     (NOTE(review): confirm against {@code workUnitClient.reportWorkItemStatus})
 * @throws UnsupportedOperationException if a pending FnAPI counter update is not cumulative
 */
private void sendWorkerUpdatesToDataflowService(CounterSet deltaCounters, CounterSet cumulativeCounters) throws IOException {
    // Throttle time is tracked by the windmillServer but is reported to DFE here.
    windmillQuotaThrottling.addValue(windmillServer.getAndResetThrottleTime());
    if (memoryMonitor.isThrashing()) {
        memoryThrashing.addValue(1);
    }
    List<CounterUpdate> counterUpdates = new ArrayList<>(128);
    if (publishCounters) {
        stageInfoMap.values().forEach(s -> counterUpdates.addAll(s.extractCounterUpdates()));
        counterUpdates.addAll(cumulativeCounters.extractUpdates(false, DataflowCounterUpdateExtractor.INSTANCE));
        counterUpdates.addAll(deltaCounters.extractModifiedDeltaUpdates(DataflowCounterUpdateExtractor.INSTANCE));
        if (hasExperiment(options, "beam_fn_api")) {
            Map<Object, List<CounterUpdate>> fnApiCounters = new HashMap<>();
            while (!this.pendingMonitoringInfos.isEmpty()) {
                final CounterUpdate item = this.pendingMonitoringInfos.poll();
                // Pending FnAPI updates arrive flagged as cumulative; they are re-flagged as
                // non-cumulative before being reported.
                if (item.getCumulative()) {
                    item.setCumulative(false);
                    // Group counterUpdates by counterUpdateKey so they can be aggregated before sending to
                    // dataflow service.
                    fnApiCounters.computeIfAbsent(getCounterUpdateKey(item), k -> new ArrayList<>()).add(item);
                } else {
                    // This is a safety check in case new counter type appears in FnAPI.
                    throw new UnsupportedOperationException("FnApi counters are expected to provide cumulative values." + " Please, update conversion to delta logic" + " if non-cumulative counter type is required.");
                }
            }
            // Aggregate the updates that share a counterUpdateKey so we can avoid excessive I/Os
            // for reporting to dataflow service.
            for (List<CounterUpdate> counterUpdateList : fnApiCounters.values()) {
                if (counterUpdateList.isEmpty()) {
                    continue;
                }
                List<CounterUpdate> aggregatedCounterUpdateList = CounterUpdateAggregators.aggregate(counterUpdateList);
                // More than 10 updates left for one key after aggregation is counted as an
                // aggregation error and may produce a (throttled) warning.
                if (aggregatedCounterUpdateList.size() > 10) {
                    // Use the value returned by the increment itself, so the throttling decision
                    // is based on this call's own count and not on a later re-read that another
                    // thread may already have changed.
                    long errorCount = this.counterAggregationErrorCount.incrementAndGet();
                    // log warning message only when error count is the power of 2 to avoid spamming.
                    if (errorCount > 10 && Long.bitCount(errorCount) == 1) {
                        CounterUpdate head = aggregatedCounterUpdateList.get(0);
                        LOG.warn("Found non-aggregated counter updates of size {} with kind {}, this will likely " + "cause performance degradation and excessive GC if size is large.", counterUpdateList.size(), MoreObjects.firstNonNull(head.getNameAndKind(), head.getStructuredNameAndMetadata()));
                    }
                }
                counterUpdates.addAll(aggregatedCounterUpdateList);
            }
        }
    }
    // Handle duplicate counters from different stages. Store all the counters in a multi-map and
    // send the counters that appear multiple times in separate RPCs. Same logical counter could
    // appear in multiple stages if a step runs in multiple stages (as with flatten-unzipped stages)
    // especially if the counter definition does not set execution_step_name.
    ListMultimap<Object, CounterUpdate> counterMultimap = MultimapBuilder.hashKeys(counterUpdates.size()).linkedListValues().build();
    boolean hasDuplicates = false;
    for (CounterUpdate c : counterUpdates) {
        Object key = getCounterUpdateKey(c);
        if (counterMultimap.containsKey(key)) {
            hasDuplicates = true;
        }
        counterMultimap.put(key, c);
    }
    // Clears counterUpdates and enqueues unique counters from counterMultimap. If a counter
    // appears more than once, one of them is extracted leaving the remaining in the map.
    Runnable extractUniqueCounters = () -> {
        counterUpdates.clear();
        for (Iterator<Object> iter = counterMultimap.keySet().iterator(); iter.hasNext(); ) {
            List<CounterUpdate> counters = counterMultimap.get(iter.next());
            counterUpdates.add(counters.get(0));
            if (counters.size() == 1) {
                // There is single value. Remove the entry through the iterator.
                iter.remove();
            } else {
                // Otherwise remove the first value.
                counters.remove(0);
            }
        }
    };
    if (hasDuplicates) {
        extractUniqueCounters.run();
    } else {
        // Common case: no duplicates. We can just send counterUpdates, empty the multimap.
        counterMultimap.clear();
    }
    List<Status> errors;
    synchronized (pendingFailuresToReport) {
        errors = new ArrayList<>(pendingFailuresToReport.size());
        for (String stackTrace : pendingFailuresToReport) {
            // Code 2 is rpc.Code.UNKNOWN.
            errors.add(new Status().setCode(2).setMessage(stackTrace));
        }
        // Best effort only, no need to wait till successfully sent.
        pendingFailuresToReport.clear();
    }
    WorkItemStatus workItemStatus = new WorkItemStatus().setWorkItemId(WINDMILL_COUNTER_UPDATE_WORK_ID).setErrors(errors).setCounterUpdates(counterUpdates);
    workUnitClient.reportWorkItemStatus(workItemStatus);
    // Send any counters appearing more than once in subsequent RPCs:
    while (!counterMultimap.isEmpty()) {
        extractUniqueCounters.run();
        workUnitClient.reportWorkItemStatus(new WorkItemStatus().setWorkItemId(WINDMILL_COUNTER_UPDATE_WORK_ID).setCounterUpdates(counterUpdates));
    }
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) MapTask(com.google.api.services.dataflow.model.MapTask) UserCodeException(org.apache.beam.sdk.util.UserCodeException) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) MetricsLogger(org.apache.beam.runners.core.metrics.MetricsLogger) CommitWorkStream(org.apache.beam.runners.dataflow.worker.windmill.WindmillServerStub.CommitWorkStream) CloudObjects(org.apache.beam.runners.dataflow.util.CloudObjects) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) CreateRegisterFnOperationFunction(org.apache.beam.runners.dataflow.worker.graph.CreateRegisterFnOperationFunction) ScopedProfiler(org.apache.beam.runners.dataflow.worker.profiler.ScopedProfiler) StreamPool(org.apache.beam.runners.dataflow.worker.windmill.WindmillServerStub.StreamPool) DataflowCounterUpdateExtractor(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor) Uninterruptibles(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.Uninterruptibles) TimerTask(java.util.TimerTask) WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) WorkerStatusPages(org.apache.beam.runners.dataflow.worker.status.WorkerStatusPages) RegisterNodeFunction(org.apache.beam.runners.dataflow.worker.graph.RegisterNodeFunction) IdGenerator(org.apache.beam.sdk.fn.IdGenerator) PrintWriter(java.io.PrintWriter) KvCoder(org.apache.beam.sdk.coders.KvCoder) THROTTLING_MSECS_METRIC_NAME(org.apache.beam.runners.dataflow.worker.DataflowSystemMetrics.THROTTLING_MSECS_METRIC_NAME) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) CacheBuilder(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.CacheBuilder) Sleeper(org.apache.beam.sdk.util.Sleeper) StreamingModeExecutionStateRegistry(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionStateRegistry) 
DebugCapture(org.apache.beam.runners.dataflow.worker.status.DebugCapture) Executors(java.util.concurrent.Executors) BoundedQueueExecutor(org.apache.beam.runners.dataflow.worker.util.BoundedQueueExecutor) MultimapBuilder(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.MultimapBuilder) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AutoValue(com.google.auto.value.AutoValue) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) InsertFetchAndFilterStreamingSideInputNodes(org.apache.beam.runners.dataflow.worker.graph.InsertFetchAndFilterStreamingSideInputNodes) Capturable(org.apache.beam.runners.dataflow.worker.status.DebugCapture.Capturable) Networks(org.apache.beam.runners.dataflow.worker.graph.Networks) DeduceNodeLocationsFunction(org.apache.beam.runners.dataflow.worker.graph.DeduceNodeLocationsFunction) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) Cache(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.Cache) Duration(org.joda.time.Duration) Splitter(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter) Optional(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Optional) ArrayList(java.util.ArrayList) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) Status(com.google.api.services.dataflow.model.Status) HttpServletRequest(javax.servlet.http.HttpServletRequest) EvictingQueue(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.EvictingQueue) GetWorkStream(org.apache.beam.runners.dataflow.worker.windmill.WindmillServerStub.GetWorkStream) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) 
Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) StreamingComputationConfig(com.google.api.services.dataflow.model.StreamingComputationConfig) DataflowRunner(org.apache.beam.runners.dataflow.DataflowRunner) IOException(java.io.IOException) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) CounterStructuredName(com.google.api.services.dataflow.model.CounterStructuredName) AtomicLong(java.util.concurrent.atomic.AtomicLong) MetricsEnvironment(org.apache.beam.sdk.metrics.MetricsEnvironment) DataflowWorkerLoggingMDC(org.apache.beam.runners.dataflow.worker.logging.DataflowWorkerLoggingMDC) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) ArrayDeque(java.util.ArrayDeque) FileSystems(org.apache.beam.sdk.io.FileSystems) StreamingPerStageSystemCounterNames(org.apache.beam.runners.dataflow.worker.DataflowSystemMetrics.StreamingPerStageSystemCounterNames) State(org.apache.beam.runners.dataflow.worker.StreamingDataflowWorker.Work.State) CounterUpdateAggregators(org.apache.beam.runners.dataflow.worker.counters.CounterUpdateAggregators) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) ReplacePgbkWithPrecombineFunction(org.apache.beam.runners.dataflow.worker.graph.ReplacePgbkWithPrecombineFunction) OutputObjectAndByteCounter(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputObjectAndByteCounter) MoreObjects(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) Timer(java.util.Timer) MutableNetwork(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.graph.MutableNetwork) HostAndPort(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort) BackOff(org.apache.beam.sdk.util.BackOff) BackOffUtils(org.apache.beam.sdk.util.BackOffUtils) StatusDataProvider(org.apache.beam.runners.dataflow.worker.status.StatusDataProvider) 
DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) DataflowRunner.hasExperiment(org.apache.beam.runners.dataflow.DataflowRunner.hasExperiment) Transport(org.apache.beam.sdk.extensions.gcp.util.Transport) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) ThreadFactory(java.util.concurrent.ThreadFactory) JvmInitializers(org.apache.beam.sdk.fn.JvmInitializers) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SdkWorkerHarness(org.apache.beam.runners.dataflow.worker.SdkHarnessRegistry.SdkWorkerHarness) FixMultiOutputInfosOnParDoInstructions(org.apache.beam.runners.dataflow.worker.apiary.FixMultiOutputInfosOnParDoInstructions) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) LastExceptionDataProvider(org.apache.beam.runners.dataflow.worker.status.LastExceptionDataProvider) List(java.util.List) ListMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ListMultimap) Queue(java.util.Queue) BaseStatusServlet(org.apache.beam.runners.dataflow.worker.status.BaseStatusServlet) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) StreamingSystemCounterNames(org.apache.beam.runners.dataflow.worker.DataflowSystemMetrics.StreamingSystemCounterNames) IdGenerators(org.apache.beam.sdk.fn.IdGenerators) CustomSources(org.apache.beam.runners.dataflow.internal.CustomSources) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionStateSampler(org.apache.beam.runners.core.metrics.ExecutionStateSampler) Deque(java.util.Deque) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) 
WorkItem(com.google.api.services.dataflow.model.WorkItem) Function(java.util.function.Function) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) TextFormat(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.TextFormat) ConcurrentMap(java.util.concurrent.ConcurrentMap) MemoryMonitor(org.apache.beam.runners.dataflow.worker.util.MemoryMonitor) HashSet(java.util.HashSet) DeduceFlattenLocationsFunction(org.apache.beam.runners.dataflow.worker.graph.DeduceFlattenLocationsFunction) StreamingConfigTask(com.google.api.services.dataflow.model.StreamingConfigTask) WindmillServerStub(org.apache.beam.runners.dataflow.worker.windmill.WindmillServerStub) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Nullable(org.checkerframework.checker.nullness.qual.Nullable) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) MapTaskToNetworkFunction(org.apache.beam.runners.dataflow.worker.graph.MapTaskToNetworkFunction) FluentBackoff(org.apache.beam.sdk.util.FluentBackoff) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) CloneAmbiguousFlattensFunction(org.apache.beam.runners.dataflow.worker.graph.CloneAmbiguousFlattensFunction) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Semaphore(java.util.concurrent.Semaphore) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) HttpServletResponse(javax.servlet.http.HttpServletResponse) TimeUnit(java.util.concurrent.TimeUnit) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Instant(org.joda.time.Instant) RemoteGrpcPort(org.apache.beam.model.fnexecution.v1.BeamFnApi.RemoteGrpcPort) Collections(java.util.Collections) LengthPrefixUnknownCoders(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders) 
WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) Status(com.google.api.services.dataflow.model.Status) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) Iterator(java.util.Iterator) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) ArrayList(java.util.ArrayList) List(java.util.List)

Example 4 with Counter

use of org.apache.beam.runners.dataflow.worker.counters.Counter in project beam by apache.

the class GroupingShuffleReaderTest method expectShuffleReadCounterEquals.

/**
 * Asserts that the single shuffle read counter produced by {@code factory} holds exactly the
 * expected per-step byte counts.
 *
 * @param factory test factory that must have produced one non-null shuffle read counter
 * @param expectedReadBytesForOriginal expected counter aggregate for each key; every key is
 *     combined with {@code ORIGINAL_SHUFFLE_STEP_NAME} to build the counter name looked up
 */
private void expectShuffleReadCounterEquals(TestShuffleReadCounterFactory factory, Map<String, Long> expectedReadBytesForOriginal) {
    ShuffleReadCounter src = factory.getOnlyShuffleReadCounterOrNull();
    assertNotNull(src);
    // If the legacy per-operation counter exists at all, it must not have accumulated anything.
    if (src.legacyPerOperationPerDatasetBytesCounter != null) {
        assertEquals(0, (long) src.legacyPerOperationPerDatasetBytesCounter.getAggregate());
    }
    // Verify that each executing step used when reading from the GroupingShuffleReader
    // has a counter with a bytes read value.
    assertEquals(expectedReadBytesForOriginal.size(), (long) src.counterSet.size());
    for (Map.Entry<String, Long> expected : expectedReadBytesForOriginal.entrySet()) {
        Counter<?, ?> counter = src.counterSet.getExistingCounter(ShuffleReadCounter.generateCounterName(ORIGINAL_SHUFFLE_STEP_NAME, expected.getKey()));
        // Fail with a readable message instead of a NullPointerException when the counter is missing.
        assertNotNull("No shuffle read counter found for step " + expected.getKey(), counter);
        assertEquals(expected.getValue(), counter.getAggregate());
    }
}
Also used : ShuffleEntry(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry) ShuffleReadCounter(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleReadCounter) ShuffleReadCounter(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleReadCounter) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) GroupingShuffleReaderIterator(org.apache.beam.runners.dataflow.worker.GroupingShuffleReader.GroupingShuffleReaderIterator) Iterator(java.util.Iterator) Base64.encodeBase64URLSafeString(com.google.api.client.util.Base64.encodeBase64URLSafeString) Map(java.util.Map) HashMap(java.util.HashMap)

Example 5 with Counter

use of org.apache.beam.runners.dataflow.worker.counters.Counter in project beam by apache.

the class IntrinsicMapTaskExecutorTest method testPerElementProcessingTimeCounters.

/**
 * Executes a three-element read feeding a no-op ParDo through an IntrinsicMapTaskExecutor that
 * shares the test's counter set, then checks that the ParDo's "per-element-processing-time"
 * counter has a count of 3.
 */
@Test
public void testPerElementProcessingTimeCounters() throws Exception {
    // Turn on the per-element timing experiment on freshly created options.
    PipelineOptions options = PipelineOptionsFactory.create();
    DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
    debugOptions.setExperiments(Lists.newArrayList(DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));

    ExecutionStateSampler sampler = ExecutionStateSampler.newForTest();
    TestDataflowExecutionState otherState = new TestDataflowExecutionState(
        NameContext.forStage("test-stage"),
        "other",
        null /* requestingStepName */,
        null /* sideInputIndex */,
        null /* metricsContainer */,
        NoopProfileScope.NOOP);
    DataflowExecutionStateTracker tracker = new DataflowExecutionStateTracker(sampler, otherState, counterSet, options, "test-work-item-id");

    NameContext parDoName = nameForStep("s1");

    // Read step "s0" produces "a", "b", "c"; ParDo step "s1" consumes them.
    TestReader threeElementReader = new TestReader("a", "b", "c");
    OutputReceiver readReceiver = new OutputReceiver();
    ReadOperation readOp = ReadOperation.forTest(threeElementReader, readReceiver, TestOperationContext.create(counterSet, nameForStep("s0"), null, tracker));
    ParDoOperation parDoOp = new ParDoOperation(new NoopParDoFn(), new OutputReceiver[0], TestOperationContext.create(counterSet, parDoName, null, tracker));
    parDoOp.attachInput(readOp, 0);

    List<Operation> pipeline = Lists.newArrayList(readOp, parDoOp);
    try (IntrinsicMapTaskExecutor mapTaskExecutor = IntrinsicMapTaskExecutor.withSharedCounterSet(pipeline, counterSet, tracker)) {
        mapTaskExecutor.execute();
    }

    // Look the counter up under the ParDo's name and check how many values it recorded.
    CounterName processingTimeName = CounterName.named("per-element-processing-time").withOriginalName(parDoName);
    Counter<Long, CounterDistribution> processingTime = (Counter<Long, CounterDistribution>) counterSet.getExistingCounter(processingTimeName);
    CounterDistribution distribution = processingTime.getAggregate();
    assertThat(distribution.getCount(), equalTo(3L));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) TestReader(org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) TestDataflowExecutionState(org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) Test(org.junit.Test)

Aggregations

Counter (org.apache.beam.runners.dataflow.worker.counters.Counter)7 Test (org.junit.Test)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Iterator (java.util.Iterator)3 Map (java.util.Map)3 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)3 CounterDistribution (org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution)3 CounterName (org.apache.beam.runners.dataflow.worker.counters.CounterName)3 NameContext (org.apache.beam.runners.dataflow.worker.counters.NameContext)3 ReadOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation)3 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)3 MapTask (com.google.api.services.dataflow.model.MapTask)2 List (java.util.List)2 Base64.encodeBase64URLSafeString (com.google.api.client.util.Base64.encodeBase64URLSafeString)1 CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName)1 CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate)1 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)1 Status (com.google.api.services.dataflow.model.Status)1 StreamingComputationConfig (com.google.api.services.dataflow.model.StreamingComputationConfig)1