Search in sources :

Example 6 with CounterUpdate

use of com.google.api.services.dataflow.model.CounterUpdate in project beam by apache.

the class CounterShortIdCache method storeNewShortIds.

/**
 * Add any new short ids received to the table. The outgoing request will have the full counter
 * updates, and the incoming responses have the associated short ids. By matching up short ids
 * with the counters in order we can build a mapping of name -> short_id for future use.
 *
 * <p>Statuses and service states are paired by position. A state without short ids is skipped.
 * Each short id message points at a counter update by index; a missing index is treated as 0.
 *
 * @param request the status report that was sent; its work item statuses must be non-null
 * @param reply the response to {@code request}; its work item service states must be non-null
 *     and equal in number to the request's statuses
 * @throws IllegalArgumentException if request and reply are unbalanced, if a state carries short
 *     ids while the matching status has no counter updates, or if a metric index is negative or
 *     not smaller than the number of counter updates sent
 */
public void storeNewShortIds(final ReportWorkItemStatusRequest request, final ReportWorkItemStatusResponse reply) {
    checkArgument(request.getWorkItemStatuses() != null && reply.getWorkItemServiceStates() != null && request.getWorkItemStatuses().size() == reply.getWorkItemServiceStates().size(), "RequestWorkItemStatus request and response are unbalanced, status: %s, states: %s", request.getWorkItemStatuses(), reply.getWorkItemServiceStates());
    for (int i = 0; i < request.getWorkItemStatuses().size(); i++) {
        WorkItemServiceState state = reply.getWorkItemServiceStates().get(i);
        WorkItemStatus status = request.getWorkItemStatuses().get(i);
        if (state.getMetricShortId() == null) {
            // Nothing new was assigned for this work item.
            continue;
        }
        checkArgument(status.getCounterUpdates() != null, "Response has shortids but no corresponding CounterUpdate");
        int updateCount = status.getCounterUpdates().size();
        for (MetricShortId shortIdMsg : state.getMetricShortId()) {
            int metricIndex = MoreObjects.firstNonNull(shortIdMsg.getMetricIndex(), 0);
            // Reject negative indices here so they surface as an argument error rather than an
            // IndexOutOfBoundsException from the list lookup below.
            checkArgument(metricIndex >= 0, "Received negative aggregate index %s", metricIndex);
            // Report the resolved index: the raw field may be null when the index defaulted to 0.
            checkArgument(metricIndex < updateCount, "Received aggregate index outside range of sent update %s >= %s", metricIndex, updateCount);
            CounterUpdate update = status.getCounterUpdates().get(metricIndex);
            cache.insert(update, checkNotNull(shortIdMsg.getShortId(), "Shortid should be non-null"));
        }
    }
}
Also used : WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) WorkItemServiceState(com.google.api.services.dataflow.model.WorkItemServiceState) MetricShortId(com.google.api.services.dataflow.model.MetricShortId) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate)

Example 7 with CounterUpdate

use of com.google.api.services.dataflow.model.CounterUpdate in project beam by apache.

the class StreamingModeExecutionContextTest method stateSamplingInStreaming.

@Test(timeout = 2000)
public void stateSamplingInStreaming() {
    // Updates written on one thread must always eventually become visible to the thread that
    // extracts them; the JUnit timeout bounds the polling below.
    final StreamingModeExecutionState sampledState = new StreamingModeExecutionState(NameContextsForTests.nameContextForTest(), "testState", null, NoopProfileScope.NOOP, null);
    final ExecutionStateSampler stateSampler = ExecutionStateSampler.newForTest();
    try {
        stateSampler.start();
        final ExecutionStateTracker stateTracker = new ExecutionStateTracker(stateSampler);
        final Thread trackedThread = new Thread();
        trackedThread.setName("looping-thread-for-test");
        stateTracker.activate(trackedThread);
        stateTracker.enterState(sampledState);
        // Observe three separate non-null updates, each carrying a positive value.
        int observed = 0;
        while (observed < 3) {
            CounterUpdate extracted;
            do {
                extracted = sampledState.extractUpdate(false);
            } while (extracted == null);
            final long extractedValue = splitIntToLong(extracted.getInteger());
            assertThat(extractedValue, Matchers.greaterThan(0L));
            observed++;
        }
    } finally {
        stateSampler.stop();
    }
}
Also used : StreamingModeExecutionState(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionState) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) ExecutionStateSampler(org.apache.beam.runners.core.metrics.ExecutionStateSampler) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) StateNamespaceForTest(org.apache.beam.runners.core.StateNamespaceForTest) Test(org.junit.Test)

Example 8 with CounterUpdate

use of com.google.api.services.dataflow.model.CounterUpdate in project beam by apache.

the class StreamingModeExecutionContextTest method testAtomicExtractUpdate.

/**
 * Ensure that incrementing and extracting counter updates are correct under concurrent reader and
 * writer threads.
 *
 * <p>The writer takes {@code numUpdates} samples of 1 while the reader repeatedly extracts
 * updates and sums their values; the sum must equal {@code numUpdates}, i.e. no sample may be
 * lost or counted twice.
 *
 * @throws InterruptedException if interrupted while waiting for the tasks
 * @throws ExecutionException if the reader task failed
 */
@Test
public void testAtomicExtractUpdate() throws InterruptedException, ExecutionException {
    long numUpdates = 1_000_000;
    StreamingModeExecutionState state = new StreamingModeExecutionState(NameContextsForTests.nameContextForTest(), "testState", null, NoopProfileScope.NOOP, null);
    ExecutorService executor = Executors.newFixedThreadPool(2);
    try {
        AtomicBoolean doneWriting = new AtomicBoolean(false);
        Callable<Long> reader = () -> {
            long count = 0;
            boolean isLastRead;
            do {
                // Read the flag BEFORE extracting: if the writer had already finished, this
                // extraction is guaranteed to see everything it wrote.
                isLastRead = doneWriting.get();
                CounterUpdate update = state.extractUpdate(false);
                if (update != null) {
                    count += splitIntToLong(update.getInteger());
                }
            } while (!isLastRead);
            return count;
        };
        Runnable writer = () -> {
            for (int i = 0; i < numUpdates; i++) {
                state.takeSample(1L);
            }
            doneWriting.set(true);
        };
        // NB: Reader is invoked before writer to ensure they execute concurrently.
        List<Future<Long>> results = executor.invokeAll(Lists.newArrayList(reader, Executors.callable(writer, 0L)), 2, TimeUnit.SECONDS);
        long count = results.get(0).get();
        assertThat(count, equalTo(numUpdates));
    } finally {
        // Always release the pool's worker threads, including when the assertion or the
        // future lookup throws; otherwise they outlive the test.
        executor.shutdownNow();
    }
}
Also used : StreamingModeExecutionState(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionState) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ExecutorService(java.util.concurrent.ExecutorService) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) Future(java.util.concurrent.Future) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) StateNamespaceForTest(org.apache.beam.runners.core.StateNamespaceForTest) Test(org.junit.Test)

Example 9 with CounterUpdate

use of com.google.api.services.dataflow.model.CounterUpdate in project beam by apache.

the class StreamingStepMetricsContainerTest method testDistributionUpdateExtraction.

@Test
public void testDistributionUpdateExtraction() {
    // Round one: three samples (5, 6, 7) are recorded before the first extraction.
    Distribution dist = c1.getDistribution(name1);
    dist.update(5);
    dist.update(6);
    dist.update(7);
    Iterable<CounterUpdate> extracted = StreamingStepMetricsContainer.extractMetricUpdates(registry);

    CounterStructuredName firstName = new CounterStructuredName()
        .setOrigin(Origin.USER.toString())
        .setOriginNamespace("ns")
        .setName("name1")
        .setOriginalStepName("s1");
    CounterMetadata firstMetadata = new CounterMetadata().setKind(Kind.DISTRIBUTION.toString());
    DistributionUpdate firstDistribution = new DistributionUpdate()
        .setCount(longToSplitInt(3))
        .setMax(longToSplitInt(7))
        .setMin(longToSplitInt(5))
        .setSum(longToSplitInt(18));
    CounterUpdate firstExpected = new CounterUpdate()
        .setStructuredNameAndMetadata(new CounterStructuredNameAndMetadata().setName(firstName).setMetadata(firstMetadata))
        .setCumulative(false)
        .setDistribution(firstDistribution);
    assertThat(extracted, containsInAnyOrder(firstExpected));

    // Round two: the next extraction is expected to report only the single new sample (3).
    c1.getDistribution(name1).update(3);
    extracted = StreamingStepMetricsContainer.extractMetricUpdates(registry);

    CounterStructuredName secondName = new CounterStructuredName()
        .setOrigin(Origin.USER.toString())
        .setOriginNamespace("ns")
        .setName("name1")
        .setOriginalStepName("s1");
    CounterMetadata secondMetadata = new CounterMetadata().setKind(Kind.DISTRIBUTION.toString());
    DistributionUpdate secondDistribution = new DistributionUpdate()
        .setCount(longToSplitInt(1))
        .setMax(longToSplitInt(3))
        .setMin(longToSplitInt(3))
        .setSum(longToSplitInt(3));
    CounterUpdate secondExpected = new CounterUpdate()
        .setStructuredNameAndMetadata(new CounterStructuredNameAndMetadata().setName(secondName).setMetadata(secondMetadata))
        .setCumulative(false)
        .setDistribution(secondDistribution);
    assertThat(extracted, containsInAnyOrder(secondExpected));
}
Also used : CounterMetadata(com.google.api.services.dataflow.model.CounterMetadata) CounterStructuredName(com.google.api.services.dataflow.model.CounterStructuredName) Distribution(org.apache.beam.sdk.metrics.Distribution) DistributionUpdate(com.google.api.services.dataflow.model.DistributionUpdate) CounterStructuredNameAndMetadata(com.google.api.services.dataflow.model.CounterStructuredNameAndMetadata) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) Test(org.junit.Test)

Example 10 with CounterUpdate

use of com.google.api.services.dataflow.model.CounterUpdate in project beam by apache.

the class StreamingDataflowWorkerTest method testUnboundedSources.

/**
 * Feeds three work items to a worker built from {@code makeUnboundedSourcePipeline()} through a
 * {@link FakeWindmillServer} and, after each one, checks the resulting commit request and the
 * {@code dataflow_input_size-computation} counter.
 *
 * <p>The three phases are: a new key, the same key continuing with source state carried over,
 * and a different key that starts from supplied source state.
 */
@Test
public void testUnboundedSources() throws Exception {
    // Handed to TestCountingSource; asserted below to contain 0 after the second work item.
    // NOTE(review): presumably records the ids passed to checkpoint finalization - confirm in
    // TestCountingSource.
    List<Integer> finalizeTracker = Lists.newArrayList();
    TestCountingSource.setFinalizeTracker(finalizeTracker);
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorker worker = makeWorker(makeUnboundedSourcePipeline(), createTestingPipelineOptions(server), false);
    worker.start();
    // Test new key.
    server.addWorkToOffer(buildInput("work {" + "  computation_id: \"computation\"" + "  input_data_watermark: 0" + "  work {" + "    key: \"0000000000000001\"" + "    sharding_key: 1" + "    work_token: 1" + "    cache_token: 1" + "  }" + "}", null));
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
    Iterable<CounterUpdate> counters = worker.buildCounters();
    // The lookup key matches the work_token of the work item offered above.
    Windmill.WorkItemCommitRequest commit = result.get(1L);
    // The finalize id is read back from the actual commit and spliced into the expected
    // proto text below, and into the source_state of the next work item.
    UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
    assertThat(commit, equalTo(setMessagesMetadata(PaneInfo.NO_FIRING, CoderUtils.encodeToByteArray(CollectionCoder.of(GlobalWindow.Coder.INSTANCE), Arrays.asList(GlobalWindow.INSTANCE)), parseCommitRequest("key: \"0000000000000001\" " + "sharding_key: 1 " + "work_token: 1 " + "cache_token: 1 " + "source_backlog_bytes: 7 " + "output_messages {" + "  destination_stream_id: \"out\"" + "  bundles {" + "    key: \"0000000000000001\"" + "    messages {" + "      timestamp: 0" + "      data: \"0:0\"" + "    }" + "    messages_ids: \"\"" + "  }" + "} " + "source_state_updates {" + "  state: \"\000\"" + "  finalize_ids: " + finalizeId + "} " + "source_watermark: 1000")).build()));
    // The input-size counter is reported for the first work item.
    assertEquals(18L, splitIntToLong(getCounter(counters, "dataflow_input_size-computation").getInteger()));
    // Test same key continuing. The counter is done.
    server.addWorkToOffer(buildInput("work {" + "  computation_id: \"computation\"" + "  input_data_watermark: 0" + "  work {" + "    key: \"0000000000000001\"" + "    sharding_key: 1" + "    work_token: 2" + "    cache_token: 1" + "    source_state {" + "      state: \"\001\"" + "      finalize_ids: " + finalizeId + "    } " + "  }" + "}", null));
    result = server.waitForAndGetCommits(1);
    counters = worker.buildCounters();
    commit = result.get(2L);
    finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
    // Unlike the first commit, no output_messages are expected here.
    assertThat(commit, equalTo(parseCommitRequest("key: \"0000000000000001\" " + "sharding_key: 1 " + "work_token: 2 " + "cache_token: 1 " + "source_backlog_bytes: 7 " + "source_state_updates {" + "  state: \"\000\"" + "  finalize_ids: " + finalizeId + "} " + "source_watermark: 1000").build()));
    assertThat(finalizeTracker, contains(0));
    // No input-size counter update is expected this time.
    assertEquals(null, getCounter(counters, "dataflow_input_size-computation"));
    // Test recovery (on a new key so fresh reader state). Counter is done.
    server.addWorkToOffer(buildInput("work {" + "  computation_id: \"computation\"" + "  input_data_watermark: 0" + "  work {" + "    key: \"0000000000000002\"" + "    sharding_key: 2" + "    work_token: 3" + "    cache_token: 2" + "    source_state {" + "      state: \"\000\"" + "    } " + "  }" + "}", null));
    result = server.waitForAndGetCommits(1);
    counters = worker.buildCounters();
    commit = result.get(3L);
    finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
    assertThat(commit, equalTo(parseCommitRequest("key: \"0000000000000002\" " + "sharding_key: 2 " + "work_token: 3 " + "cache_token: 2 " + "source_backlog_bytes: 7 " + "source_state_updates {" + "  state: \"\000\"" + "  finalize_ids: " + finalizeId + "} " + "source_watermark: 1000").build()));
    assertEquals(null, getCounter(counters, "dataflow_input_size-computation"));
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) Test(org.junit.Test)

Aggregations

CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate): 51; Test (org.junit.Test): 33; CounterStructuredNameAndMetadata (com.google.api.services.dataflow.model.CounterStructuredNameAndMetadata): 18; CounterMetadata (com.google.api.services.dataflow.model.CounterMetadata): 16; CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName): 12; HashMap (java.util.HashMap): 10; WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus): 9; ArrayList (java.util.ArrayList): 9; MonitoringInfo (org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo): 7; DataflowStepContext (org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext): 7; DistributionUpdate (com.google.api.services.dataflow.model.DistributionUpdate): 6; NameContext (org.apache.beam.runners.dataflow.worker.counters.NameContext): 6; Nullable (org.checkerframework.checker.nullness.qual.Nullable): 6; NameAndKind (com.google.api.services.dataflow.model.NameAndKind): 5; AtomicLong (java.util.concurrent.atomic.AtomicLong): 5; DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong): 5; WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 4; IntegerMean (com.google.api.services.dataflow.model.IntegerMean): 3; List (java.util.List): 3; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 3