Use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.
From the class WorkerCustomSourcesTest, the method testReadUnboundedReader:
@Test
public void testReadUnboundedReader() throws Exception {
  CounterSet counterSet = new CounterSet();
  StreamingModeExecutionStateRegistry executionStateRegistry =
      new StreamingModeExecutionStateRegistry(null);
  ReaderCache readerCache = new ReaderCache(Duration.standardMinutes(1), Runnable::run);
  StreamingModeExecutionContext context =
      new StreamingModeExecutionContext(
          counterSet,
          "computationId",
          readerCache,
          /* stateNameMap= */ ImmutableMap.of(),
          /* stateCache= */ null,
          StreamingStepMetricsContainer.createRegistry(),
          new DataflowExecutionStateTracker(
              ExecutionStateSampler.newForTest(),
              executionStateRegistry.getState(
                  NameContext.forStage("stageName"), "other", null, NoopProfileScope.NOOP),
              counterSet,
              PipelineOptionsFactory.create(),
              "test-work-item-id"),
          executionStateRegistry,
          Long.MAX_VALUE);
  options.setNumWorkers(5);
  int maxElements = 10;
  DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
  debugOptions.setUnboundedReaderMaxElements(maxElements);
  ByteString state = ByteString.EMPTY;
  for (int i = 0; i < 10 * maxElements; /* incremented in the inner loop */ ) {
    // Initialize the streaming context with state from the previous iteration.
    context.start(
        "key",
        Windmill.WorkItem.newBuilder()
            .setKey(ByteString.copyFromUtf8("0000000000000001")) // Key is zero-padded index.
            .setWorkToken(i) // Must be increasing across activations for the cache to be used.
            .setCacheToken(1)
            .setSourceState( // Source state.
                Windmill.SourceState.newBuilder().setState(state).build())
            .build(),
        new Instant(0), // input watermark
        null, // output watermark
        null, // synchronized processing time
        null, // StateReader
        null, // StateFetcher
        Windmill.WorkItemCommitRequest.newBuilder());
    @SuppressWarnings({"unchecked", "rawtypes"})
    NativeReader<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>> reader =
        (NativeReader)
            WorkerCustomSources.create(
                (CloudObject)
                    serializeToCloudSource(new TestCountingSource(Integer.MAX_VALUE), options)
                        .getSpec(),
                options,
                context);
    // Verify data.
    Instant beforeReading = Instant.now();
    int numReadOnThisIteration = 0;
    for (WindowedValue<ValueWithRecordId<KV<Integer, Integer>>> value :
        ReaderUtils.readAllFromReader(reader)) {
      assertEquals(KV.of(0, i), value.getValue().getValue());
      assertArrayEquals(
          encodeToByteArray(KvCoder.of(VarIntCoder.of(), VarIntCoder.of()), KV.of(0, i)),
          value.getValue().getId());
      assertThat(value.getWindows(), contains((BoundedWindow) GlobalWindow.INSTANCE));
      assertEquals(i, value.getTimestamp().getMillis());
      i++;
      numReadOnThisIteration++;
    }
    Instant afterReading = Instant.now();
    long maxReadSec = debugOptions.getUnboundedReaderMaxReadTimeSec();
    assertThat(
        new Duration(beforeReading, afterReading).getStandardSeconds(),
        lessThanOrEqualTo(maxReadSec + 1));
    assertThat(
        numReadOnThisIteration, lessThanOrEqualTo(debugOptions.getUnboundedReaderMaxElements()));
    // Extract and verify state modifications.
    context.flushState();
    state = context.getOutputBuilder().getSourceStateUpdates().getState();
    // CountingSource's watermark is the last record + 1. i is now one past the last record,
    // so the expected watermark is i millis.
    assertEquals(
        TimeUnit.MILLISECONDS.toMicros(i), context.getOutputBuilder().getSourceWatermark());
    assertEquals(1, context.getOutputBuilder().getSourceStateUpdates().getFinalizeIdsList().size());
    assertNotNull(
        readerCache.acquireReader(
            context.getComputationKey(),
            context.getWork().getCacheToken(),
            context.getWorkToken() + 1));
    assertEquals(7L, context.getBacklogBytes());
  }
}
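Boiled down, the CounterSet interaction in this test is small: the set is created once, handed to the StreamingModeExecutionContext, and counters that accumulate against it can later be extracted as CounterUpdate objects. A minimal sketch using only calls that appear in these examples; the counter name is hypothetical, and the boolean flag to extractUpdates is assumed (based on its usage in sendWorkerUpdatesToDataflowService below) to select delta vs. cumulative extraction:

// Sketch: register a counter against a CounterSet and extract its updates.
CounterSet counterSet = new CounterSet();
counterSet.longSum(CounterName.named("records-read")).addValue(10L); // hypothetical name
List<CounterUpdate> updates =
    counterSet.extractUpdates(false, DataflowCounterUpdateExtractor.INSTANCE);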
Use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.
From the class WorkItemStatusClientTest, the method populateCounterUpdatesWithOutputCounters:
/** Validates that an "internal" Counter is reported. */
@Test
public void populateCounterUpdatesWithOutputCounters() throws Exception {
  final CounterUpdate counter =
      new CounterUpdate()
          .setNameAndKind(new NameAndKind().setName("some-counter").setKind(Kind.SUM.toString()))
          .setCumulative(true)
          .setInteger(DataflowCounterUpdateExtractor.longToSplitInt(42));
  CounterSet counterSet = new CounterSet();
  counterSet.intSum(CounterName.named("some-counter")).addValue(42);
  WorkItemStatus status = new WorkItemStatus();
  when(worker.getOutputCounters()).thenReturn(counterSet);
  when(worker.extractMetricUpdates()).thenReturn(Collections.emptyList());
  statusClient.setWorker(worker, executionContext);
  statusClient.populateCounterUpdates(status);
  assertThat(status.getCounterUpdates(), containsInAnyOrder(counter));
}
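The expected CounterUpdate mirrors what extraction of this CounterSet yields for an unstructured integer sum: a NameAndKind with kind SUM and the value 42. The same counter, extracted directly rather than through WorkItemStatusClient (a sketch, assuming the extraction call behaves as in sendWorkerUpdatesToDataflowService below):

CounterSet counterSet = new CounterSet();
counterSet.intSum(CounterName.named("some-counter")).addValue(42);
// Expected to yield one CounterUpdate equivalent to "counter" in the test above.
List<CounterUpdate> updates =
    counterSet.extractUpdates(false, DataflowCounterUpdateExtractor.INSTANCE);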
Use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.
From the class WorkItemStatusClientTest, the method populateCounterUpdatesWithMetricsAndCounters:
/**
* Validates that Beam Metrics and "internal" Counters are merged in the update.
*/
@Test
public void populateCounterUpdatesWithMetricsAndCounters() throws Exception {
  final CounterUpdate expectedCounter =
      new CounterUpdate()
          .setNameAndKind(new NameAndKind().setName("some-counter").setKind(Kind.SUM.toString()))
          .setCumulative(true)
          .setInteger(DataflowCounterUpdateExtractor.longToSplitInt(42));
  CounterSet counterSet = new CounterSet();
  counterSet.intSum(CounterName.named("some-counter")).addValue(42);
  final CounterUpdate expectedMetric =
      new CounterUpdate()
          .setStructuredNameAndMetadata(
              new CounterStructuredNameAndMetadata()
                  .setName(
                      new CounterStructuredName()
                          .setOrigin("USER")
                          .setOriginNamespace("namespace")
                          .setName("some-counter")
                          .setOriginalStepName("step"))
                  .setMetadata(new CounterMetadata().setKind(Kind.SUM.toString())))
          .setCumulative(true)
          .setInteger(DataflowCounterUpdateExtractor.longToSplitInt(42));
  MetricsContainerImpl metricsContainer = new MetricsContainerImpl("step");
  BatchModeExecutionContext context = mock(BatchModeExecutionContext.class);
  when(context.extractMetricUpdates(anyBoolean())).thenReturn(ImmutableList.of(expectedMetric));
  when(context.extractMsecCounters(anyBoolean())).thenReturn(Collections.emptyList());
  CounterCell counter = metricsContainer.getCounter(MetricName.named("namespace", "some-counter"));
  counter.inc(1);
  counter.inc(41);
  counter.inc(1);
  counter.inc(-1);
  WorkItemStatus status = new WorkItemStatus();
  when(worker.getOutputCounters()).thenReturn(counterSet);
  when(worker.extractMetricUpdates()).thenReturn(Collections.emptyList());
  statusClient.setWorker(worker, context);
  statusClient.populateCounterUpdates(status);
  assertThat(status.getCounterUpdates(), containsInAnyOrder(expectedCounter, expectedMetric));
}
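The four inc() calls deliberately sum to 42 (1 + 41 + 1 - 1), so the Beam metric and the internal counter carry the same value under the same name and both expected updates can be asserted together. The metrics half in isolation, as a sketch:

// Equivalent Beam-metrics counter; one inc(42) matches the 1 + 41 + 1 - 1 sequence.
MetricsContainerImpl metricsContainer = new MetricsContainerImpl("step");
CounterCell counter =
    metricsContainer.getCounter(MetricName.named("namespace", "some-counter"));
counter.inc(42);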
Use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.
From the class StreamingDataflowWorker, the method sendWorkerUpdatesToDataflowService:
/**
* Sends counter updates to Dataflow backend.
*/
private void sendWorkerUpdatesToDataflowService(
    CounterSet deltaCounters, CounterSet cumulativeCounters) throws IOException {
  // Throttle time is tracked by the windmillServer but is reported to DFE here.
  windmillQuotaThrottling.addValue(windmillServer.getAndResetThrottleTime());
  if (memoryMonitor.isThrashing()) {
    memoryThrashing.addValue(1);
  }
  List<CounterUpdate> counterUpdates = new ArrayList<>(128);
  if (publishCounters) {
    stageInfoMap.values().forEach(s -> counterUpdates.addAll(s.extractCounterUpdates()));
    counterUpdates.addAll(
        cumulativeCounters.extractUpdates(false, DataflowCounterUpdateExtractor.INSTANCE));
    counterUpdates.addAll(
        deltaCounters.extractModifiedDeltaUpdates(DataflowCounterUpdateExtractor.INSTANCE));
    if (hasExperiment(options, "beam_fn_api")) {
      Map<Object, List<CounterUpdate>> fnApiCounters = new HashMap<>();
      while (!this.pendingMonitoringInfos.isEmpty()) {
        final CounterUpdate item = this.pendingMonitoringInfos.poll();
        // FnAPI counters are expected to carry cumulative values per WorkItem; flip the flag
        // so they are treated as deltas from here on.
        if (item.getCumulative()) {
          item.setCumulative(false);
          // Group counterUpdates by counterUpdateKey so they can be aggregated before sending to
          // dataflow service.
          fnApiCounters.computeIfAbsent(getCounterUpdateKey(item), k -> new ArrayList<>()).add(item);
        } else {
          // This is a safety check in case a new counter type appears in FnAPI.
          throw new UnsupportedOperationException(
              "FnApi counters are expected to provide cumulative values."
                  + " Please, update conversion to delta logic"
                  + " if non-cumulative counter type is required.");
        }
      }
      // Aggregate the grouped updates so we can avoid excessive I/Os for reporting to
      // dataflow service.
      for (List<CounterUpdate> counterUpdateList : fnApiCounters.values()) {
        if (counterUpdateList.isEmpty()) {
          continue;
        }
        List<CounterUpdate> aggregatedCounterUpdateList =
            CounterUpdateAggregators.aggregate(counterUpdateList);
        // If aggregation failed to compact the list, warn about the non-aggregated updates.
        if (aggregatedCounterUpdateList.size() > 10) {
          CounterUpdate head = aggregatedCounterUpdateList.get(0);
          this.counterAggregationErrorCount.getAndIncrement();
          // Log the warning only when the error count is a power of 2, to avoid spamming.
          if (this.counterAggregationErrorCount.get() > 10
              && Long.bitCount(this.counterAggregationErrorCount.get()) == 1) {
            LOG.warn(
                "Found non-aggregated counter updates of size {} with kind {}, this will likely "
                    + "cause performance degradation and excessive GC if size is large.",
                counterUpdateList.size(),
                MoreObjects.firstNonNull(head.getNameAndKind(), head.getStructuredNameAndMetadata()));
          }
        }
        counterUpdates.addAll(aggregatedCounterUpdateList);
      }
    }
  }
  // Handle duplicate counters from different stages. Store all the counters in a multi-map and
  // send the counters that appear multiple times in separate RPCs. The same logical counter could
  // appear in multiple stages if a step runs in multiple stages (as with flatten-unzipped stages),
  // especially if the counter definition does not set execution_step_name.
  ListMultimap<Object, CounterUpdate> counterMultimap =
      MultimapBuilder.hashKeys(counterUpdates.size()).linkedListValues().build();
  boolean hasDuplicates = false;
  for (CounterUpdate c : counterUpdates) {
    Object key = getCounterUpdateKey(c);
    if (counterMultimap.containsKey(key)) {
      hasDuplicates = true;
    }
    counterMultimap.put(key, c);
  }
  // Clears counterUpdates and enqueues unique counters from counterMultimap. If a counter
  // appears more than once, one occurrence is extracted, leaving the remainder in the map.
  Runnable extractUniqueCounters =
      () -> {
        counterUpdates.clear();
        for (Iterator<Object> iter = counterMultimap.keySet().iterator(); iter.hasNext(); ) {
          List<CounterUpdate> counters = counterMultimap.get(iter.next());
          counterUpdates.add(counters.get(0));
          if (counters.size() == 1) {
            // There is a single value. Remove the entry through the iterator.
            iter.remove();
          } else {
            // Otherwise remove the first value.
            counters.remove(0);
          }
        }
      };
  if (hasDuplicates) {
    extractUniqueCounters.run();
  } else {
    // Common case: no duplicates. We can just send counterUpdates; empty the multimap.
    counterMultimap.clear();
  }
  List<Status> errors;
  synchronized (pendingFailuresToReport) {
    errors = new ArrayList<>(pendingFailuresToReport.size());
    for (String stackTrace : pendingFailuresToReport) {
      errors.add(new Status().setCode(2 /* rpc.Code.UNKNOWN */).setMessage(stackTrace));
    }
    // Best effort only, no need to wait till successfully sent.
    pendingFailuresToReport.clear();
  }
  WorkItemStatus workItemStatus =
      new WorkItemStatus()
          .setWorkItemId(WINDMILL_COUNTER_UPDATE_WORK_ID)
          .setErrors(errors)
          .setCounterUpdates(counterUpdates);
  workUnitClient.reportWorkItemStatus(workItemStatus);
  // Send any counters appearing more than once in subsequent RPCs:
  while (!counterMultimap.isEmpty()) {
    extractUniqueCounters.run();
    workUnitClient.reportWorkItemStatus(
        new WorkItemStatus()
            .setWorkItemId(WINDMILL_COUNTER_UPDATE_WORK_ID)
            .setCounterUpdates(counterUpdates));
  }
}
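The subtle part is the interaction between extractUniqueCounters and the multimap: each pass takes one update per key, so duplicates of the same logical counter go out in separate RPCs rather than being merged or dropped. A self-contained sketch of the same round-robin drain with plain strings (all names hypothetical):

// Hypothetical standalone version of the duplicate-handling loop above.
Map<String, List<String>> byKey = new LinkedHashMap<>();
byKey.computeIfAbsent("counterA", k -> new ArrayList<>()).add("a1");
byKey.computeIfAbsent("counterA", k -> new ArrayList<>()).add("a2"); // duplicate key
byKey.computeIfAbsent("counterB", k -> new ArrayList<>()).add("b1");
while (!byKey.isEmpty()) {
  List<String> batch = new ArrayList<>();
  for (Iterator<List<String>> iter = byKey.values().iterator(); iter.hasNext(); ) {
    List<String> values = iter.next();
    batch.add(values.remove(0)); // take one update per key for this batch
    if (values.isEmpty()) {
      iter.remove(); // key fully drained
    }
  }
  // Each batch corresponds to one reportWorkItemStatus RPC: [a1, b1], then [a2].
  System.out.println(batch);
}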
Use of org.apache.beam.runners.dataflow.worker.counters.CounterSet in project beam by apache.
From the class StreamingDataflowWorker, the method reportHarnessStartup:
private void reportHarnessStartup() {
  DataflowWorkerLoggingMDC.setStageName("startup");
  CounterSet restartCounter = new CounterSet();
  restartCounter
      .longSum(StreamingSystemCounterNames.JAVA_HARNESS_RESTARTS.counterName())
      .addValue(1L);
  try {
    // Send a one-time update. Use an empty counter set for cumulativeCounters (2nd arg).
    sendWorkerUpdatesToDataflowService(restartCounter, new CounterSet());
  } catch (IOException e) {
    LOG.warn("Failed to send harness startup counter", e);
  }
}
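The empty CounterSet in the second argument is the convention for one-shot reports: sendWorkerUpdatesToDataflowService always takes both a delta set and a cumulative set, and a freshly created CounterSet simply extracts to no updates. Any single-fire counter could be reported the same way (a sketch; the counter name is hypothetical):

// One-shot delta report with no cumulative counters, mirroring reportHarnessStartup.
CounterSet oneShot = new CounterSet();
oneShot.longSum(CounterName.named("my-event-count")).addValue(1L); // hypothetical name
sendWorkerUpdatesToDataflowService(oneShot, new CounterSet());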