use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ListMultimap in project beam by apache.
the class StreamingDataflowWorker method sendWorkerUpdatesToDataflowService.
/**
 * Sends counter updates and any pending failure reports to the Dataflow backend.
 *
 * <p>Counters are gathered only when {@code publishCounters} is set. When the same counter key
 * appears more than once in a single batch, the duplicates are spread over several consecutive
 * {@code reportWorkItemStatus} calls so that each call carries at most one update per key.
 *
 * @param deltaCounters counter set whose modified delta updates are extracted and reported
 * @param cumulativeCounters counter set whose updates are extracted and reported
 * @throws IOException declared by this method; presumably raised by the status-reporting calls
 * @throws UnsupportedOperationException if a pending FnApi counter update is not cumulative
 */
private void sendWorkerUpdatesToDataflowService(CounterSet deltaCounters, CounterSet cumulativeCounters) throws IOException {
  // Throttle time is tracked by the windmillServer but is reported to DFE here.
  windmillQuotaThrottling.addValue(windmillServer.getAndResetThrottleTime());
  if (memoryMonitor.isThrashing()) {
    memoryThrashing.addValue(1);
  }
  List<CounterUpdate> counterUpdates = new ArrayList<>(128);
  if (publishCounters) {
    stageInfoMap.values().forEach(s -> counterUpdates.addAll(s.extractCounterUpdates()));
    counterUpdates.addAll(cumulativeCounters.extractUpdates(false, DataflowCounterUpdateExtractor.INSTANCE));
    counterUpdates.addAll(deltaCounters.extractModifiedDeltaUpdates(DataflowCounterUpdateExtractor.INSTANCE));
    if (hasExperiment(options, "beam_fn_api")) {
      Map<Object, List<CounterUpdate>> fnApiCounters = new HashMap<>();
      while (!this.pendingMonitoringInfos.isEmpty()) {
        final CounterUpdate item = this.pendingMonitoringInfos.poll();
        // Pending FnApi updates are expected to be flagged cumulative; the flag is cleared here
        // before the update is forwarded (the "conversion to delta" named in the error below).
        if (item.getCumulative()) {
          item.setCumulative(false);
          // Group counterUpdates by counterUpdateKey so they can be aggregated before sending to
          // dataflow service.
          fnApiCounters.computeIfAbsent(getCounterUpdateKey(item), k -> new ArrayList<>()).add(item);
        } else {
          // This is a safety check in case new counter type appears in FnAPI.
          throw new UnsupportedOperationException("FnApi counters are expected to provide cumulative values." + " Please, update conversion to delta logic" + " if non-cumulative counter type is required.");
        }
      }
      // Aggregate the updates that share a counterUpdateKey where possible, so we can avoid
      // excessive I/Os for reporting to dataflow service.
      for (List<CounterUpdate> counterUpdateList : fnApiCounters.values()) {
        if (counterUpdateList.isEmpty()) {
          continue;
        }
        List<CounterUpdate> aggregatedCounterUpdateList = CounterUpdateAggregators.aggregate(counterUpdateList);
        // More than 10 updates left for one key after aggregation is counted as an aggregation
        // error and occasionally logged as a warning.
        if (aggregatedCounterUpdateList.size() > 10) {
          CounterUpdate head = aggregatedCounterUpdateList.get(0);
          // Increment and read in one atomic step so that the threshold test and the power-of-two
          // test below both see the same value.
          final long errorCount = this.counterAggregationErrorCount.incrementAndGet();
          // log warning message only when error count is the power of 2 to avoid spamming.
          if (errorCount > 10 && Long.bitCount(errorCount) == 1) {
            LOG.warn("Found non-aggregated counter updates of size {} with kind {}, this will likely " + "cause performance degradation and excessive GC if size is large.", counterUpdateList.size(), MoreObjects.firstNonNull(head.getNameAndKind(), head.getStructuredNameAndMetadata()));
          }
        }
        counterUpdates.addAll(aggregatedCounterUpdateList);
      }
    }
  }
  // Handle duplicate counters from different stages. Store all the counters in a multi-map and
  // send the counters that appear multiple times in separate RPCs. Same logical counter could
  // appear in multiple stages if a step runs in multiple stages (as with flatten-unzipped stages)
  // especially if the counter definition does not set execution_step_name.
  ListMultimap<Object, CounterUpdate> counterMultimap = MultimapBuilder.hashKeys(counterUpdates.size()).linkedListValues().build();
  boolean hasDuplicates = false;
  for (CounterUpdate c : counterUpdates) {
    Object key = getCounterUpdateKey(c);
    if (counterMultimap.containsKey(key)) {
      hasDuplicates = true;
    }
    counterMultimap.put(key, c);
  }
  // Clears counterUpdates and enqueues unique counters from counterMultimap. If a counter
  // appears more than once, one of them is extracted leaving the remaining in the map.
  Runnable extractUniqueCounters = () -> {
    counterUpdates.clear();
    for (Iterator<Object> iter = counterMultimap.keySet().iterator(); iter.hasNext(); ) {
      List<CounterUpdate> counters = counterMultimap.get(iter.next());
      counterUpdates.add(counters.get(0));
      if (counters.size() == 1) {
        // There is single value. Remove the entry through the iterator.
        iter.remove();
      } else {
        // Otherwise remove the first value.
        counters.remove(0);
      }
    }
  };
  if (hasDuplicates) {
    extractUniqueCounters.run();
  } else {
    // Common case: no duplicates. We can just send counterUpdates, empty the multimap.
    counterMultimap.clear();
  }
  List<Status> errors;
  synchronized (pendingFailuresToReport) {
    errors = new ArrayList<>(pendingFailuresToReport.size());
    for (String stackTrace : pendingFailuresToReport) {
      // Status code 2 is rpc.Code.UNKNOWN.
      errors.add(new Status().setCode(2).setMessage(stackTrace));
    }
    // Best effort only, no need to wait till successfully sent.
    pendingFailuresToReport.clear();
  }
  // The first report carries the errors together with one update per counter key.
  WorkItemStatus workItemStatus = new WorkItemStatus().setWorkItemId(WINDMILL_COUNTER_UPDATE_WORK_ID).setErrors(errors).setCounterUpdates(counterUpdates);
  workUnitClient.reportWorkItemStatus(workItemStatus);
  // Send any counters appearing more than once in subsequent RPCs:
  while (!counterMultimap.isEmpty()) {
    extractUniqueCounters.run();
    workUnitClient.reportWorkItemStatus(new WorkItemStatus().setWorkItemId(WINDMILL_COUNTER_UPDATE_WORK_ID).setCounterUpdates(counterUpdates));
  }
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ListMultimap in project beam by apache.
the class PipelineOptionsFactory method parseObjects.
/**
 * Using the parsed string arguments, we convert the strings to the expected return type of the
 * methods that are found on the passed-in class.
 *
 * <p>For any return type that is expected to be an array or a collection, we further split up
 * each string on ','.
 *
 * <p>We special case the "runner" option. It is mapped to the class of the {@link PipelineRunner}
 * based off of the {@link PipelineRunner PipelineRunners} simple class name. If the provided
 * runner name is not registered via a {@link PipelineRunnerRegistrar}, we attempt to obtain the
 * class that the name represents using {@link Class#forName(String)} and use the result class if
 * it subclasses {@link PipelineRunner}.
 *
 * <p>If strict parsing is enabled, unknown options or options that cannot be converted to the
 * expected java type cause an {@link IllegalArgumentException}. If strict parsing is disabled,
 * such options are logged as a warning and ignored.
 *
 * @param klass the options interface whose properties, together with all registered options,
 *     define the accepted option names
 * @param options option names mapped to the raw string values supplied for them
 * @param strictParsing whether an invalid option aborts parsing instead of being skipped
 * @return a map from option name to its converted value; skipped options are absent
 * @throws IllegalArgumentException if {@code strictParsing} is true and an option is unknown or
 *     cannot be converted
 */
private static <T extends PipelineOptions> Map<String, Object> parseObjects(Class<T> klass, ListMultimap<String, String> options, boolean strictParsing) {
  Map<String, Method> propertyNamesToGetters = Maps.newHashMap();
  Cache cache = CACHE.get();
  cache.validateWellFormed(klass);
  @SuppressWarnings("unchecked") Iterable<PropertyDescriptor> propertyDescriptors = cache.getPropertyDescriptors(Stream.concat(getRegisteredOptions().stream(), Stream.of(klass)).collect(Collectors.toSet()));
  for (PropertyDescriptor descriptor : propertyDescriptors) {
    propertyNamesToGetters.put(descriptor.getName(), descriptor.getReadMethod());
  }
  Map<String, Object> convertedOptions = Maps.newHashMap();
  for (final Map.Entry<String, Collection<String>> entry : options.asMap().entrySet()) {
    try {
      // Unknown property: suggest the known names within Levenshtein distance 2, i.e.
      // either off by one or off by two character errors.
      if (!propertyNamesToGetters.containsKey(entry.getKey())) {
        SortedSet<String> closestMatches = new TreeSet<>(Sets.filter(propertyNamesToGetters.keySet(), input -> StringUtils.getLevenshteinDistance(entry.getKey(), input) <= 2));
        switch(closestMatches.size()) {
          case 0:
            throw new IllegalArgumentException(String.format("Class %s missing a property named '%s'.", klass, entry.getKey()));
          case 1:
            throw new IllegalArgumentException(String.format("Class %s missing a property named '%s'. Did you mean '%s'?", klass, entry.getKey(), Iterables.getOnlyElement(closestMatches)));
          default:
            throw new IllegalArgumentException(String.format("Class %s missing a property named '%s'. Did you mean one of %s?", klass, entry.getKey(), closestMatches));
        }
      }
      Method method = propertyNamesToGetters.get(entry.getKey());
      // Only allow empty argument values for String, String Array, and Collection<String>.
      Class<?> returnType = method.getReturnType();
      JavaType type = MAPPER.getTypeFactory().constructType(method.getGenericReturnType());
      if ("runner".equals(entry.getKey())) {
        String runner = Iterables.getOnlyElement(entry.getValue());
        final Map<String, Class<? extends PipelineRunner<?>>> pipelineRunners = cache.supportedPipelineRunners;
        // Normalize once with the ROOT locale so the membership test and the lookup use the
        // same key regardless of the JVM's default locale (e.g. the Turkish dotless i).
        String runnerKey = runner.toLowerCase(ROOT);
        if (pipelineRunners.containsKey(runnerKey)) {
          convertedOptions.put("runner", pipelineRunners.get(runnerKey));
        } else {
          // Not a registered name: treat the value as a class name.
          try {
            Class<?> runnerClass = Class.forName(runner, true, ReflectHelpers.findClassLoader());
            if (!PipelineRunner.class.isAssignableFrom(runnerClass)) {
              throw new IllegalArgumentException(String.format("Class '%s' does not implement PipelineRunner. " + "Supported pipeline runners %s", runner, cache.getSupportedRunners()));
            }
            convertedOptions.put("runner", runnerClass);
          } catch (ClassNotFoundException e) {
            String msg = String.format("Unknown 'runner' specified '%s', supported pipeline runners %s", runner, cache.getSupportedRunners());
            throw new IllegalArgumentException(msg, e);
          }
        }
      } else if (isCollectionOrArrayOfAllowedTypes(returnType, type)) {
        // Split any strings with ","
        List<String> values = entry.getValue().stream().flatMap(input -> Arrays.stream(input.split(","))).collect(Collectors.toList());
        if (values.contains("")) {
          checkEmptyStringAllowed(returnType, type, method.getGenericReturnType().toString());
        }
        convertedOptions.put(entry.getKey(), MAPPER.convertValue(values, type));
      } else if (isSimpleType(returnType, type)) {
        String value = Iterables.getOnlyElement(entry.getValue());
        if (value.isEmpty()) {
          checkEmptyStringAllowed(returnType, type, method.getGenericReturnType().toString());
        }
        convertedOptions.put(entry.getKey(), MAPPER.convertValue(value, type));
      } else {
        // Any other type: the single value is handed to tryParseObject.
        String value = Iterables.getOnlyElement(entry.getValue());
        if (value.isEmpty()) {
          checkEmptyStringAllowed(returnType, type, method.getGenericReturnType().toString());
        }
        try {
          convertedOptions.put(entry.getKey(), tryParseObject(value, method));
        } catch (IOException e) {
          throw new IllegalArgumentException("Unable to parse JSON value " + value, e);
        }
      }
    } catch (IllegalArgumentException e) {
      if (strictParsing) {
        throw e;
      } else {
        LOG.warn("Strict parsing is disabled, ignoring option '{}' with value '{}' because {}", entry.getKey(), entry.getValue(), e.getMessage());
      }
    }
  }
  return convertedOptions;
}
Aggregations